{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 450.0, "eval_steps": 25.0, "global_step": 18900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.023880597014925373, "grad_norm": Infinity, "learning_rate": 1e-05, "loss": 76.8913, "step": 1 }, { "epoch": 0.04776119402985075, "grad_norm": 395.8459777832031, "learning_rate": 1e-05, "loss": 77.3235, "step": 2 }, { "epoch": 0.07164179104477612, "grad_norm": 161.73968505859375, "learning_rate": 9.988095238095239e-06, "loss": 67.3668, "step": 3 }, { "epoch": 0.0955223880597015, "grad_norm": 145.1616973876953, "learning_rate": 9.976190476190477e-06, "loss": 62.8542, "step": 4 }, { "epoch": 0.11940298507462686, "grad_norm": 52.10374069213867, "learning_rate": 9.964285714285714e-06, "loss": 59.8627, "step": 5 }, { "epoch": 0.14328358208955225, "grad_norm": 31.430763244628906, "learning_rate": 9.952380952380954e-06, "loss": 59.2378, "step": 6 }, { "epoch": 0.16716417910447762, "grad_norm": 37.42692947387695, "learning_rate": 9.940476190476192e-06, "loss": 58.3114, "step": 7 }, { "epoch": 0.191044776119403, "grad_norm": 32.284332275390625, "learning_rate": 9.92857142857143e-06, "loss": 57.3408, "step": 8 }, { "epoch": 0.21492537313432836, "grad_norm": 28.520849227905273, "learning_rate": 9.916666666666668e-06, "loss": 56.9256, "step": 9 }, { "epoch": 0.23880597014925373, "grad_norm": 40.480167388916016, "learning_rate": 9.904761904761906e-06, "loss": 57.4842, "step": 10 }, { "epoch": 0.2626865671641791, "grad_norm": 20.3512020111084, "learning_rate": 9.892857142857143e-06, "loss": 56.7597, "step": 11 }, { "epoch": 0.2865671641791045, "grad_norm": 22.308382034301758, "learning_rate": 9.880952380952381e-06, "loss": 56.594, "step": 12 }, { "epoch": 0.31044776119402984, "grad_norm": 14.086284637451172, "learning_rate": 9.869047619047621e-06, "loss": 56.6327, "step": 13 }, { "epoch": 0.33432835820895523, "grad_norm": NaN, "learning_rate": 9.857142857142859e-06, "loss": 63.0791, "step": 14 }, { "epoch": 0.3582089552238806, "grad_norm": 17.055604934692383, "learning_rate": 9.857142857142859e-06, "loss": 56.1551, "step": 15 }, { "epoch": 0.382089552238806, "grad_norm": 18.52287483215332, "learning_rate": 9.845238095238097e-06, "loss": 54.9502, "step": 16 }, { "epoch": 0.4059701492537313, "grad_norm": 32.66905212402344, "learning_rate": 9.833333333333333e-06, "loss": 55.6494, "step": 17 }, { "epoch": 0.4298507462686567, "grad_norm": 24.075742721557617, "learning_rate": 9.821428571428573e-06, "loss": 55.4766, "step": 18 }, { "epoch": 0.4537313432835821, "grad_norm": 31.505783081054688, "learning_rate": 9.80952380952381e-06, "loss": 55.1481, "step": 19 }, { "epoch": 0.47761194029850745, "grad_norm": 30.53020477294922, "learning_rate": 9.797619047619048e-06, "loss": 55.158, "step": 20 }, { "epoch": 0.5014925373134328, "grad_norm": 14.44444751739502, "learning_rate": 9.785714285714286e-06, "loss": 54.8286, "step": 21 }, { "epoch": 0.5253731343283582, "grad_norm": 53.601078033447266, "learning_rate": 9.773809523809524e-06, "loss": 55.1466, "step": 22 }, { "epoch": 0.5492537313432836, "grad_norm": 37.156028747558594, "learning_rate": 9.761904761904762e-06, "loss": 54.629, "step": 23 }, { "epoch": 0.573134328358209, "grad_norm": 41.84994125366211, "learning_rate": 9.75e-06, "loss": 55.2766, "step": 24 }, { "epoch": 0.5970149253731343, "grad_norm": 50.61705017089844, "learning_rate": 9.73809523809524e-06, "loss": 54.3497, "step": 25 }, { "epoch": 0.6208955223880597, "grad_norm": 16.964982986450195, "learning_rate": 9.726190476190477e-06, "loss": 55.1673, "step": 26 }, { "epoch": 0.6447761194029851, "grad_norm": 22.71157455444336, "learning_rate": 9.714285714285715e-06, "loss": 54.8001, "step": 27 }, { "epoch": 0.6686567164179105, "grad_norm": 16.372802734375, "learning_rate": 9.702380952380953e-06, "loss": 54.4506, "step": 28 }, { "epoch": 0.6925373134328359, "grad_norm": 13.21664047241211, "learning_rate": 9.690476190476191e-06, "loss": 53.0488, "step": 29 }, { "epoch": 0.7164179104477612, "grad_norm": NaN, "learning_rate": 9.678571428571429e-06, "loss": 74.7707, "step": 30 }, { "epoch": 0.7402985074626866, "grad_norm": 23.045652389526367, "learning_rate": 9.678571428571429e-06, "loss": 54.1909, "step": 31 }, { "epoch": 0.764179104477612, "grad_norm": 15.034178733825684, "learning_rate": 9.666666666666667e-06, "loss": 53.3253, "step": 32 }, { "epoch": 0.7880597014925373, "grad_norm": 14.148232460021973, "learning_rate": 9.654761904761906e-06, "loss": 53.693, "step": 33 }, { "epoch": 0.8119402985074626, "grad_norm": NaN, "learning_rate": 9.642857142857144e-06, "loss": 61.3517, "step": 34 }, { "epoch": 0.835820895522388, "grad_norm": 14.757994651794434, "learning_rate": 9.642857142857144e-06, "loss": 53.3175, "step": 35 }, { "epoch": 0.8597014925373134, "grad_norm": 10.875706672668457, "learning_rate": 9.630952380952382e-06, "loss": 54.2592, "step": 36 }, { "epoch": 0.8835820895522388, "grad_norm": 9.926539421081543, "learning_rate": 9.61904761904762e-06, "loss": 53.8721, "step": 37 }, { "epoch": 0.9074626865671642, "grad_norm": 17.697235107421875, "learning_rate": 9.607142857142858e-06, "loss": 54.2901, "step": 38 }, { "epoch": 0.9313432835820895, "grad_norm": 29.19430160522461, "learning_rate": 9.595238095238096e-06, "loss": 53.1261, "step": 39 }, { "epoch": 0.9552238805970149, "grad_norm": 12.3985595703125, "learning_rate": 9.583333333333335e-06, "loss": 53.9815, "step": 40 }, { "epoch": 0.9791044776119403, "grad_norm": 17.109691619873047, "learning_rate": 9.571428571428573e-06, "loss": 52.5838, "step": 41 }, { "epoch": 1.0, "grad_norm": 14.529239654541016, "learning_rate": 9.559523809523811e-06, "loss": 46.1888, "step": 42 }, { "epoch": 1.0238805970149254, "grad_norm": 15.683514595031738, "learning_rate": 9.547619047619049e-06, "loss": 52.2043, "step": 43 }, { "epoch": 1.0477611940298508, "grad_norm": 26.219507217407227, "learning_rate": 9.535714285714287e-06, "loss": 53.09, "step": 44 }, { "epoch": 1.0716417910447762, "grad_norm": 19.859697341918945, "learning_rate": 9.523809523809525e-06, "loss": 52.5858, "step": 45 }, { "epoch": 1.0955223880597016, "grad_norm": 11.090332984924316, "learning_rate": 9.511904761904763e-06, "loss": 53.7674, "step": 46 }, { "epoch": 1.1194029850746268, "grad_norm": 15.586993217468262, "learning_rate": 9.5e-06, "loss": 53.4901, "step": 47 }, { "epoch": 1.1432835820895522, "grad_norm": 22.734928131103516, "learning_rate": 9.488095238095238e-06, "loss": 53.6667, "step": 48 }, { "epoch": 1.1671641791044776, "grad_norm": 16.382047653198242, "learning_rate": 9.476190476190476e-06, "loss": 53.2914, "step": 49 }, { "epoch": 1.191044776119403, "grad_norm": 15.916092872619629, "learning_rate": 9.464285714285714e-06, "loss": 51.729, "step": 50 }, { "epoch": 1.2149253731343284, "grad_norm": NaN, "learning_rate": 9.452380952380952e-06, "loss": 66.3151, "step": 51 }, { "epoch": 1.2388059701492538, "grad_norm": 20.121395111083984, "learning_rate": 9.452380952380952e-06, "loss": 53.866, "step": 52 }, { "epoch": 1.2626865671641792, "grad_norm": NaN, "learning_rate": 9.440476190476192e-06, "loss": 67.3538, "step": 53 }, { "epoch": 1.2865671641791045, "grad_norm": 13.869222640991211, "learning_rate": 9.440476190476192e-06, "loss": 53.5431, "step": 54 }, { "epoch": 1.31044776119403, "grad_norm": 25.478107452392578, "learning_rate": 9.42857142857143e-06, "loss": 52.7126, "step": 55 }, { "epoch": 1.3343283582089551, "grad_norm": 35.76942825317383, "learning_rate": 9.416666666666667e-06, "loss": 53.8082, "step": 56 }, { "epoch": 1.3582089552238805, "grad_norm": 17.95109748840332, "learning_rate": 9.404761904761905e-06, "loss": 53.0275, "step": 57 }, { "epoch": 1.382089552238806, "grad_norm": 26.80129623413086, "learning_rate": 9.392857142857143e-06, "loss": 53.3295, "step": 58 }, { "epoch": 1.4059701492537313, "grad_norm": 25.803054809570312, "learning_rate": 9.380952380952381e-06, "loss": 52.3771, "step": 59 }, { "epoch": 1.4298507462686567, "grad_norm": 35.39850616455078, "learning_rate": 9.36904761904762e-06, "loss": 52.9467, "step": 60 }, { "epoch": 1.4537313432835821, "grad_norm": 27.43315887451172, "learning_rate": 9.357142857142859e-06, "loss": 53.1359, "step": 61 }, { "epoch": 1.4776119402985075, "grad_norm": 33.431400299072266, "learning_rate": 9.345238095238096e-06, "loss": 52.2006, "step": 62 }, { "epoch": 1.5014925373134327, "grad_norm": 33.08237075805664, "learning_rate": 9.333333333333334e-06, "loss": 52.6816, "step": 63 }, { "epoch": 1.5253731343283583, "grad_norm": 25.711997985839844, "learning_rate": 9.321428571428572e-06, "loss": 52.6113, "step": 64 }, { "epoch": 1.5492537313432835, "grad_norm": 32.688297271728516, "learning_rate": 9.30952380952381e-06, "loss": 51.9086, "step": 65 }, { "epoch": 1.573134328358209, "grad_norm": 31.856857299804688, "learning_rate": 9.297619047619048e-06, "loss": 53.1913, "step": 66 }, { "epoch": 1.5970149253731343, "grad_norm": 26.231773376464844, "learning_rate": 9.285714285714288e-06, "loss": 51.464, "step": 67 }, { "epoch": 1.6208955223880597, "grad_norm": 29.39109230041504, "learning_rate": 9.273809523809525e-06, "loss": 52.0572, "step": 68 }, { "epoch": 1.644776119402985, "grad_norm": 28.86277198791504, "learning_rate": 9.261904761904763e-06, "loss": 52.7115, "step": 69 }, { "epoch": 1.6686567164179105, "grad_norm": 28.82640266418457, "learning_rate": 9.250000000000001e-06, "loss": 53.1756, "step": 70 }, { "epoch": 1.6925373134328359, "grad_norm": 31.32577133178711, "learning_rate": 9.238095238095239e-06, "loss": 52.369, "step": 71 }, { "epoch": 1.716417910447761, "grad_norm": 18.739269256591797, "learning_rate": 9.226190476190477e-06, "loss": 52.6631, "step": 72 }, { "epoch": 1.7402985074626867, "grad_norm": 22.889320373535156, "learning_rate": 9.214285714285715e-06, "loss": 52.4786, "step": 73 }, { "epoch": 1.7641791044776118, "grad_norm": 24.175626754760742, "learning_rate": 9.202380952380953e-06, "loss": 52.1792, "step": 74 }, { "epoch": 1.7880597014925375, "grad_norm": 25.257095336914062, "learning_rate": 9.19047619047619e-06, "loss": 51.39, "step": 75 }, { "epoch": 1.8119402985074626, "grad_norm": 35.745208740234375, "learning_rate": 9.178571428571429e-06, "loss": 53.331, "step": 76 }, { "epoch": 1.835820895522388, "grad_norm": 23.815813064575195, "learning_rate": 9.166666666666666e-06, "loss": 52.7632, "step": 77 }, { "epoch": 1.8597014925373134, "grad_norm": 32.405757904052734, "learning_rate": 9.154761904761906e-06, "loss": 52.3165, "step": 78 }, { "epoch": 1.8835820895522388, "grad_norm": 38.95046615600586, "learning_rate": 9.142857142857144e-06, "loss": 52.0931, "step": 79 }, { "epoch": 1.9074626865671642, "grad_norm": 22.412342071533203, "learning_rate": 9.130952380952382e-06, "loss": 51.8732, "step": 80 }, { "epoch": 1.9313432835820894, "grad_norm": 35.088253021240234, "learning_rate": 9.11904761904762e-06, "loss": 52.0182, "step": 81 }, { "epoch": 1.955223880597015, "grad_norm": 20.136964797973633, "learning_rate": 9.107142857142858e-06, "loss": 52.7956, "step": 82 }, { "epoch": 1.9791044776119402, "grad_norm": 20.860034942626953, "learning_rate": 9.095238095238095e-06, "loss": 50.339, "step": 83 }, { "epoch": 2.0, "grad_norm": 20.38931655883789, "learning_rate": 9.083333333333333e-06, "loss": 45.3651, "step": 84 }, { "epoch": 2.023880597014925, "grad_norm": 21.519498825073242, "learning_rate": 9.071428571428573e-06, "loss": 52.1228, "step": 85 }, { "epoch": 2.047761194029851, "grad_norm": 30.47000503540039, "learning_rate": 9.05952380952381e-06, "loss": 51.1968, "step": 86 }, { "epoch": 2.071641791044776, "grad_norm": 19.728044509887695, "learning_rate": 9.047619047619049e-06, "loss": 52.1514, "step": 87 }, { "epoch": 2.0955223880597016, "grad_norm": 42.647281646728516, "learning_rate": 9.035714285714287e-06, "loss": 52.5597, "step": 88 }, { "epoch": 2.1194029850746268, "grad_norm": 55.45186233520508, "learning_rate": 9.023809523809524e-06, "loss": 53.7621, "step": 89 }, { "epoch": 2.1432835820895524, "grad_norm": 14.879026412963867, "learning_rate": 9.011904761904762e-06, "loss": 52.0743, "step": 90 }, { "epoch": 2.1671641791044776, "grad_norm": 48.55704116821289, "learning_rate": 9e-06, "loss": 52.0543, "step": 91 }, { "epoch": 2.191044776119403, "grad_norm": 37.96782302856445, "learning_rate": 8.98809523809524e-06, "loss": 52.4684, "step": 92 }, { "epoch": 2.2149253731343284, "grad_norm": 27.714475631713867, "learning_rate": 8.976190476190478e-06, "loss": 52.725, "step": 93 }, { "epoch": 2.2388059701492535, "grad_norm": 32.12433624267578, "learning_rate": 8.964285714285716e-06, "loss": 52.7161, "step": 94 }, { "epoch": 2.262686567164179, "grad_norm": 23.8153018951416, "learning_rate": 8.952380952380953e-06, "loss": 51.3124, "step": 95 }, { "epoch": 2.2865671641791043, "grad_norm": 31.269794464111328, "learning_rate": 8.940476190476191e-06, "loss": 51.9646, "step": 96 }, { "epoch": 2.31044776119403, "grad_norm": 16.611865997314453, "learning_rate": 8.92857142857143e-06, "loss": 51.8503, "step": 97 }, { "epoch": 2.334328358208955, "grad_norm": 26.69631004333496, "learning_rate": 8.916666666666667e-06, "loss": 52.4857, "step": 98 }, { "epoch": 2.3582089552238807, "grad_norm": 21.10638999938965, "learning_rate": 8.904761904761905e-06, "loss": 52.0022, "step": 99 }, { "epoch": 2.382089552238806, "grad_norm": 16.273351669311523, "learning_rate": 8.892857142857143e-06, "loss": 50.4367, "step": 100 }, { "epoch": 2.405970149253731, "grad_norm": 16.407167434692383, "learning_rate": 8.88095238095238e-06, "loss": 51.2079, "step": 101 }, { "epoch": 2.4298507462686567, "grad_norm": 16.22024154663086, "learning_rate": 8.869047619047619e-06, "loss": 50.4939, "step": 102 }, { "epoch": 2.4537313432835823, "grad_norm": 27.11235809326172, "learning_rate": 8.857142857142858e-06, "loss": 50.0872, "step": 103 }, { "epoch": 2.4776119402985075, "grad_norm": 18.912181854248047, "learning_rate": 8.845238095238096e-06, "loss": 51.8135, "step": 104 }, { "epoch": 2.5014925373134327, "grad_norm": 29.597028732299805, "learning_rate": 8.833333333333334e-06, "loss": 49.4789, "step": 105 }, { "epoch": 2.5253731343283583, "grad_norm": 30.51687240600586, "learning_rate": 8.821428571428572e-06, "loss": 52.5555, "step": 106 }, { "epoch": 2.5492537313432835, "grad_norm": 31.4583797454834, "learning_rate": 8.80952380952381e-06, "loss": 51.0073, "step": 107 }, { "epoch": 2.573134328358209, "grad_norm": 30.35653305053711, "learning_rate": 8.797619047619048e-06, "loss": 50.9501, "step": 108 }, { "epoch": 2.5970149253731343, "grad_norm": 24.041545867919922, "learning_rate": 8.785714285714286e-06, "loss": 49.5162, "step": 109 }, { "epoch": 2.62089552238806, "grad_norm": 23.52166175842285, "learning_rate": 8.773809523809525e-06, "loss": 52.9747, "step": 110 }, { "epoch": 2.644776119402985, "grad_norm": 28.871065139770508, "learning_rate": 8.761904761904763e-06, "loss": 50.2273, "step": 111 }, { "epoch": 2.6686567164179102, "grad_norm": 26.484140396118164, "learning_rate": 8.750000000000001e-06, "loss": 51.2286, "step": 112 }, { "epoch": 2.692537313432836, "grad_norm": 37.570743560791016, "learning_rate": 8.738095238095239e-06, "loss": 49.7131, "step": 113 }, { "epoch": 2.716417910447761, "grad_norm": 23.827178955078125, "learning_rate": 8.726190476190477e-06, "loss": 51.913, "step": 114 }, { "epoch": 2.7402985074626867, "grad_norm": 33.89924621582031, "learning_rate": 8.714285714285715e-06, "loss": 52.2382, "step": 115 }, { "epoch": 2.764179104477612, "grad_norm": 29.397851943969727, "learning_rate": 8.702380952380952e-06, "loss": 52.1548, "step": 116 }, { "epoch": 2.7880597014925375, "grad_norm": 28.73517417907715, "learning_rate": 8.690476190476192e-06, "loss": 51.2892, "step": 117 }, { "epoch": 2.8119402985074626, "grad_norm": 32.068138122558594, "learning_rate": 8.67857142857143e-06, "loss": 51.542, "step": 118 }, { "epoch": 2.835820895522388, "grad_norm": 22.76898956298828, "learning_rate": 8.666666666666668e-06, "loss": 50.373, "step": 119 }, { "epoch": 2.8597014925373134, "grad_norm": 33.528263092041016, "learning_rate": 8.654761904761906e-06, "loss": 51.3075, "step": 120 }, { "epoch": 2.883582089552239, "grad_norm": 21.655696868896484, "learning_rate": 8.642857142857144e-06, "loss": 51.1397, "step": 121 }, { "epoch": 2.9074626865671642, "grad_norm": 25.94880485534668, "learning_rate": 8.630952380952381e-06, "loss": 51.4326, "step": 122 }, { "epoch": 2.9313432835820894, "grad_norm": 36.14421844482422, "learning_rate": 8.61904761904762e-06, "loss": 50.9524, "step": 123 }, { "epoch": 2.955223880597015, "grad_norm": 28.361459732055664, "learning_rate": 8.607142857142859e-06, "loss": 51.3171, "step": 124 }, { "epoch": 2.97910447761194, "grad_norm": 30.784954071044922, "learning_rate": 8.595238095238097e-06, "loss": 49.9797, "step": 125 }, { "epoch": 3.0, "grad_norm": 22.98565101623535, "learning_rate": 8.583333333333333e-06, "loss": 44.3471, "step": 126 }, { "epoch": 3.023880597014925, "grad_norm": 25.601985931396484, "learning_rate": 8.571428571428571e-06, "loss": 51.6574, "step": 127 }, { "epoch": 3.047761194029851, "grad_norm": 27.648792266845703, "learning_rate": 8.55952380952381e-06, "loss": 51.3385, "step": 128 }, { "epoch": 3.071641791044776, "grad_norm": 18.773529052734375, "learning_rate": 8.547619047619048e-06, "loss": 51.1451, "step": 129 }, { "epoch": 3.0955223880597016, "grad_norm": 29.439353942871094, "learning_rate": 8.535714285714286e-06, "loss": 51.6092, "step": 130 }, { "epoch": 3.1194029850746268, "grad_norm": 32.41486740112305, "learning_rate": 8.523809523809524e-06, "loss": 50.9068, "step": 131 }, { "epoch": 3.1432835820895524, "grad_norm": 23.441896438598633, "learning_rate": 8.511904761904762e-06, "loss": 51.7453, "step": 132 }, { "epoch": 3.1671641791044776, "grad_norm": 29.218734741210938, "learning_rate": 8.5e-06, "loss": 49.9124, "step": 133 }, { "epoch": 3.191044776119403, "grad_norm": 20.988981246948242, "learning_rate": 8.488095238095238e-06, "loss": 50.9788, "step": 134 }, { "epoch": 3.2149253731343284, "grad_norm": 22.57052993774414, "learning_rate": 8.476190476190477e-06, "loss": 51.4228, "step": 135 }, { "epoch": 3.2388059701492535, "grad_norm": 26.112573623657227, "learning_rate": 8.464285714285715e-06, "loss": 50.3332, "step": 136 }, { "epoch": 3.262686567164179, "grad_norm": 23.8747615814209, "learning_rate": 8.452380952380953e-06, "loss": 51.1763, "step": 137 }, { "epoch": 3.2865671641791043, "grad_norm": 24.12811851501465, "learning_rate": 8.440476190476191e-06, "loss": 49.8539, "step": 138 }, { "epoch": 3.31044776119403, "grad_norm": 27.462984085083008, "learning_rate": 8.428571428571429e-06, "loss": 50.7766, "step": 139 }, { "epoch": 3.334328358208955, "grad_norm": 31.261472702026367, "learning_rate": 8.416666666666667e-06, "loss": 49.7599, "step": 140 }, { "epoch": 3.3582089552238807, "grad_norm": 21.049545288085938, "learning_rate": 8.404761904761905e-06, "loss": 49.6827, "step": 141 }, { "epoch": 3.382089552238806, "grad_norm": 30.103389739990234, "learning_rate": 8.392857142857144e-06, "loss": 49.3866, "step": 142 }, { "epoch": 3.405970149253731, "grad_norm": 31.348888397216797, "learning_rate": 8.380952380952382e-06, "loss": 51.4607, "step": 143 }, { "epoch": 3.4298507462686567, "grad_norm": 28.910200119018555, "learning_rate": 8.36904761904762e-06, "loss": 51.2337, "step": 144 }, { "epoch": 3.4537313432835823, "grad_norm": 21.00281524658203, "learning_rate": 8.357142857142858e-06, "loss": 50.9557, "step": 145 }, { "epoch": 3.4776119402985075, "grad_norm": 45.842002868652344, "learning_rate": 8.345238095238096e-06, "loss": 49.7377, "step": 146 }, { "epoch": 3.5014925373134327, "grad_norm": 30.77996253967285, "learning_rate": 8.333333333333334e-06, "loss": 51.1234, "step": 147 }, { "epoch": 3.5253731343283583, "grad_norm": 31.492767333984375, "learning_rate": 8.321428571428573e-06, "loss": 50.5733, "step": 148 }, { "epoch": 3.5492537313432835, "grad_norm": 36.57206344604492, "learning_rate": 8.309523809523811e-06, "loss": 50.6762, "step": 149 }, { "epoch": 3.573134328358209, "grad_norm": 33.86347198486328, "learning_rate": 8.297619047619049e-06, "loss": 50.8281, "step": 150 }, { "epoch": 3.5970149253731343, "grad_norm": 30.812152862548828, "learning_rate": 8.285714285714287e-06, "loss": 50.6509, "step": 151 }, { "epoch": 3.62089552238806, "grad_norm": 24.536882400512695, "learning_rate": 8.273809523809523e-06, "loss": 50.1112, "step": 152 }, { "epoch": 3.644776119402985, "grad_norm": 29.8430233001709, "learning_rate": 8.261904761904763e-06, "loss": 50.846, "step": 153 }, { "epoch": 3.6686567164179102, "grad_norm": 26.18596076965332, "learning_rate": 8.25e-06, "loss": 50.3806, "step": 154 }, { "epoch": 3.692537313432836, "grad_norm": 38.75019836425781, "learning_rate": 8.238095238095239e-06, "loss": 49.8915, "step": 155 }, { "epoch": 3.716417910447761, "grad_norm": 34.30149841308594, "learning_rate": 8.226190476190476e-06, "loss": 50.7886, "step": 156 }, { "epoch": 3.7402985074626867, "grad_norm": 33.179298400878906, "learning_rate": 8.214285714285714e-06, "loss": 50.8175, "step": 157 }, { "epoch": 3.764179104477612, "grad_norm": 34.90909957885742, "learning_rate": 8.202380952380952e-06, "loss": 50.3521, "step": 158 }, { "epoch": 3.7880597014925375, "grad_norm": 33.2717399597168, "learning_rate": 8.190476190476192e-06, "loss": 51.2006, "step": 159 }, { "epoch": 3.8119402985074626, "grad_norm": 33.082672119140625, "learning_rate": 8.17857142857143e-06, "loss": 49.5627, "step": 160 }, { "epoch": 3.835820895522388, "grad_norm": 23.65228843688965, "learning_rate": 8.166666666666668e-06, "loss": 49.9631, "step": 161 }, { "epoch": 3.8597014925373134, "grad_norm": 37.3172492980957, "learning_rate": 8.154761904761905e-06, "loss": 50.7175, "step": 162 }, { "epoch": 3.883582089552239, "grad_norm": 29.369930267333984, "learning_rate": 8.142857142857143e-06, "loss": 51.1435, "step": 163 }, { "epoch": 3.9074626865671642, "grad_norm": 28.807470321655273, "learning_rate": 8.130952380952381e-06, "loss": 50.3349, "step": 164 }, { "epoch": 3.9313432835820894, "grad_norm": 33.90628433227539, "learning_rate": 8.119047619047619e-06, "loss": 50.7241, "step": 165 }, { "epoch": 3.955223880597015, "grad_norm": 21.72952651977539, "learning_rate": 8.107142857142859e-06, "loss": 49.9013, "step": 166 }, { "epoch": 3.97910447761194, "grad_norm": 26.831520080566406, "learning_rate": 8.095238095238097e-06, "loss": 51.0161, "step": 167 }, { "epoch": 4.0, "grad_norm": 24.49069595336914, "learning_rate": 8.083333333333334e-06, "loss": 44.6758, "step": 168 }, { "epoch": 4.023880597014926, "grad_norm": 36.32711410522461, "learning_rate": 8.071428571428572e-06, "loss": 49.8601, "step": 169 }, { "epoch": 4.04776119402985, "grad_norm": 29.862812042236328, "learning_rate": 8.05952380952381e-06, "loss": 51.712, "step": 170 }, { "epoch": 4.071641791044776, "grad_norm": 40.245887756347656, "learning_rate": 8.047619047619048e-06, "loss": 50.3353, "step": 171 }, { "epoch": 4.095522388059702, "grad_norm": 34.22684097290039, "learning_rate": 8.035714285714286e-06, "loss": 50.6474, "step": 172 }, { "epoch": 4.119402985074627, "grad_norm": 36.754669189453125, "learning_rate": 8.023809523809526e-06, "loss": 50.1623, "step": 173 }, { "epoch": 4.143283582089552, "grad_norm": 35.76541519165039, "learning_rate": 8.011904761904763e-06, "loss": 50.2426, "step": 174 }, { "epoch": 4.167164179104478, "grad_norm": 25.851362228393555, "learning_rate": 8.000000000000001e-06, "loss": 49.9525, "step": 175 }, { "epoch": 4.191044776119403, "grad_norm": 24.48278045654297, "learning_rate": 7.98809523809524e-06, "loss": 49.1466, "step": 176 }, { "epoch": 4.214925373134328, "grad_norm": 28.79146385192871, "learning_rate": 7.976190476190477e-06, "loss": 49.9365, "step": 177 }, { "epoch": 4.2388059701492535, "grad_norm": 29.29482650756836, "learning_rate": 7.964285714285715e-06, "loss": 50.7427, "step": 178 }, { "epoch": 4.262686567164179, "grad_norm": 23.50571060180664, "learning_rate": 7.952380952380953e-06, "loss": 49.7287, "step": 179 }, { "epoch": 4.286567164179105, "grad_norm": 27.805828094482422, "learning_rate": 7.94047619047619e-06, "loss": 50.4316, "step": 180 }, { "epoch": 4.3104477611940295, "grad_norm": 28.323888778686523, "learning_rate": 7.928571428571429e-06, "loss": 50.0263, "step": 181 }, { "epoch": 4.334328358208955, "grad_norm": 25.43438148498535, "learning_rate": 7.916666666666667e-06, "loss": 49.426, "step": 182 }, { "epoch": 4.358208955223881, "grad_norm": 22.169496536254883, "learning_rate": 7.904761904761904e-06, "loss": 51.1048, "step": 183 }, { "epoch": 4.382089552238806, "grad_norm": 33.660545349121094, "learning_rate": 7.892857142857144e-06, "loss": 49.7654, "step": 184 }, { "epoch": 4.405970149253731, "grad_norm": 24.276273727416992, "learning_rate": 7.880952380952382e-06, "loss": 50.4976, "step": 185 }, { "epoch": 4.429850746268657, "grad_norm": 41.48741149902344, "learning_rate": 7.86904761904762e-06, "loss": 52.0386, "step": 186 }, { "epoch": 4.453731343283582, "grad_norm": 25.86789894104004, "learning_rate": 7.857142857142858e-06, "loss": 49.129, "step": 187 }, { "epoch": 4.477611940298507, "grad_norm": 26.607038497924805, "learning_rate": 7.845238095238096e-06, "loss": 49.3561, "step": 188 }, { "epoch": 4.501492537313433, "grad_norm": 43.54303741455078, "learning_rate": 7.833333333333333e-06, "loss": 50.1143, "step": 189 }, { "epoch": 4.525373134328358, "grad_norm": 45.6146354675293, "learning_rate": 7.821428571428571e-06, "loss": 49.3217, "step": 190 }, { "epoch": 4.549253731343284, "grad_norm": 24.00080680847168, "learning_rate": 7.809523809523811e-06, "loss": 50.484, "step": 191 }, { "epoch": 4.573134328358209, "grad_norm": 29.736740112304688, "learning_rate": 7.797619047619049e-06, "loss": 49.748, "step": 192 }, { "epoch": 4.597014925373134, "grad_norm": 33.08702850341797, "learning_rate": 7.785714285714287e-06, "loss": 50.2142, "step": 193 }, { "epoch": 4.62089552238806, "grad_norm": 19.16411018371582, "learning_rate": 7.773809523809525e-06, "loss": 50.1073, "step": 194 }, { "epoch": 4.6447761194029855, "grad_norm": 32.145721435546875, "learning_rate": 7.761904761904762e-06, "loss": 48.5769, "step": 195 }, { "epoch": 4.66865671641791, "grad_norm": 38.768341064453125, "learning_rate": 7.75e-06, "loss": 49.681, "step": 196 }, { "epoch": 4.692537313432836, "grad_norm": 26.108245849609375, "learning_rate": 7.738095238095238e-06, "loss": 49.9193, "step": 197 }, { "epoch": 4.7164179104477615, "grad_norm": 28.86294174194336, "learning_rate": 7.726190476190478e-06, "loss": 50.4584, "step": 198 }, { "epoch": 4.740298507462686, "grad_norm": 31.089380264282227, "learning_rate": 7.714285714285716e-06, "loss": 50.7873, "step": 199 }, { "epoch": 4.764179104477612, "grad_norm": 22.934032440185547, "learning_rate": 7.702380952380954e-06, "loss": 50.611, "step": 200 }, { "epoch": 4.7880597014925375, "grad_norm": 27.986371994018555, "learning_rate": 7.690476190476191e-06, "loss": 49.275, "step": 201 }, { "epoch": 4.811940298507462, "grad_norm": 23.44196319580078, "learning_rate": 7.67857142857143e-06, "loss": 50.2035, "step": 202 }, { "epoch": 4.835820895522388, "grad_norm": 22.05059242248535, "learning_rate": 7.666666666666667e-06, "loss": 48.9595, "step": 203 }, { "epoch": 4.859701492537313, "grad_norm": 29.709396362304688, "learning_rate": 7.654761904761905e-06, "loss": 50.5343, "step": 204 }, { "epoch": 4.883582089552239, "grad_norm": 23.702781677246094, "learning_rate": 7.642857142857143e-06, "loss": 50.2627, "step": 205 }, { "epoch": 4.907462686567165, "grad_norm": 20.144807815551758, "learning_rate": 7.630952380952381e-06, "loss": 51.0125, "step": 206 }, { "epoch": 4.931343283582089, "grad_norm": 28.83676528930664, "learning_rate": 7.61904761904762e-06, "loss": 50.5985, "step": 207 }, { "epoch": 4.955223880597015, "grad_norm": 34.40160369873047, "learning_rate": 7.6071428571428575e-06, "loss": 49.6469, "step": 208 }, { "epoch": 4.979104477611941, "grad_norm": 26.982925415039062, "learning_rate": 7.595238095238095e-06, "loss": 50.1666, "step": 209 }, { "epoch": 5.0, "grad_norm": 19.569746017456055, "learning_rate": 7.583333333333333e-06, "loss": 43.6715, "step": 210 }, { "epoch": 5.023880597014926, "grad_norm": 23.753328323364258, "learning_rate": 7.571428571428572e-06, "loss": 49.9273, "step": 211 }, { "epoch": 5.04776119402985, "grad_norm": 22.463659286499023, "learning_rate": 7.55952380952381e-06, "loss": 48.8499, "step": 212 }, { "epoch": 5.071641791044776, "grad_norm": 24.507875442504883, "learning_rate": 7.547619047619048e-06, "loss": 49.3275, "step": 213 }, { "epoch": 5.095522388059702, "grad_norm": 21.727603912353516, "learning_rate": 7.5357142857142865e-06, "loss": 49.1879, "step": 214 }, { "epoch": 5.119402985074627, "grad_norm": 26.122251510620117, "learning_rate": 7.523809523809524e-06, "loss": 50.1094, "step": 215 }, { "epoch": 5.143283582089552, "grad_norm": 24.142263412475586, "learning_rate": 7.511904761904762e-06, "loss": 50.2708, "step": 216 }, { "epoch": 5.167164179104478, "grad_norm": 22.762237548828125, "learning_rate": 7.500000000000001e-06, "loss": 50.441, "step": 217 }, { "epoch": 5.191044776119403, "grad_norm": 35.74570846557617, "learning_rate": 7.488095238095239e-06, "loss": 48.5121, "step": 218 }, { "epoch": 5.214925373134328, "grad_norm": 30.92180824279785, "learning_rate": 7.476190476190477e-06, "loss": 49.4257, "step": 219 }, { "epoch": 5.2388059701492535, "grad_norm": 26.90997314453125, "learning_rate": 7.464285714285715e-06, "loss": 50.9712, "step": 220 }, { "epoch": 5.262686567164179, "grad_norm": 35.544700622558594, "learning_rate": 7.4523809523809534e-06, "loss": 49.7908, "step": 221 }, { "epoch": 5.286567164179105, "grad_norm": 33.78145217895508, "learning_rate": 7.440476190476191e-06, "loss": 49.2105, "step": 222 }, { "epoch": 5.3104477611940295, "grad_norm": 32.16508102416992, "learning_rate": 7.428571428571429e-06, "loss": 49.8545, "step": 223 }, { "epoch": 5.334328358208955, "grad_norm": 30.4263973236084, "learning_rate": 7.416666666666668e-06, "loss": 50.0994, "step": 224 }, { "epoch": 5.358208955223881, "grad_norm": 25.801084518432617, "learning_rate": 7.404761904761906e-06, "loss": 49.6227, "step": 225 }, { "epoch": 5.382089552238806, "grad_norm": 27.16851234436035, "learning_rate": 7.392857142857144e-06, "loss": 50.0005, "step": 226 }, { "epoch": 5.405970149253731, "grad_norm": 30.102867126464844, "learning_rate": 7.380952380952382e-06, "loss": 50.7114, "step": 227 }, { "epoch": 5.429850746268657, "grad_norm": 26.032968521118164, "learning_rate": 7.36904761904762e-06, "loss": 48.99, "step": 228 }, { "epoch": 5.453731343283582, "grad_norm": 24.074424743652344, "learning_rate": 7.357142857142858e-06, "loss": 49.6965, "step": 229 }, { "epoch": 5.477611940298507, "grad_norm": 24.5870361328125, "learning_rate": 7.345238095238096e-06, "loss": 48.8593, "step": 230 }, { "epoch": 5.501492537313433, "grad_norm": 22.831932067871094, "learning_rate": 7.333333333333333e-06, "loss": 49.9975, "step": 231 }, { "epoch": 5.525373134328358, "grad_norm": 26.88197135925293, "learning_rate": 7.321428571428572e-06, "loss": 49.5131, "step": 232 }, { "epoch": 5.549253731343284, "grad_norm": 28.986154556274414, "learning_rate": 7.30952380952381e-06, "loss": 48.9042, "step": 233 }, { "epoch": 5.573134328358209, "grad_norm": 17.220605850219727, "learning_rate": 7.297619047619048e-06, "loss": 49.453, "step": 234 }, { "epoch": 5.597014925373134, "grad_norm": 25.110107421875, "learning_rate": 7.285714285714286e-06, "loss": 49.8196, "step": 235 }, { "epoch": 5.62089552238806, "grad_norm": 34.680870056152344, "learning_rate": 7.273809523809524e-06, "loss": 49.9709, "step": 236 }, { "epoch": 5.6447761194029855, "grad_norm": 24.10121726989746, "learning_rate": 7.261904761904762e-06, "loss": 49.4685, "step": 237 }, { "epoch": 5.66865671641791, "grad_norm": 28.65550422668457, "learning_rate": 7.25e-06, "loss": 50.3439, "step": 238 }, { "epoch": 5.692537313432836, "grad_norm": 27.49604606628418, "learning_rate": 7.238095238095239e-06, "loss": 50.0419, "step": 239 }, { "epoch": 5.7164179104477615, "grad_norm": 22.843509674072266, "learning_rate": 7.226190476190477e-06, "loss": 49.0357, "step": 240 }, { "epoch": 5.740298507462686, "grad_norm": 36.56801223754883, "learning_rate": 7.2142857142857145e-06, "loss": 49.4478, "step": 241 }, { "epoch": 5.764179104477612, "grad_norm": 38.233734130859375, "learning_rate": 7.202380952380953e-06, "loss": 50.4473, "step": 242 }, { "epoch": 5.7880597014925375, "grad_norm": 29.198333740234375, "learning_rate": 7.190476190476191e-06, "loss": 49.8598, "step": 243 }, { "epoch": 5.811940298507462, "grad_norm": 34.49404525756836, "learning_rate": 7.178571428571429e-06, "loss": 49.1441, "step": 244 }, { "epoch": 5.835820895522388, "grad_norm": 35.568359375, "learning_rate": 7.166666666666667e-06, "loss": 49.9402, "step": 245 }, { "epoch": 5.859701492537313, "grad_norm": 31.041446685791016, "learning_rate": 7.154761904761906e-06, "loss": 50.1265, "step": 246 }, { "epoch": 5.883582089552239, "grad_norm": 48.34186935424805, "learning_rate": 7.1428571428571436e-06, "loss": 50.7649, "step": 247 }, { "epoch": 5.907462686567165, "grad_norm": 39.171661376953125, "learning_rate": 7.1309523809523814e-06, "loss": 48.943, "step": 248 }, { "epoch": 5.931343283582089, "grad_norm": 28.724523544311523, "learning_rate": 7.11904761904762e-06, "loss": 50.8039, "step": 249 }, { "epoch": 5.955223880597015, "grad_norm": 36.57830810546875, "learning_rate": 7.107142857142858e-06, "loss": 50.2311, "step": 250 }, { "epoch": 5.979104477611941, "grad_norm": 39.91551971435547, "learning_rate": 7.095238095238096e-06, "loss": 49.1617, "step": 251 }, { "epoch": 6.0, "grad_norm": 17.104145050048828, "learning_rate": 7.083333333333335e-06, "loss": 42.8003, "step": 252 }, { "epoch": 6.023880597014926, "grad_norm": 33.03441619873047, "learning_rate": 7.0714285714285726e-06, "loss": 48.2896, "step": 253 }, { "epoch": 6.04776119402985, "grad_norm": 26.487470626831055, "learning_rate": 7.0595238095238105e-06, "loss": 49.205, "step": 254 }, { "epoch": 6.071641791044776, "grad_norm": 26.752981185913086, "learning_rate": 7.047619047619048e-06, "loss": 50.3943, "step": 255 }, { "epoch": 6.095522388059702, "grad_norm": 22.44376564025879, "learning_rate": 7.035714285714287e-06, "loss": 49.285, "step": 256 }, { "epoch": 6.119402985074627, "grad_norm": 31.066368103027344, "learning_rate": 7.023809523809524e-06, "loss": 49.3131, "step": 257 }, { "epoch": 6.143283582089552, "grad_norm": 28.67262840270996, "learning_rate": 7.011904761904762e-06, "loss": 50.6188, "step": 258 }, { "epoch": 6.167164179104478, "grad_norm": 24.013134002685547, "learning_rate": 7e-06, "loss": 50.4382, "step": 259 }, { "epoch": 6.191044776119403, "grad_norm": 26.5673828125, "learning_rate": 6.988095238095239e-06, "loss": 49.7058, "step": 260 }, { "epoch": 6.214925373134328, "grad_norm": 20.803695678710938, "learning_rate": 6.9761904761904765e-06, "loss": 48.9389, "step": 261 }, { "epoch": 6.2388059701492535, "grad_norm": 23.450183868408203, "learning_rate": 6.964285714285714e-06, "loss": 49.0091, "step": 262 }, { "epoch": 6.262686567164179, "grad_norm": 36.94446563720703, "learning_rate": 6.952380952380952e-06, "loss": 50.2589, "step": 263 }, { "epoch": 6.286567164179105, "grad_norm": 39.548095703125, "learning_rate": 6.940476190476191e-06, "loss": 49.3129, "step": 264 }, { "epoch": 6.3104477611940295, "grad_norm": 30.536083221435547, "learning_rate": 6.928571428571429e-06, "loss": 49.1838, "step": 265 }, { "epoch": 6.334328358208955, "grad_norm": 27.97296714782715, "learning_rate": 6.916666666666667e-06, "loss": 50.3184, "step": 266 }, { "epoch": 6.358208955223881, "grad_norm": 25.69655418395996, "learning_rate": 6.9047619047619055e-06, "loss": 49.2226, "step": 267 }, { "epoch": 6.382089552238806, "grad_norm": 22.114097595214844, "learning_rate": 6.892857142857143e-06, "loss": 49.4455, "step": 268 }, { "epoch": 6.405970149253731, "grad_norm": 30.47511100769043, "learning_rate": 6.880952380952381e-06, "loss": 49.7409, "step": 269 }, { "epoch": 6.429850746268657, "grad_norm": 26.32929039001465, "learning_rate": 6.86904761904762e-06, "loss": 50.3336, "step": 270 }, { "epoch": 6.453731343283582, "grad_norm": 28.09309196472168, "learning_rate": 6.857142857142858e-06, "loss": 49.6044, "step": 271 }, { "epoch": 6.477611940298507, "grad_norm": 25.840974807739258, "learning_rate": 6.845238095238096e-06, "loss": 49.9185, "step": 272 }, { "epoch": 6.501492537313433, "grad_norm": 31.89126205444336, "learning_rate": 6.833333333333334e-06, "loss": 48.2732, "step": 273 }, { "epoch": 6.525373134328358, "grad_norm": 24.013029098510742, "learning_rate": 6.8214285714285724e-06, "loss": 49.9752, "step": 274 }, { "epoch": 6.549253731343284, "grad_norm": 25.509836196899414, "learning_rate": 6.80952380952381e-06, "loss": 50.5493, "step": 275 }, { "epoch": 6.573134328358209, "grad_norm": 35.25442886352539, "learning_rate": 6.797619047619048e-06, "loss": 49.2553, "step": 276 }, { "epoch": 6.597014925373134, "grad_norm": 29.42585563659668, "learning_rate": 6.785714285714287e-06, "loss": 48.776, "step": 277 }, { "epoch": 6.62089552238806, "grad_norm": 25.90894889831543, "learning_rate": 6.773809523809525e-06, "loss": 49.1964, "step": 278 }, { "epoch": 6.6447761194029855, "grad_norm": 25.63600730895996, "learning_rate": 6.761904761904763e-06, "loss": 48.4528, "step": 279 }, { "epoch": 6.66865671641791, "grad_norm": 29.943740844726562, "learning_rate": 6.750000000000001e-06, "loss": 49.9026, "step": 280 }, { "epoch": 6.692537313432836, "grad_norm": 33.253910064697266, "learning_rate": 6.738095238095239e-06, "loss": 49.2364, "step": 281 }, { "epoch": 6.7164179104477615, "grad_norm": 23.465354919433594, "learning_rate": 6.726190476190477e-06, "loss": 49.2759, "step": 282 }, { "epoch": 6.740298507462686, "grad_norm": 31.023218154907227, "learning_rate": 6.714285714285714e-06, "loss": 49.3256, "step": 283 }, { "epoch": 6.764179104477612, "grad_norm": 32.376991271972656, "learning_rate": 6.702380952380952e-06, "loss": 47.7239, "step": 284 }, { "epoch": 6.7880597014925375, "grad_norm": 18.388896942138672, "learning_rate": 6.690476190476191e-06, "loss": 49.5751, "step": 285 }, { "epoch": 6.811940298507462, "grad_norm": 22.1639404296875, "learning_rate": 6.678571428571429e-06, "loss": 48.8153, "step": 286 }, { "epoch": 6.835820895522388, "grad_norm": 31.39455223083496, "learning_rate": 6.666666666666667e-06, "loss": 49.7872, "step": 287 }, { "epoch": 6.859701492537313, "grad_norm": 36.480533599853516, "learning_rate": 6.654761904761905e-06, "loss": 48.9679, "step": 288 }, { "epoch": 6.883582089552239, "grad_norm": 23.432872772216797, "learning_rate": 6.642857142857143e-06, "loss": 48.4627, "step": 289 }, { "epoch": 6.907462686567165, "grad_norm": 33.31097412109375, "learning_rate": 6.630952380952381e-06, "loss": 49.617, "step": 290 }, { "epoch": 6.931343283582089, "grad_norm": 34.07685852050781, "learning_rate": 6.619047619047619e-06, "loss": 49.3365, "step": 291 }, { "epoch": 6.955223880597015, "grad_norm": 25.68811798095703, "learning_rate": 6.607142857142858e-06, "loss": 49.7821, "step": 292 }, { "epoch": 6.979104477611941, "grad_norm": 24.179588317871094, "learning_rate": 6.595238095238096e-06, "loss": 48.5466, "step": 293 }, { "epoch": 7.0, "grad_norm": 26.158781051635742, "learning_rate": 6.5833333333333335e-06, "loss": 43.2838, "step": 294 }, { "epoch": 7.023880597014926, "grad_norm": 22.84689712524414, "learning_rate": 6.571428571428572e-06, "loss": 49.3253, "step": 295 }, { "epoch": 7.04776119402985, "grad_norm": NaN, "learning_rate": 6.55952380952381e-06, "loss": 75.2786, "step": 296 }, { "epoch": 7.071641791044776, "grad_norm": 31.13886070251465, "learning_rate": 6.55952380952381e-06, "loss": 49.984, "step": 297 }, { "epoch": 7.095522388059702, "grad_norm": 32.37982940673828, "learning_rate": 6.547619047619048e-06, "loss": 49.6632, "step": 298 }, { "epoch": 7.119402985074627, "grad_norm": 22.977916717529297, "learning_rate": 6.535714285714286e-06, "loss": 48.7802, "step": 299 }, { "epoch": 7.143283582089552, "grad_norm": NaN, "learning_rate": 6.523809523809525e-06, "loss": 60.3381, "step": 300 }, { "epoch": 7.167164179104478, "grad_norm": 32.18650817871094, "learning_rate": 6.523809523809525e-06, "loss": 49.2689, "step": 301 }, { "epoch": 7.191044776119403, "grad_norm": 30.0800724029541, "learning_rate": 6.5119047619047626e-06, "loss": 49.3891, "step": 302 }, { "epoch": 7.214925373134328, "grad_norm": 32.35110855102539, "learning_rate": 6.5000000000000004e-06, "loss": 48.4497, "step": 303 }, { "epoch": 7.2388059701492535, "grad_norm": 34.08786392211914, "learning_rate": 6.488095238095239e-06, "loss": 49.1321, "step": 304 }, { "epoch": 7.262686567164179, "grad_norm": 25.25969696044922, "learning_rate": 6.476190476190477e-06, "loss": 49.0524, "step": 305 }, { "epoch": 7.286567164179105, "grad_norm": 25.843929290771484, "learning_rate": 6.464285714285715e-06, "loss": 49.8077, "step": 306 }, { "epoch": 7.3104477611940295, "grad_norm": 34.57284927368164, "learning_rate": 6.452380952380954e-06, "loss": 49.5393, "step": 307 }, { "epoch": 7.334328358208955, "grad_norm": 33.44814682006836, "learning_rate": 6.4404761904761916e-06, "loss": 49.0375, "step": 308 }, { "epoch": 7.358208955223881, "grad_norm": 25.127429962158203, "learning_rate": 6.4285714285714295e-06, "loss": 48.8145, "step": 309 }, { "epoch": 7.382089552238806, "grad_norm": 31.81999969482422, "learning_rate": 6.416666666666667e-06, "loss": 49.6432, "step": 310 }, { "epoch": 7.405970149253731, "grad_norm": 22.428335189819336, "learning_rate": 6.404761904761904e-06, "loss": 47.6232, "step": 311 }, { "epoch": 7.429850746268657, "grad_norm": 45.87803268432617, "learning_rate": 6.392857142857143e-06, "loss": 48.3479, "step": 312 }, { "epoch": 7.453731343283582, "grad_norm": 37.441253662109375, "learning_rate": 6.380952380952381e-06, "loss": 48.593, "step": 313 }, { "epoch": 7.477611940298507, "grad_norm": 23.15785789489746, "learning_rate": 6.369047619047619e-06, "loss": 49.1204, "step": 314 }, { "epoch": 7.501492537313433, "grad_norm": 35.8905029296875, "learning_rate": 6.357142857142858e-06, "loss": 49.2918, "step": 315 }, { "epoch": 7.525373134328358, "grad_norm": 37.41954040527344, "learning_rate": 6.3452380952380955e-06, "loss": 47.7495, "step": 316 }, { "epoch": 7.549253731343284, "grad_norm": 31.173114776611328, "learning_rate": 6.333333333333333e-06, "loss": 49.539, "step": 317 }, { "epoch": 7.573134328358209, "grad_norm": 23.941965103149414, "learning_rate": 6.321428571428571e-06, "loss": 49.0958, "step": 318 }, { "epoch": 7.597014925373134, "grad_norm": 31.949769973754883, "learning_rate": 6.30952380952381e-06, "loss": 49.1945, "step": 319 }, { "epoch": 7.62089552238806, "grad_norm": 21.299409866333008, "learning_rate": 6.297619047619048e-06, "loss": 49.3823, "step": 320 }, { "epoch": 7.6447761194029855, "grad_norm": 34.93647766113281, "learning_rate": 6.285714285714286e-06, "loss": 48.8867, "step": 321 }, { "epoch": 7.66865671641791, "grad_norm": 30.189655303955078, "learning_rate": 6.2738095238095245e-06, "loss": 49.8644, "step": 322 }, { "epoch": 7.692537313432836, "grad_norm": 19.964523315429688, "learning_rate": 6.261904761904762e-06, "loss": 49.6489, "step": 323 }, { "epoch": 7.7164179104477615, "grad_norm": 22.253337860107422, "learning_rate": 6.25e-06, "loss": 48.0582, "step": 324 }, { "epoch": 7.740298507462686, "grad_norm": 26.631391525268555, "learning_rate": 6.238095238095239e-06, "loss": 48.5585, "step": 325 }, { "epoch": 7.764179104477612, "grad_norm": 26.0469913482666, "learning_rate": 6.226190476190477e-06, "loss": 49.4969, "step": 326 }, { "epoch": 7.7880597014925375, "grad_norm": 30.000507354736328, "learning_rate": 6.214285714285715e-06, "loss": 49.6044, "step": 327 }, { "epoch": 7.811940298507462, "grad_norm": 29.44800567626953, "learning_rate": 6.202380952380953e-06, "loss": 50.3622, "step": 328 }, { "epoch": 7.835820895522388, "grad_norm": 24.83717918395996, "learning_rate": 6.1904761904761914e-06, "loss": 50.0974, "step": 329 }, { "epoch": 7.859701492537313, "grad_norm": 30.0760555267334, "learning_rate": 6.178571428571429e-06, "loss": 48.9307, "step": 330 }, { "epoch": 7.883582089552239, "grad_norm": 21.087966918945312, "learning_rate": 6.166666666666667e-06, "loss": 49.3432, "step": 331 }, { "epoch": 7.907462686567165, "grad_norm": 23.193716049194336, "learning_rate": 6.154761904761906e-06, "loss": 48.6664, "step": 332 }, { "epoch": 7.931343283582089, "grad_norm": 22.764123916625977, "learning_rate": 6.142857142857144e-06, "loss": 49.3497, "step": 333 }, { "epoch": 7.955223880597015, "grad_norm": 22.411897659301758, "learning_rate": 6.130952380952382e-06, "loss": 49.4106, "step": 334 }, { "epoch": 7.979104477611941, "grad_norm": 29.535375595092773, "learning_rate": 6.11904761904762e-06, "loss": 49.0695, "step": 335 }, { "epoch": 8.0, "grad_norm": 21.094457626342773, "learning_rate": 6.107142857142858e-06, "loss": 42.1367, "step": 336 }, { "epoch": 8.023880597014925, "grad_norm": 33.74859619140625, "learning_rate": 6.095238095238096e-06, "loss": 47.0065, "step": 337 }, { "epoch": 8.047761194029851, "grad_norm": 32.539127349853516, "learning_rate": 6.083333333333333e-06, "loss": 47.9697, "step": 338 }, { "epoch": 8.071641791044776, "grad_norm": 19.168655395507812, "learning_rate": 6.071428571428571e-06, "loss": 49.4919, "step": 339 }, { "epoch": 8.0955223880597, "grad_norm": 30.041269302368164, "learning_rate": 6.05952380952381e-06, "loss": 48.7887, "step": 340 }, { "epoch": 8.119402985074627, "grad_norm": 21.070598602294922, "learning_rate": 6.047619047619048e-06, "loss": 48.5064, "step": 341 }, { "epoch": 8.143283582089552, "grad_norm": 29.560287475585938, "learning_rate": 6.035714285714286e-06, "loss": 47.7472, "step": 342 }, { "epoch": 8.167164179104478, "grad_norm": 24.256393432617188, "learning_rate": 6.023809523809524e-06, "loss": 48.8917, "step": 343 }, { "epoch": 8.191044776119403, "grad_norm": 29.970674514770508, "learning_rate": 6.011904761904762e-06, "loss": 48.3464, "step": 344 }, { "epoch": 8.214925373134328, "grad_norm": 25.274595260620117, "learning_rate": 6e-06, "loss": 49.1565, "step": 345 }, { "epoch": 8.238805970149254, "grad_norm": 24.000280380249023, "learning_rate": 5.988095238095238e-06, "loss": 49.3396, "step": 346 }, { "epoch": 8.26268656716418, "grad_norm": 25.110261917114258, "learning_rate": 5.976190476190477e-06, "loss": 49.786, "step": 347 }, { "epoch": 8.286567164179104, "grad_norm": 26.188514709472656, "learning_rate": 5.964285714285715e-06, "loss": 50.1652, "step": 348 }, { "epoch": 8.31044776119403, "grad_norm": 18.536714553833008, "learning_rate": 5.9523809523809525e-06, "loss": 49.7224, "step": 349 }, { "epoch": 8.334328358208955, "grad_norm": 33.79502868652344, "learning_rate": 5.940476190476191e-06, "loss": 48.2923, "step": 350 }, { "epoch": 8.35820895522388, "grad_norm": 33.03609085083008, "learning_rate": 5.928571428571429e-06, "loss": 49.128, "step": 351 }, { "epoch": 8.382089552238806, "grad_norm": 23.88555145263672, "learning_rate": 5.916666666666667e-06, "loss": 49.6072, "step": 352 }, { "epoch": 8.405970149253731, "grad_norm": 29.688135147094727, "learning_rate": 5.904761904761905e-06, "loss": 49.0984, "step": 353 }, { "epoch": 8.429850746268656, "grad_norm": 23.166162490844727, "learning_rate": 5.892857142857144e-06, "loss": 48.8104, "step": 354 }, { "epoch": 8.453731343283582, "grad_norm": 27.68876838684082, "learning_rate": 5.8809523809523816e-06, "loss": 48.7745, "step": 355 }, { "epoch": 8.477611940298507, "grad_norm": 26.520286560058594, "learning_rate": 5.8690476190476194e-06, "loss": 47.883, "step": 356 }, { "epoch": 8.501492537313434, "grad_norm": 28.830135345458984, "learning_rate": 5.857142857142858e-06, "loss": 49.1347, "step": 357 }, { "epoch": 8.525373134328358, "grad_norm": 27.387250900268555, "learning_rate": 5.845238095238096e-06, "loss": 48.2092, "step": 358 }, { "epoch": 8.549253731343283, "grad_norm": 23.53616714477539, "learning_rate": 5.833333333333334e-06, "loss": 48.437, "step": 359 }, { "epoch": 8.57313432835821, "grad_norm": 25.665664672851562, "learning_rate": 5.821428571428573e-06, "loss": 49.3006, "step": 360 }, { "epoch": 8.597014925373134, "grad_norm": 24.35331153869629, "learning_rate": 5.8095238095238106e-06, "loss": 49.5249, "step": 361 }, { "epoch": 8.620895522388059, "grad_norm": 28.612688064575195, "learning_rate": 5.7976190476190485e-06, "loss": 50.1344, "step": 362 }, { "epoch": 8.644776119402986, "grad_norm": 25.055545806884766, "learning_rate": 5.785714285714286e-06, "loss": 48.6014, "step": 363 }, { "epoch": 8.66865671641791, "grad_norm": 27.645490646362305, "learning_rate": 5.773809523809523e-06, "loss": 48.953, "step": 364 }, { "epoch": 8.692537313432837, "grad_norm": 26.791471481323242, "learning_rate": 5.761904761904762e-06, "loss": 49.5912, "step": 365 }, { "epoch": 8.716417910447761, "grad_norm": 27.57213592529297, "learning_rate": 5.75e-06, "loss": 48.9958, "step": 366 }, { "epoch": 8.740298507462686, "grad_norm": 20.936344146728516, "learning_rate": 5.738095238095238e-06, "loss": 48.3449, "step": 367 }, { "epoch": 8.764179104477613, "grad_norm": 31.695810317993164, "learning_rate": 5.726190476190477e-06, "loss": 49.1015, "step": 368 }, { "epoch": 8.788059701492537, "grad_norm": 31.584064483642578, "learning_rate": 5.7142857142857145e-06, "loss": 48.8249, "step": 369 }, { "epoch": 8.811940298507462, "grad_norm": 30.70412826538086, "learning_rate": 5.702380952380952e-06, "loss": 49.2984, "step": 370 }, { "epoch": 8.835820895522389, "grad_norm": 36.31315231323242, "learning_rate": 5.690476190476191e-06, "loss": 48.6769, "step": 371 }, { "epoch": 8.859701492537313, "grad_norm": 28.98838996887207, "learning_rate": 5.678571428571429e-06, "loss": 50.2101, "step": 372 }, { "epoch": 8.883582089552238, "grad_norm": 29.07052230834961, "learning_rate": 5.666666666666667e-06, "loss": 49.9206, "step": 373 }, { "epoch": 8.907462686567165, "grad_norm": 31.653087615966797, "learning_rate": 5.654761904761905e-06, "loss": 48.3035, "step": 374 }, { "epoch": 8.93134328358209, "grad_norm": 27.019704818725586, "learning_rate": 5.6428571428571435e-06, "loss": 48.6833, "step": 375 }, { "epoch": 8.955223880597014, "grad_norm": 30.919578552246094, "learning_rate": 5.630952380952381e-06, "loss": 47.7973, "step": 376 }, { "epoch": 8.97910447761194, "grad_norm": 28.002975463867188, "learning_rate": 5.619047619047619e-06, "loss": 49.5539, "step": 377 }, { "epoch": 9.0, "grad_norm": 27.587263107299805, "learning_rate": 5.607142857142858e-06, "loss": 42.9343, "step": 378 }, { "epoch": 9.023880597014925, "grad_norm": 31.024024963378906, "learning_rate": 5.595238095238096e-06, "loss": 48.6774, "step": 379 }, { "epoch": 9.047761194029851, "grad_norm": 27.262426376342773, "learning_rate": 5.583333333333334e-06, "loss": 47.8833, "step": 380 }, { "epoch": 9.071641791044776, "grad_norm": 29.223133087158203, "learning_rate": 5.571428571428572e-06, "loss": 47.1563, "step": 381 }, { "epoch": 9.0955223880597, "grad_norm": 21.004749298095703, "learning_rate": 5.5595238095238104e-06, "loss": 49.0407, "step": 382 }, { "epoch": 9.119402985074627, "grad_norm": 25.157907485961914, "learning_rate": 5.547619047619048e-06, "loss": 48.5233, "step": 383 }, { "epoch": 9.143283582089552, "grad_norm": 17.611478805541992, "learning_rate": 5.535714285714286e-06, "loss": 47.4846, "step": 384 }, { "epoch": 9.167164179104478, "grad_norm": 21.00395965576172, "learning_rate": 5.523809523809525e-06, "loss": 49.7533, "step": 385 }, { "epoch": 9.191044776119403, "grad_norm": 22.07697296142578, "learning_rate": 5.511904761904763e-06, "loss": 48.5003, "step": 386 }, { "epoch": 9.214925373134328, "grad_norm": 21.743778228759766, "learning_rate": 5.500000000000001e-06, "loss": 48.149, "step": 387 }, { "epoch": 9.238805970149254, "grad_norm": 23.499980926513672, "learning_rate": 5.4880952380952394e-06, "loss": 48.2213, "step": 388 }, { "epoch": 9.26268656716418, "grad_norm": 22.22580337524414, "learning_rate": 5.476190476190477e-06, "loss": 48.4671, "step": 389 }, { "epoch": 9.286567164179104, "grad_norm": 26.5915470123291, "learning_rate": 5.464285714285714e-06, "loss": 49.2343, "step": 390 }, { "epoch": 9.31044776119403, "grad_norm": 22.510892868041992, "learning_rate": 5.452380952380952e-06, "loss": 48.9363, "step": 391 }, { "epoch": 9.334328358208955, "grad_norm": 27.17405128479004, "learning_rate": 5.44047619047619e-06, "loss": 49.1814, "step": 392 }, { "epoch": 9.35820895522388, "grad_norm": 29.143529891967773, "learning_rate": 5.428571428571429e-06, "loss": 48.4786, "step": 393 }, { "epoch": 9.382089552238806, "grad_norm": 20.24784278869629, "learning_rate": 5.416666666666667e-06, "loss": 49.2987, "step": 394 }, { "epoch": 9.405970149253731, "grad_norm": 31.44426155090332, "learning_rate": 5.404761904761905e-06, "loss": 49.9466, "step": 395 }, { "epoch": 9.429850746268656, "grad_norm": 23.775951385498047, "learning_rate": 5.392857142857143e-06, "loss": 49.1681, "step": 396 }, { "epoch": 9.453731343283582, "grad_norm": 22.168636322021484, "learning_rate": 5.380952380952381e-06, "loss": 48.8523, "step": 397 }, { "epoch": 9.477611940298507, "grad_norm": 20.944936752319336, "learning_rate": 5.369047619047619e-06, "loss": 48.7369, "step": 398 }, { "epoch": 9.501492537313434, "grad_norm": 23.880292892456055, "learning_rate": 5.357142857142857e-06, "loss": 48.4703, "step": 399 }, { "epoch": 9.525373134328358, "grad_norm": 25.316978454589844, "learning_rate": 5.345238095238096e-06, "loss": 48.3752, "step": 400 }, { "epoch": 9.549253731343283, "grad_norm": 24.398311614990234, "learning_rate": 5.333333333333334e-06, "loss": 47.532, "step": 401 }, { "epoch": 9.57313432835821, "grad_norm": 23.157140731811523, "learning_rate": 5.3214285714285715e-06, "loss": 49.1824, "step": 402 }, { "epoch": 9.597014925373134, "grad_norm": 21.641061782836914, "learning_rate": 5.30952380952381e-06, "loss": 49.6601, "step": 403 }, { "epoch": 9.620895522388059, "grad_norm": 23.863712310791016, "learning_rate": 5.297619047619048e-06, "loss": 49.2146, "step": 404 }, { "epoch": 9.644776119402986, "grad_norm": 21.876007080078125, "learning_rate": 5.285714285714286e-06, "loss": 48.0027, "step": 405 }, { "epoch": 9.66865671641791, "grad_norm": 25.783042907714844, "learning_rate": 5.273809523809525e-06, "loss": 48.2702, "step": 406 }, { "epoch": 9.692537313432837, "grad_norm": 18.782087326049805, "learning_rate": 5.261904761904763e-06, "loss": 48.9365, "step": 407 }, { "epoch": 9.716417910447761, "grad_norm": 20.206588745117188, "learning_rate": 5.2500000000000006e-06, "loss": 49.4144, "step": 408 }, { "epoch": 9.740298507462686, "grad_norm": 20.98710823059082, "learning_rate": 5.2380952380952384e-06, "loss": 49.7442, "step": 409 }, { "epoch": 9.764179104477613, "grad_norm": 19.24452018737793, "learning_rate": 5.226190476190477e-06, "loss": 49.0249, "step": 410 }, { "epoch": 9.788059701492537, "grad_norm": 23.18075180053711, "learning_rate": 5.214285714285715e-06, "loss": 48.8795, "step": 411 }, { "epoch": 9.811940298507462, "grad_norm": 17.233261108398438, "learning_rate": 5.202380952380953e-06, "loss": 49.2985, "step": 412 }, { "epoch": 9.835820895522389, "grad_norm": 24.74007797241211, "learning_rate": 5.190476190476192e-06, "loss": 48.8793, "step": 413 }, { "epoch": 9.859701492537313, "grad_norm": 20.26863670349121, "learning_rate": 5.1785714285714296e-06, "loss": 49.6989, "step": 414 }, { "epoch": 9.883582089552238, "grad_norm": 26.168167114257812, "learning_rate": 5.1666666666666675e-06, "loss": 48.7413, "step": 415 }, { "epoch": 9.907462686567165, "grad_norm": 29.008501052856445, "learning_rate": 5.1547619047619045e-06, "loss": 48.7414, "step": 416 }, { "epoch": 9.93134328358209, "grad_norm": 18.459829330444336, "learning_rate": 5.142857142857142e-06, "loss": 47.7865, "step": 417 }, { "epoch": 9.955223880597014, "grad_norm": 20.898181915283203, "learning_rate": 5.130952380952381e-06, "loss": 47.7274, "step": 418 }, { "epoch": 9.97910447761194, "grad_norm": 23.5065860748291, "learning_rate": 5.119047619047619e-06, "loss": 48.471, "step": 419 }, { "epoch": 10.0, "grad_norm": 23.147043228149414, "learning_rate": 5.107142857142857e-06, "loss": 42.3971, "step": 420 }, { "epoch": 10.023880597014925, "grad_norm": 28.423707962036133, "learning_rate": 5.095238095238096e-06, "loss": 49.4977, "step": 421 }, { "epoch": 10.047761194029851, "grad_norm": 22.017820358276367, "learning_rate": 5.0833333333333335e-06, "loss": 47.0638, "step": 422 }, { "epoch": 10.071641791044776, "grad_norm": 18.173845291137695, "learning_rate": 5.071428571428571e-06, "loss": 48.338, "step": 423 }, { "epoch": 10.0955223880597, "grad_norm": 17.628551483154297, "learning_rate": 5.05952380952381e-06, "loss": 48.2847, "step": 424 }, { "epoch": 10.119402985074627, "grad_norm": 19.974040985107422, "learning_rate": 5.047619047619048e-06, "loss": 49.2284, "step": 425 }, { "epoch": 10.143283582089552, "grad_norm": 22.45549774169922, "learning_rate": 5.035714285714286e-06, "loss": 49.6345, "step": 426 }, { "epoch": 10.167164179104478, "grad_norm": 21.609479904174805, "learning_rate": 5.023809523809524e-06, "loss": 48.2098, "step": 427 }, { "epoch": 10.191044776119403, "grad_norm": 24.7137451171875, "learning_rate": 5.0119047619047625e-06, "loss": 47.9527, "step": 428 }, { "epoch": 10.214925373134328, "grad_norm": 22.888975143432617, "learning_rate": 5e-06, "loss": 49.781, "step": 429 }, { "epoch": 10.238805970149254, "grad_norm": 25.53217124938965, "learning_rate": 4.988095238095238e-06, "loss": 48.9902, "step": 430 }, { "epoch": 10.26268656716418, "grad_norm": 27.80384063720703, "learning_rate": 4.976190476190477e-06, "loss": 48.2545, "step": 431 }, { "epoch": 10.286567164179104, "grad_norm": 21.421342849731445, "learning_rate": 4.964285714285715e-06, "loss": 49.1483, "step": 432 }, { "epoch": 10.31044776119403, "grad_norm": 26.178152084350586, "learning_rate": 4.952380952380953e-06, "loss": 49.1129, "step": 433 }, { "epoch": 10.334328358208955, "grad_norm": 27.993371963500977, "learning_rate": 4.940476190476191e-06, "loss": 48.1783, "step": 434 }, { "epoch": 10.35820895522388, "grad_norm": 26.75821876525879, "learning_rate": 4.928571428571429e-06, "loss": 48.1773, "step": 435 }, { "epoch": 10.382089552238806, "grad_norm": 25.641353607177734, "learning_rate": 4.9166666666666665e-06, "loss": 48.9295, "step": 436 }, { "epoch": 10.405970149253731, "grad_norm": 23.26271629333496, "learning_rate": 4.904761904761905e-06, "loss": 49.5486, "step": 437 }, { "epoch": 10.429850746268656, "grad_norm": 23.637466430664062, "learning_rate": 4.892857142857143e-06, "loss": 48.1263, "step": 438 }, { "epoch": 10.453731343283582, "grad_norm": 29.285432815551758, "learning_rate": 4.880952380952381e-06, "loss": 48.2424, "step": 439 }, { "epoch": 10.477611940298507, "grad_norm": 29.91914939880371, "learning_rate": 4.86904761904762e-06, "loss": 48.3695, "step": 440 }, { "epoch": 10.501492537313434, "grad_norm": 25.249099731445312, "learning_rate": 4.857142857142858e-06, "loss": 48.3644, "step": 441 }, { "epoch": 10.525373134328358, "grad_norm": 22.37591552734375, "learning_rate": 4.8452380952380955e-06, "loss": 49.397, "step": 442 }, { "epoch": 10.549253731343283, "grad_norm": 22.805437088012695, "learning_rate": 4.833333333333333e-06, "loss": 48.6522, "step": 443 }, { "epoch": 10.57313432835821, "grad_norm": 21.229095458984375, "learning_rate": 4.821428571428572e-06, "loss": 47.6681, "step": 444 }, { "epoch": 10.597014925373134, "grad_norm": 23.359468460083008, "learning_rate": 4.80952380952381e-06, "loss": 48.602, "step": 445 }, { "epoch": 10.620895522388059, "grad_norm": 20.953310012817383, "learning_rate": 4.797619047619048e-06, "loss": 49.3366, "step": 446 }, { "epoch": 10.644776119402986, "grad_norm": 21.970388412475586, "learning_rate": 4.785714285714287e-06, "loss": 46.964, "step": 447 }, { "epoch": 10.66865671641791, "grad_norm": 24.282426834106445, "learning_rate": 4.7738095238095245e-06, "loss": 48.2676, "step": 448 }, { "epoch": 10.692537313432837, "grad_norm": 15.47967529296875, "learning_rate": 4.761904761904762e-06, "loss": 48.1993, "step": 449 }, { "epoch": 10.716417910447761, "grad_norm": 23.230947494506836, "learning_rate": 4.75e-06, "loss": 48.5229, "step": 450 }, { "epoch": 10.740298507462686, "grad_norm": 20.514225006103516, "learning_rate": 4.738095238095238e-06, "loss": 48.062, "step": 451 }, { "epoch": 10.764179104477613, "grad_norm": 19.060667037963867, "learning_rate": 4.726190476190476e-06, "loss": 48.3893, "step": 452 }, { "epoch": 10.788059701492537, "grad_norm": 29.78558349609375, "learning_rate": 4.714285714285715e-06, "loss": 48.9921, "step": 453 }, { "epoch": 10.811940298507462, "grad_norm": 23.262001037597656, "learning_rate": 4.702380952380953e-06, "loss": 48.5597, "step": 454 }, { "epoch": 10.835820895522389, "grad_norm": 25.83403778076172, "learning_rate": 4.6904761904761905e-06, "loss": 49.2911, "step": 455 }, { "epoch": 10.859701492537313, "grad_norm": 21.846391677856445, "learning_rate": 4.678571428571429e-06, "loss": 47.3256, "step": 456 }, { "epoch": 10.883582089552238, "grad_norm": 17.09532356262207, "learning_rate": 4.666666666666667e-06, "loss": 48.3647, "step": 457 }, { "epoch": 10.907462686567165, "grad_norm": 31.050525665283203, "learning_rate": 4.654761904761905e-06, "loss": 48.3605, "step": 458 }, { "epoch": 10.93134328358209, "grad_norm": 22.532379150390625, "learning_rate": 4.642857142857144e-06, "loss": 49.0826, "step": 459 }, { "epoch": 10.955223880597014, "grad_norm": 23.585033416748047, "learning_rate": 4.630952380952382e-06, "loss": 48.5111, "step": 460 }, { "epoch": 10.97910447761194, "grad_norm": NaN, "learning_rate": 4.6190476190476196e-06, "loss": 66.9717, "step": 461 }, { "epoch": 11.0, "grad_norm": 24.73590087890625, "learning_rate": 4.6190476190476196e-06, "loss": 41.9122, "step": 462 }, { "epoch": 11.023880597014925, "grad_norm": 27.4709415435791, "learning_rate": 4.6071428571428574e-06, "loss": 48.4682, "step": 463 }, { "epoch": 11.047761194029851, "grad_norm": 26.158245086669922, "learning_rate": 4.595238095238095e-06, "loss": 48.1845, "step": 464 }, { "epoch": 11.071641791044776, "grad_norm": 25.14693260192871, "learning_rate": 4.583333333333333e-06, "loss": 48.4229, "step": 465 }, { "epoch": 11.0955223880597, "grad_norm": 22.229764938354492, "learning_rate": 4.571428571428572e-06, "loss": 47.8876, "step": 466 }, { "epoch": 11.119402985074627, "grad_norm": 24.202686309814453, "learning_rate": 4.55952380952381e-06, "loss": 48.4304, "step": 467 }, { "epoch": 11.143283582089552, "grad_norm": 21.449726104736328, "learning_rate": 4.547619047619048e-06, "loss": 47.6457, "step": 468 }, { "epoch": 11.167164179104478, "grad_norm": 23.769763946533203, "learning_rate": 4.5357142857142865e-06, "loss": 49.1031, "step": 469 }, { "epoch": 11.191044776119403, "grad_norm": 21.20684814453125, "learning_rate": 4.523809523809524e-06, "loss": 47.6488, "step": 470 }, { "epoch": 11.214925373134328, "grad_norm": 17.992631912231445, "learning_rate": 4.511904761904762e-06, "loss": 47.9435, "step": 471 }, { "epoch": 11.238805970149254, "grad_norm": 22.017776489257812, "learning_rate": 4.5e-06, "loss": 48.5224, "step": 472 }, { "epoch": 11.26268656716418, "grad_norm": 22.98673439025879, "learning_rate": 4.488095238095239e-06, "loss": 47.9258, "step": 473 }, { "epoch": 11.286567164179104, "grad_norm": 16.146743774414062, "learning_rate": 4.476190476190477e-06, "loss": 48.3957, "step": 474 }, { "epoch": 11.31044776119403, "grad_norm": 23.30071258544922, "learning_rate": 4.464285714285715e-06, "loss": 48.5472, "step": 475 }, { "epoch": 11.334328358208955, "grad_norm": 24.949913024902344, "learning_rate": 4.4523809523809525e-06, "loss": 48.2387, "step": 476 }, { "epoch": 11.35820895522388, "grad_norm": 23.10662841796875, "learning_rate": 4.44047619047619e-06, "loss": 49.0681, "step": 477 }, { "epoch": 11.382089552238806, "grad_norm": 19.024614334106445, "learning_rate": 4.428571428571429e-06, "loss": 49.3255, "step": 478 }, { "epoch": 11.405970149253731, "grad_norm": 22.34437370300293, "learning_rate": 4.416666666666667e-06, "loss": 47.0069, "step": 479 }, { "epoch": 11.429850746268656, "grad_norm": 23.563596725463867, "learning_rate": 4.404761904761905e-06, "loss": 46.8188, "step": 480 }, { "epoch": 11.453731343283582, "grad_norm": 20.5488338470459, "learning_rate": 4.392857142857143e-06, "loss": 47.8277, "step": 481 }, { "epoch": 11.477611940298507, "grad_norm": 18.416519165039062, "learning_rate": 4.3809523809523815e-06, "loss": 48.2203, "step": 482 }, { "epoch": 11.501492537313434, "grad_norm": 28.21132469177246, "learning_rate": 4.369047619047619e-06, "loss": 48.0691, "step": 483 }, { "epoch": 11.525373134328358, "grad_norm": 21.36182975769043, "learning_rate": 4.357142857142857e-06, "loss": 48.273, "step": 484 }, { "epoch": 11.549253731343283, "grad_norm": 25.726530075073242, "learning_rate": 4.345238095238096e-06, "loss": 48.7529, "step": 485 }, { "epoch": 11.57313432835821, "grad_norm": 21.686412811279297, "learning_rate": 4.333333333333334e-06, "loss": 48.3005, "step": 486 }, { "epoch": 11.597014925373134, "grad_norm": 20.56638526916504, "learning_rate": 4.321428571428572e-06, "loss": 50.1248, "step": 487 }, { "epoch": 11.620895522388059, "grad_norm": 24.193323135375977, "learning_rate": 4.30952380952381e-06, "loss": 48.6031, "step": 488 }, { "epoch": 11.644776119402986, "grad_norm": 17.18548583984375, "learning_rate": 4.297619047619048e-06, "loss": 49.2039, "step": 489 }, { "epoch": 11.66865671641791, "grad_norm": 19.07050895690918, "learning_rate": 4.2857142857142855e-06, "loss": 48.0961, "step": 490 }, { "epoch": 11.692537313432837, "grad_norm": 19.831188201904297, "learning_rate": 4.273809523809524e-06, "loss": 48.5481, "step": 491 }, { "epoch": 11.716417910447761, "grad_norm": 23.408592224121094, "learning_rate": 4.261904761904762e-06, "loss": 48.583, "step": 492 }, { "epoch": 11.740298507462686, "grad_norm": 22.152788162231445, "learning_rate": 4.25e-06, "loss": 48.6684, "step": 493 }, { "epoch": 11.764179104477613, "grad_norm": NaN, "learning_rate": 4.238095238095239e-06, "loss": 54.7097, "step": 494 }, { "epoch": 11.788059701492537, "grad_norm": 23.1225528717041, "learning_rate": 4.238095238095239e-06, "loss": 48.5439, "step": 495 }, { "epoch": 11.811940298507462, "grad_norm": 24.673904418945312, "learning_rate": 4.226190476190477e-06, "loss": 48.2645, "step": 496 }, { "epoch": 11.835820895522389, "grad_norm": 23.318784713745117, "learning_rate": 4.2142857142857145e-06, "loss": 47.9159, "step": 497 }, { "epoch": 11.859701492537313, "grad_norm": 24.62889289855957, "learning_rate": 4.202380952380952e-06, "loss": 48.1392, "step": 498 }, { "epoch": 11.883582089552238, "grad_norm": 17.315168380737305, "learning_rate": 4.190476190476191e-06, "loss": 49.399, "step": 499 }, { "epoch": 11.907462686567165, "grad_norm": 24.458532333374023, "learning_rate": 4.178571428571429e-06, "loss": 49.2189, "step": 500 }, { "epoch": 11.93134328358209, "grad_norm": 28.294036865234375, "learning_rate": 4.166666666666667e-06, "loss": 48.4759, "step": 501 }, { "epoch": 11.955223880597014, "grad_norm": 22.393577575683594, "learning_rate": 4.154761904761906e-06, "loss": 48.9718, "step": 502 }, { "epoch": 11.97910447761194, "grad_norm": 20.199522018432617, "learning_rate": 4.1428571428571435e-06, "loss": 47.5364, "step": 503 }, { "epoch": 12.0, "grad_norm": 22.080204010009766, "learning_rate": 4.130952380952381e-06, "loss": 42.5308, "step": 504 }, { "epoch": 12.023880597014925, "grad_norm": 28.897024154663086, "learning_rate": 4.119047619047619e-06, "loss": 48.9022, "step": 505 }, { "epoch": 12.047761194029851, "grad_norm": 28.31342887878418, "learning_rate": 4.107142857142857e-06, "loss": 47.6489, "step": 506 }, { "epoch": 12.071641791044776, "grad_norm": 22.62079620361328, "learning_rate": 4.095238095238096e-06, "loss": 48.1606, "step": 507 }, { "epoch": 12.0955223880597, "grad_norm": 33.49858474731445, "learning_rate": 4.083333333333334e-06, "loss": 47.8462, "step": 508 }, { "epoch": 12.119402985074627, "grad_norm": 22.20858383178711, "learning_rate": 4.071428571428572e-06, "loss": 47.2505, "step": 509 }, { "epoch": 12.143283582089552, "grad_norm": 25.425495147705078, "learning_rate": 4.0595238095238095e-06, "loss": 48.6289, "step": 510 }, { "epoch": 12.167164179104478, "grad_norm": 29.32784652709961, "learning_rate": 4.047619047619048e-06, "loss": 47.7772, "step": 511 }, { "epoch": 12.191044776119403, "grad_norm": 20.661781311035156, "learning_rate": 4.035714285714286e-06, "loss": 47.1414, "step": 512 }, { "epoch": 12.214925373134328, "grad_norm": 31.4210205078125, "learning_rate": 4.023809523809524e-06, "loss": 47.4312, "step": 513 }, { "epoch": 12.238805970149254, "grad_norm": 32.390071868896484, "learning_rate": 4.011904761904763e-06, "loss": 49.9899, "step": 514 }, { "epoch": 12.26268656716418, "grad_norm": 17.431835174560547, "learning_rate": 4.000000000000001e-06, "loss": 48.8975, "step": 515 }, { "epoch": 12.286567164179104, "grad_norm": 29.32766342163086, "learning_rate": 3.9880952380952386e-06, "loss": 48.8764, "step": 516 }, { "epoch": 12.31044776119403, "grad_norm": 29.523069381713867, "learning_rate": 3.9761904761904764e-06, "loss": 48.2602, "step": 517 }, { "epoch": 12.334328358208955, "grad_norm": 23.866840362548828, "learning_rate": 3.964285714285714e-06, "loss": 47.4016, "step": 518 }, { "epoch": 12.35820895522388, "grad_norm": 27.464962005615234, "learning_rate": 3.952380952380952e-06, "loss": 48.2559, "step": 519 }, { "epoch": 12.382089552238806, "grad_norm": 19.796552658081055, "learning_rate": 3.940476190476191e-06, "loss": 48.7665, "step": 520 }, { "epoch": 12.405970149253731, "grad_norm": 18.637983322143555, "learning_rate": 3.928571428571429e-06, "loss": 48.1456, "step": 521 }, { "epoch": 12.429850746268656, "grad_norm": 22.065799713134766, "learning_rate": 3.916666666666667e-06, "loss": 48.7803, "step": 522 }, { "epoch": 12.453731343283582, "grad_norm": 22.648218154907227, "learning_rate": 3.9047619047619055e-06, "loss": 47.3376, "step": 523 }, { "epoch": 12.477611940298507, "grad_norm": 17.55946922302246, "learning_rate": 3.892857142857143e-06, "loss": 47.6002, "step": 524 }, { "epoch": 12.501492537313434, "grad_norm": 19.173139572143555, "learning_rate": 3.880952380952381e-06, "loss": 48.8976, "step": 525 }, { "epoch": 12.525373134328358, "grad_norm": 24.052696228027344, "learning_rate": 3.869047619047619e-06, "loss": 48.1851, "step": 526 }, { "epoch": 12.549253731343283, "grad_norm": 19.28683090209961, "learning_rate": 3.857142857142858e-06, "loss": 48.0342, "step": 527 }, { "epoch": 12.57313432835821, "grad_norm": 21.528470993041992, "learning_rate": 3.845238095238096e-06, "loss": 49.3597, "step": 528 }, { "epoch": 12.597014925373134, "grad_norm": 22.880159378051758, "learning_rate": 3.833333333333334e-06, "loss": 47.9594, "step": 529 }, { "epoch": 12.620895522388059, "grad_norm": 19.00438117980957, "learning_rate": 3.8214285714285715e-06, "loss": 47.2837, "step": 530 }, { "epoch": 12.644776119402986, "grad_norm": 22.21845054626465, "learning_rate": 3.80952380952381e-06, "loss": 47.1453, "step": 531 }, { "epoch": 12.66865671641791, "grad_norm": 18.551712036132812, "learning_rate": 3.7976190476190477e-06, "loss": 47.9594, "step": 532 }, { "epoch": 12.692537313432837, "grad_norm": 17.805360794067383, "learning_rate": 3.785714285714286e-06, "loss": 49.1036, "step": 533 }, { "epoch": 12.716417910447761, "grad_norm": 14.508918762207031, "learning_rate": 3.773809523809524e-06, "loss": 48.1203, "step": 534 }, { "epoch": 12.740298507462686, "grad_norm": 19.395994186401367, "learning_rate": 3.761904761904762e-06, "loss": 47.7891, "step": 535 }, { "epoch": 12.764179104477613, "grad_norm": 27.492908477783203, "learning_rate": 3.7500000000000005e-06, "loss": 48.9027, "step": 536 }, { "epoch": 12.788059701492537, "grad_norm": 21.751968383789062, "learning_rate": 3.7380952380952384e-06, "loss": 48.0929, "step": 537 }, { "epoch": 12.811940298507462, "grad_norm": 24.78274917602539, "learning_rate": 3.7261904761904767e-06, "loss": 48.1678, "step": 538 }, { "epoch": 12.835820895522389, "grad_norm": 26.319196701049805, "learning_rate": 3.7142857142857146e-06, "loss": 49.1874, "step": 539 }, { "epoch": 12.859701492537313, "grad_norm": 20.670148849487305, "learning_rate": 3.702380952380953e-06, "loss": 48.8441, "step": 540 }, { "epoch": 12.883582089552238, "grad_norm": 23.578706741333008, "learning_rate": 3.690476190476191e-06, "loss": 47.1627, "step": 541 }, { "epoch": 12.907462686567165, "grad_norm": 23.807973861694336, "learning_rate": 3.678571428571429e-06, "loss": 47.493, "step": 542 }, { "epoch": 12.93134328358209, "grad_norm": 20.977373123168945, "learning_rate": 3.6666666666666666e-06, "loss": 49.3489, "step": 543 }, { "epoch": 12.955223880597014, "grad_norm": 21.219995498657227, "learning_rate": 3.654761904761905e-06, "loss": 49.8562, "step": 544 }, { "epoch": 12.97910447761194, "grad_norm": 17.777210235595703, "learning_rate": 3.642857142857143e-06, "loss": 48.4018, "step": 545 }, { "epoch": 13.0, "grad_norm": 17.52475929260254, "learning_rate": 3.630952380952381e-06, "loss": 42.3621, "step": 546 }, { "epoch": 13.023880597014925, "grad_norm": 23.431884765625, "learning_rate": 3.6190476190476194e-06, "loss": 49.0982, "step": 547 }, { "epoch": 13.047761194029851, "grad_norm": 25.512338638305664, "learning_rate": 3.6071428571428573e-06, "loss": 47.5758, "step": 548 }, { "epoch": 13.071641791044776, "grad_norm": 25.41205406188965, "learning_rate": 3.5952380952380956e-06, "loss": 49.0519, "step": 549 }, { "epoch": 13.0955223880597, "grad_norm": 20.511945724487305, "learning_rate": 3.5833333333333335e-06, "loss": 48.9739, "step": 550 }, { "epoch": 13.119402985074627, "grad_norm": 18.88302993774414, "learning_rate": 3.5714285714285718e-06, "loss": 47.0551, "step": 551 }, { "epoch": 13.143283582089552, "grad_norm": 17.176782608032227, "learning_rate": 3.55952380952381e-06, "loss": 48.0771, "step": 552 }, { "epoch": 13.167164179104478, "grad_norm": 19.72154426574707, "learning_rate": 3.547619047619048e-06, "loss": 49.5084, "step": 553 }, { "epoch": 13.191044776119403, "grad_norm": 24.780994415283203, "learning_rate": 3.5357142857142863e-06, "loss": 46.5557, "step": 554 }, { "epoch": 13.214925373134328, "grad_norm": 20.380996704101562, "learning_rate": 3.523809523809524e-06, "loss": 48.841, "step": 555 }, { "epoch": 13.238805970149254, "grad_norm": 26.90860939025879, "learning_rate": 3.511904761904762e-06, "loss": 47.6185, "step": 556 }, { "epoch": 13.26268656716418, "grad_norm": 21.40388298034668, "learning_rate": 3.5e-06, "loss": 47.787, "step": 557 }, { "epoch": 13.286567164179104, "grad_norm": 24.708845138549805, "learning_rate": 3.4880952380952383e-06, "loss": 47.1974, "step": 558 }, { "epoch": 13.31044776119403, "grad_norm": 25.317148208618164, "learning_rate": 3.476190476190476e-06, "loss": 49.2282, "step": 559 }, { "epoch": 13.334328358208955, "grad_norm": 22.903011322021484, "learning_rate": 3.4642857142857145e-06, "loss": 47.0762, "step": 560 }, { "epoch": 13.35820895522388, "grad_norm": 23.626604080200195, "learning_rate": 3.4523809523809528e-06, "loss": 47.3622, "step": 561 }, { "epoch": 13.382089552238806, "grad_norm": 16.69061279296875, "learning_rate": 3.4404761904761907e-06, "loss": 48.5621, "step": 562 }, { "epoch": 13.405970149253731, "grad_norm": 20.52508544921875, "learning_rate": 3.428571428571429e-06, "loss": 47.6565, "step": 563 }, { "epoch": 13.429850746268656, "grad_norm": 25.125743865966797, "learning_rate": 3.416666666666667e-06, "loss": 48.1353, "step": 564 }, { "epoch": 13.453731343283582, "grad_norm": 20.697166442871094, "learning_rate": 3.404761904761905e-06, "loss": 47.9368, "step": 565 }, { "epoch": 13.477611940298507, "grad_norm": 22.396892547607422, "learning_rate": 3.3928571428571435e-06, "loss": 48.2956, "step": 566 }, { "epoch": 13.501492537313434, "grad_norm": 24.770437240600586, "learning_rate": 3.3809523809523814e-06, "loss": 48.4467, "step": 567 }, { "epoch": 13.525373134328358, "grad_norm": 19.44706153869629, "learning_rate": 3.3690476190476197e-06, "loss": 48.3155, "step": 568 }, { "epoch": 13.549253731343283, "grad_norm": 27.680660247802734, "learning_rate": 3.357142857142857e-06, "loss": 47.9039, "step": 569 }, { "epoch": 13.57313432835821, "grad_norm": 21.89419174194336, "learning_rate": 3.3452380952380954e-06, "loss": 47.9416, "step": 570 }, { "epoch": 13.597014925373134, "grad_norm": 19.10918426513672, "learning_rate": 3.3333333333333333e-06, "loss": 48.45, "step": 571 }, { "epoch": 13.620895522388059, "grad_norm": 29.83106231689453, "learning_rate": 3.3214285714285716e-06, "loss": 48.9583, "step": 572 }, { "epoch": 13.644776119402986, "grad_norm": 28.05882453918457, "learning_rate": 3.3095238095238095e-06, "loss": 49.108, "step": 573 }, { "epoch": 13.66865671641791, "grad_norm": 17.379384994506836, "learning_rate": 3.297619047619048e-06, "loss": 48.4707, "step": 574 }, { "epoch": 13.692537313432837, "grad_norm": 19.15117645263672, "learning_rate": 3.285714285714286e-06, "loss": 48.476, "step": 575 }, { "epoch": 13.716417910447761, "grad_norm": 23.892152786254883, "learning_rate": 3.273809523809524e-06, "loss": 48.0321, "step": 576 }, { "epoch": 13.740298507462686, "grad_norm": 18.658008575439453, "learning_rate": 3.2619047619047623e-06, "loss": 47.2192, "step": 577 }, { "epoch": 13.764179104477613, "grad_norm": 16.940099716186523, "learning_rate": 3.2500000000000002e-06, "loss": 49.2263, "step": 578 }, { "epoch": 13.788059701492537, "grad_norm": 25.7972412109375, "learning_rate": 3.2380952380952385e-06, "loss": 47.5039, "step": 579 }, { "epoch": 13.811940298507462, "grad_norm": 28.928129196166992, "learning_rate": 3.226190476190477e-06, "loss": 47.9264, "step": 580 }, { "epoch": 13.835820895522389, "grad_norm": 23.67597007751465, "learning_rate": 3.2142857142857147e-06, "loss": 49.1464, "step": 581 }, { "epoch": 13.859701492537313, "grad_norm": 18.345443725585938, "learning_rate": 3.202380952380952e-06, "loss": 47.888, "step": 582 }, { "epoch": 13.883582089552238, "grad_norm": 19.80716896057129, "learning_rate": 3.1904761904761905e-06, "loss": 47.4324, "step": 583 }, { "epoch": 13.907462686567165, "grad_norm": 20.488346099853516, "learning_rate": 3.178571428571429e-06, "loss": 48.3033, "step": 584 }, { "epoch": 13.93134328358209, "grad_norm": 22.3657283782959, "learning_rate": 3.1666666666666667e-06, "loss": 48.1474, "step": 585 }, { "epoch": 13.955223880597014, "grad_norm": 17.457408905029297, "learning_rate": 3.154761904761905e-06, "loss": 47.2418, "step": 586 }, { "epoch": 13.97910447761194, "grad_norm": NaN, "learning_rate": 3.142857142857143e-06, "loss": 54.1812, "step": 587 }, { "epoch": 14.0, "grad_norm": 17.137672424316406, "learning_rate": 3.142857142857143e-06, "loss": 42.3703, "step": 588 }, { "epoch": 14.023880597014925, "grad_norm": 20.55642318725586, "learning_rate": 3.130952380952381e-06, "loss": 49.4628, "step": 589 }, { "epoch": 14.047761194029851, "grad_norm": 19.925596237182617, "learning_rate": 3.1190476190476195e-06, "loss": 47.5266, "step": 590 }, { "epoch": 14.071641791044776, "grad_norm": 12.49276065826416, "learning_rate": 3.1071428571428574e-06, "loss": 47.8654, "step": 591 }, { "epoch": 14.0955223880597, "grad_norm": 17.266550064086914, "learning_rate": 3.0952380952380957e-06, "loss": 48.4362, "step": 592 }, { "epoch": 14.119402985074627, "grad_norm": 18.234397888183594, "learning_rate": 3.0833333333333336e-06, "loss": 48.9532, "step": 593 }, { "epoch": 14.143283582089552, "grad_norm": 19.880165100097656, "learning_rate": 3.071428571428572e-06, "loss": 48.0088, "step": 594 }, { "epoch": 14.167164179104478, "grad_norm": 23.04216766357422, "learning_rate": 3.05952380952381e-06, "loss": 48.0934, "step": 595 }, { "epoch": 14.191044776119403, "grad_norm": 19.199676513671875, "learning_rate": 3.047619047619048e-06, "loss": 48.3845, "step": 596 }, { "epoch": 14.214925373134328, "grad_norm": 20.758337020874023, "learning_rate": 3.0357142857142856e-06, "loss": 47.4652, "step": 597 }, { "epoch": 14.238805970149254, "grad_norm": 17.532787322998047, "learning_rate": 3.023809523809524e-06, "loss": 48.0212, "step": 598 }, { "epoch": 14.26268656716418, "grad_norm": 16.547094345092773, "learning_rate": 3.011904761904762e-06, "loss": 48.6113, "step": 599 }, { "epoch": 14.286567164179104, "grad_norm": 16.324464797973633, "learning_rate": 3e-06, "loss": 47.9735, "step": 600 }, { "epoch": 14.31044776119403, "grad_norm": 16.54167938232422, "learning_rate": 2.9880952380952384e-06, "loss": 47.4436, "step": 601 }, { "epoch": 14.334328358208955, "grad_norm": 23.455759048461914, "learning_rate": 2.9761904761904763e-06, "loss": 47.6631, "step": 602 }, { "epoch": 14.35820895522388, "grad_norm": 19.159008026123047, "learning_rate": 2.9642857142857146e-06, "loss": 48.0291, "step": 603 }, { "epoch": 14.382089552238806, "grad_norm": 18.66881561279297, "learning_rate": 2.9523809523809525e-06, "loss": 46.4582, "step": 604 }, { "epoch": 14.405970149253731, "grad_norm": 19.129064559936523, "learning_rate": 2.9404761904761908e-06, "loss": 49.4455, "step": 605 }, { "epoch": 14.429850746268656, "grad_norm": NaN, "learning_rate": 2.928571428571429e-06, "loss": 78.6564, "step": 606 }, { "epoch": 14.453731343283582, "grad_norm": 18.47364044189453, "learning_rate": 2.928571428571429e-06, "loss": 48.1748, "step": 607 }, { "epoch": 14.477611940298507, "grad_norm": 17.920883178710938, "learning_rate": 2.916666666666667e-06, "loss": 47.6447, "step": 608 }, { "epoch": 14.501492537313434, "grad_norm": 18.263038635253906, "learning_rate": 2.9047619047619053e-06, "loss": 48.7324, "step": 609 }, { "epoch": 14.525373134328358, "grad_norm": 24.323266983032227, "learning_rate": 2.892857142857143e-06, "loss": 48.6135, "step": 610 }, { "epoch": 14.549253731343283, "grad_norm": 21.56492042541504, "learning_rate": 2.880952380952381e-06, "loss": 47.0007, "step": 611 }, { "epoch": 14.57313432835821, "grad_norm": 17.741748809814453, "learning_rate": 2.869047619047619e-06, "loss": 46.3136, "step": 612 }, { "epoch": 14.597014925373134, "grad_norm": 17.218914031982422, "learning_rate": 2.8571428571428573e-06, "loss": 47.6417, "step": 613 }, { "epoch": 14.620895522388059, "grad_norm": 22.856996536254883, "learning_rate": 2.8452380952380956e-06, "loss": 47.6898, "step": 614 }, { "epoch": 14.644776119402986, "grad_norm": NaN, "learning_rate": 2.8333333333333335e-06, "loss": 53.529, "step": 615 }, { "epoch": 14.66865671641791, "grad_norm": 23.29751968383789, "learning_rate": 2.8333333333333335e-06, "loss": 48.164, "step": 616 }, { "epoch": 14.692537313432837, "grad_norm": 15.633321762084961, "learning_rate": 2.8214285714285718e-06, "loss": 46.9866, "step": 617 }, { "epoch": 14.716417910447761, "grad_norm": 21.713376998901367, "learning_rate": 2.8095238095238096e-06, "loss": 48.2856, "step": 618 }, { "epoch": 14.740298507462686, "grad_norm": 17.07369613647461, "learning_rate": 2.797619047619048e-06, "loss": 46.4404, "step": 619 }, { "epoch": 14.764179104477613, "grad_norm": 14.855449676513672, "learning_rate": 2.785714285714286e-06, "loss": 48.2668, "step": 620 }, { "epoch": 14.788059701492537, "grad_norm": 16.479616165161133, "learning_rate": 2.773809523809524e-06, "loss": 49.2661, "step": 621 }, { "epoch": 14.811940298507462, "grad_norm": 14.471490859985352, "learning_rate": 2.7619047619047625e-06, "loss": 47.0484, "step": 622 }, { "epoch": 14.835820895522389, "grad_norm": 19.018714904785156, "learning_rate": 2.7500000000000004e-06, "loss": 49.2253, "step": 623 }, { "epoch": 14.859701492537313, "grad_norm": 16.21799087524414, "learning_rate": 2.7380952380952387e-06, "loss": 49.0738, "step": 624 }, { "epoch": 14.883582089552238, "grad_norm": 20.86383628845215, "learning_rate": 2.726190476190476e-06, "loss": 48.4231, "step": 625 }, { "epoch": 14.907462686567165, "grad_norm": 20.60930633544922, "learning_rate": 2.7142857142857144e-06, "loss": 47.7464, "step": 626 }, { "epoch": 14.93134328358209, "grad_norm": 20.909135818481445, "learning_rate": 2.7023809523809523e-06, "loss": 48.519, "step": 627 }, { "epoch": 14.955223880597014, "grad_norm": 18.555694580078125, "learning_rate": 2.6904761904761906e-06, "loss": 48.1625, "step": 628 }, { "epoch": 14.97910447761194, "grad_norm": 18.154813766479492, "learning_rate": 2.6785714285714285e-06, "loss": 48.9444, "step": 629 }, { "epoch": 15.0, "grad_norm": 19.308523178100586, "learning_rate": 2.666666666666667e-06, "loss": 42.1936, "step": 630 }, { "epoch": 15.023880597014925, "grad_norm": 22.725357055664062, "learning_rate": 2.654761904761905e-06, "loss": 49.5597, "step": 631 }, { "epoch": 15.047761194029851, "grad_norm": 18.862451553344727, "learning_rate": 2.642857142857143e-06, "loss": 46.914, "step": 632 }, { "epoch": 15.071641791044776, "grad_norm": 19.017065048217773, "learning_rate": 2.6309523809523813e-06, "loss": 47.7233, "step": 633 }, { "epoch": 15.0955223880597, "grad_norm": 19.03627586364746, "learning_rate": 2.6190476190476192e-06, "loss": 48.0406, "step": 634 }, { "epoch": 15.119402985074627, "grad_norm": 18.53116798400879, "learning_rate": 2.6071428571428575e-06, "loss": 47.3259, "step": 635 }, { "epoch": 15.143283582089552, "grad_norm": 19.265275955200195, "learning_rate": 2.595238095238096e-06, "loss": 47.2465, "step": 636 }, { "epoch": 15.167164179104478, "grad_norm": 19.497289657592773, "learning_rate": 2.5833333333333337e-06, "loss": 48.5984, "step": 637 }, { "epoch": 15.191044776119403, "grad_norm": 20.183780670166016, "learning_rate": 2.571428571428571e-06, "loss": 46.6221, "step": 638 }, { "epoch": 15.214925373134328, "grad_norm": 22.911672592163086, "learning_rate": 2.5595238095238095e-06, "loss": 48.0178, "step": 639 }, { "epoch": 15.238805970149254, "grad_norm": 20.678709030151367, "learning_rate": 2.547619047619048e-06, "loss": 47.0322, "step": 640 }, { "epoch": 15.26268656716418, "grad_norm": 18.579042434692383, "learning_rate": 2.5357142857142857e-06, "loss": 48.1428, "step": 641 }, { "epoch": 15.286567164179104, "grad_norm": 23.61576271057129, "learning_rate": 2.523809523809524e-06, "loss": 48.444, "step": 642 }, { "epoch": 15.31044776119403, "grad_norm": 19.602746963500977, "learning_rate": 2.511904761904762e-06, "loss": 48.2582, "step": 643 }, { "epoch": 15.334328358208955, "grad_norm": 12.509607315063477, "learning_rate": 2.5e-06, "loss": 48.8531, "step": 644 }, { "epoch": 15.35820895522388, "grad_norm": 18.749767303466797, "learning_rate": 2.4880952380952385e-06, "loss": 47.6453, "step": 645 }, { "epoch": 15.382089552238806, "grad_norm": 20.612041473388672, "learning_rate": 2.4761904761904764e-06, "loss": 48.7038, "step": 646 }, { "epoch": 15.405970149253731, "grad_norm": 18.65719985961914, "learning_rate": 2.4642857142857147e-06, "loss": 47.7954, "step": 647 }, { "epoch": 15.429850746268656, "grad_norm": 22.636686325073242, "learning_rate": 2.4523809523809526e-06, "loss": 48.1164, "step": 648 }, { "epoch": 15.453731343283582, "grad_norm": 20.93446922302246, "learning_rate": 2.4404761904761905e-06, "loss": 48.5955, "step": 649 }, { "epoch": 15.477611940298507, "grad_norm": 20.77125358581543, "learning_rate": 2.428571428571429e-06, "loss": 48.4369, "step": 650 }, { "epoch": 15.501492537313434, "grad_norm": 17.003498077392578, "learning_rate": 2.4166666666666667e-06, "loss": 49.0355, "step": 651 }, { "epoch": 15.525373134328358, "grad_norm": 20.743436813354492, "learning_rate": 2.404761904761905e-06, "loss": 47.8368, "step": 652 }, { "epoch": 15.549253731343283, "grad_norm": NaN, "learning_rate": 2.3928571428571433e-06, "loss": 41.6371, "step": 653 }, { "epoch": 15.57313432835821, "grad_norm": 21.716781616210938, "learning_rate": 2.3928571428571433e-06, "loss": 48.5806, "step": 654 }, { "epoch": 15.597014925373134, "grad_norm": 18.8812198638916, "learning_rate": 2.380952380952381e-06, "loss": 49.0707, "step": 655 }, { "epoch": 15.620895522388059, "grad_norm": 22.305049896240234, "learning_rate": 2.369047619047619e-06, "loss": 47.7556, "step": 656 }, { "epoch": 15.644776119402986, "grad_norm": 20.51401710510254, "learning_rate": 2.3571428571428574e-06, "loss": 48.1588, "step": 657 }, { "epoch": 15.66865671641791, "grad_norm": 17.691770553588867, "learning_rate": 2.3452380952380953e-06, "loss": 47.5187, "step": 658 }, { "epoch": 15.692537313432837, "grad_norm": 22.343585968017578, "learning_rate": 2.3333333333333336e-06, "loss": 47.5725, "step": 659 }, { "epoch": 15.716417910447761, "grad_norm": 21.656587600708008, "learning_rate": 2.321428571428572e-06, "loss": 47.6903, "step": 660 }, { "epoch": 15.740298507462686, "grad_norm": 20.632055282592773, "learning_rate": 2.3095238095238098e-06, "loss": 47.4526, "step": 661 }, { "epoch": 15.764179104477613, "grad_norm": 22.324811935424805, "learning_rate": 2.2976190476190477e-06, "loss": 47.3316, "step": 662 }, { "epoch": 15.788059701492537, "grad_norm": 19.320737838745117, "learning_rate": 2.285714285714286e-06, "loss": 48.2315, "step": 663 }, { "epoch": 15.811940298507462, "grad_norm": 18.58050537109375, "learning_rate": 2.273809523809524e-06, "loss": 47.9147, "step": 664 }, { "epoch": 15.835820895522389, "grad_norm": 20.37384796142578, "learning_rate": 2.261904761904762e-06, "loss": 47.5874, "step": 665 }, { "epoch": 15.859701492537313, "grad_norm": 20.893856048583984, "learning_rate": 2.25e-06, "loss": 49.225, "step": 666 }, { "epoch": 15.883582089552238, "grad_norm": 18.4589786529541, "learning_rate": 2.2380952380952384e-06, "loss": 47.3042, "step": 667 }, { "epoch": 15.907462686567165, "grad_norm": 20.845996856689453, "learning_rate": 2.2261904761904763e-06, "loss": 47.3255, "step": 668 }, { "epoch": 15.93134328358209, "grad_norm": 20.149137496948242, "learning_rate": 2.2142857142857146e-06, "loss": 48.6543, "step": 669 }, { "epoch": 15.955223880597014, "grad_norm": 14.768882751464844, "learning_rate": 2.2023809523809525e-06, "loss": 46.8274, "step": 670 }, { "epoch": 15.97910447761194, "grad_norm": 26.926074981689453, "learning_rate": 2.1904761904761908e-06, "loss": 48.2035, "step": 671 }, { "epoch": 16.0, "grad_norm": 22.840618133544922, "learning_rate": 2.1785714285714286e-06, "loss": 42.8242, "step": 672 }, { "epoch": 16.023880597014927, "grad_norm": 16.183008193969727, "learning_rate": 2.166666666666667e-06, "loss": 47.8309, "step": 673 }, { "epoch": 16.04776119402985, "grad_norm": 20.603744506835938, "learning_rate": 2.154761904761905e-06, "loss": 48.5197, "step": 674 }, { "epoch": 16.071641791044776, "grad_norm": 26.492107391357422, "learning_rate": 2.1428571428571427e-06, "loss": 47.2312, "step": 675 }, { "epoch": 16.095522388059702, "grad_norm": 19.786901473999023, "learning_rate": 2.130952380952381e-06, "loss": 49.6201, "step": 676 }, { "epoch": 16.119402985074625, "grad_norm": 18.150909423828125, "learning_rate": 2.1190476190476194e-06, "loss": 48.7407, "step": 677 }, { "epoch": 16.143283582089552, "grad_norm": 18.797983169555664, "learning_rate": 2.1071428571428572e-06, "loss": 47.0801, "step": 678 }, { "epoch": 16.16716417910448, "grad_norm": 16.088953018188477, "learning_rate": 2.0952380952380955e-06, "loss": 47.6509, "step": 679 }, { "epoch": 16.1910447761194, "grad_norm": 20.359085083007812, "learning_rate": 2.0833333333333334e-06, "loss": 48.9226, "step": 680 }, { "epoch": 16.214925373134328, "grad_norm": 21.99265480041504, "learning_rate": 2.0714285714285717e-06, "loss": 47.3775, "step": 681 }, { "epoch": 16.238805970149254, "grad_norm": 18.616743087768555, "learning_rate": 2.0595238095238096e-06, "loss": 45.8448, "step": 682 }, { "epoch": 16.262686567164177, "grad_norm": 19.6337947845459, "learning_rate": 2.047619047619048e-06, "loss": 48.2077, "step": 683 }, { "epoch": 16.286567164179104, "grad_norm": 23.881439208984375, "learning_rate": 2.035714285714286e-06, "loss": 48.6796, "step": 684 }, { "epoch": 16.31044776119403, "grad_norm": 19.665023803710938, "learning_rate": 2.023809523809524e-06, "loss": 48.7275, "step": 685 }, { "epoch": 16.334328358208957, "grad_norm": 18.438793182373047, "learning_rate": 2.011904761904762e-06, "loss": 49.585, "step": 686 }, { "epoch": 16.35820895522388, "grad_norm": 17.073816299438477, "learning_rate": 2.0000000000000003e-06, "loss": 47.4548, "step": 687 }, { "epoch": 16.382089552238806, "grad_norm": 20.504276275634766, "learning_rate": 1.9880952380952382e-06, "loss": 47.3555, "step": 688 }, { "epoch": 16.405970149253733, "grad_norm": 21.564546585083008, "learning_rate": 1.976190476190476e-06, "loss": 47.6304, "step": 689 }, { "epoch": 16.429850746268656, "grad_norm": 16.773197174072266, "learning_rate": 1.9642857142857144e-06, "loss": 49.078, "step": 690 }, { "epoch": 16.453731343283582, "grad_norm": 22.77934455871582, "learning_rate": 1.9523809523809527e-06, "loss": 47.8289, "step": 691 }, { "epoch": 16.47761194029851, "grad_norm": 17.375993728637695, "learning_rate": 1.9404761904761906e-06, "loss": 48.4812, "step": 692 }, { "epoch": 16.501492537313432, "grad_norm": 21.407329559326172, "learning_rate": 1.928571428571429e-06, "loss": 48.2934, "step": 693 }, { "epoch": 16.52537313432836, "grad_norm": 15.673316955566406, "learning_rate": 1.916666666666667e-06, "loss": 46.7304, "step": 694 }, { "epoch": 16.549253731343285, "grad_norm": 24.577089309692383, "learning_rate": 1.904761904761905e-06, "loss": 47.9352, "step": 695 }, { "epoch": 16.573134328358208, "grad_norm": 24.46076774597168, "learning_rate": 1.892857142857143e-06, "loss": 48.8173, "step": 696 }, { "epoch": 16.597014925373134, "grad_norm": 14.248388290405273, "learning_rate": 1.880952380952381e-06, "loss": 48.5858, "step": 697 }, { "epoch": 16.62089552238806, "grad_norm": 16.925329208374023, "learning_rate": 1.8690476190476192e-06, "loss": 47.8278, "step": 698 }, { "epoch": 16.644776119402984, "grad_norm": 25.52614402770996, "learning_rate": 1.8571428571428573e-06, "loss": 48.1248, "step": 699 }, { "epoch": 16.66865671641791, "grad_norm": 21.011341094970703, "learning_rate": 1.8452380952380954e-06, "loss": 47.8154, "step": 700 }, { "epoch": 16.692537313432837, "grad_norm": 14.694896697998047, "learning_rate": 1.8333333333333333e-06, "loss": 47.9668, "step": 701 }, { "epoch": 16.71641791044776, "grad_norm": 22.32903480529785, "learning_rate": 1.8214285714285716e-06, "loss": 48.6784, "step": 702 }, { "epoch": 16.740298507462686, "grad_norm": 17.19482421875, "learning_rate": 1.8095238095238097e-06, "loss": 46.9973, "step": 703 }, { "epoch": 16.764179104477613, "grad_norm": 14.590733528137207, "learning_rate": 1.7976190476190478e-06, "loss": 47.2393, "step": 704 }, { "epoch": 16.788059701492536, "grad_norm": 17.131982803344727, "learning_rate": 1.7857142857142859e-06, "loss": 47.9412, "step": 705 }, { "epoch": 16.811940298507462, "grad_norm": 18.513992309570312, "learning_rate": 1.773809523809524e-06, "loss": 48.8777, "step": 706 }, { "epoch": 16.83582089552239, "grad_norm": 17.625539779663086, "learning_rate": 1.761904761904762e-06, "loss": 48.3885, "step": 707 }, { "epoch": 16.85970149253731, "grad_norm": 16.540056228637695, "learning_rate": 1.75e-06, "loss": 47.8561, "step": 708 }, { "epoch": 16.883582089552238, "grad_norm": 20.070533752441406, "learning_rate": 1.738095238095238e-06, "loss": 46.6418, "step": 709 }, { "epoch": 16.907462686567165, "grad_norm": 18.742460250854492, "learning_rate": 1.7261904761904764e-06, "loss": 46.7471, "step": 710 }, { "epoch": 16.93134328358209, "grad_norm": 17.491954803466797, "learning_rate": 1.7142857142857145e-06, "loss": 47.5558, "step": 711 }, { "epoch": 16.955223880597014, "grad_norm": 17.457130432128906, "learning_rate": 1.7023809523809526e-06, "loss": 47.4441, "step": 712 }, { "epoch": 16.97910447761194, "grad_norm": 21.053844451904297, "learning_rate": 1.6904761904761907e-06, "loss": 48.1931, "step": 713 }, { "epoch": 17.0, "grad_norm": 16.943801879882812, "learning_rate": 1.6785714285714286e-06, "loss": 41.9934, "step": 714 }, { "epoch": 17.023880597014927, "grad_norm": 21.56785011291504, "learning_rate": 1.6666666666666667e-06, "loss": 47.1652, "step": 715 }, { "epoch": 17.04776119402985, "grad_norm": 21.193382263183594, "learning_rate": 1.6547619047619048e-06, "loss": 47.6751, "step": 716 }, { "epoch": 17.071641791044776, "grad_norm": 16.245115280151367, "learning_rate": 1.642857142857143e-06, "loss": 47.4133, "step": 717 }, { "epoch": 17.095522388059702, "grad_norm": 18.834646224975586, "learning_rate": 1.6309523809523812e-06, "loss": 48.145, "step": 718 }, { "epoch": 17.119402985074625, "grad_norm": 15.769698143005371, "learning_rate": 1.6190476190476193e-06, "loss": 48.1181, "step": 719 }, { "epoch": 17.143283582089552, "grad_norm": 13.460511207580566, "learning_rate": 1.6071428571428574e-06, "loss": 49.1229, "step": 720 }, { "epoch": 17.16716417910448, "grad_norm": 18.58087158203125, "learning_rate": 1.5952380952380953e-06, "loss": 47.5095, "step": 721 }, { "epoch": 17.1910447761194, "grad_norm": 18.607332229614258, "learning_rate": 1.5833333333333333e-06, "loss": 49.1334, "step": 722 }, { "epoch": 17.214925373134328, "grad_norm": 15.046488761901855, "learning_rate": 1.5714285714285714e-06, "loss": 47.6151, "step": 723 }, { "epoch": 17.238805970149254, "grad_norm": 17.442358016967773, "learning_rate": 1.5595238095238098e-06, "loss": 47.3771, "step": 724 }, { "epoch": 17.262686567164177, "grad_norm": 11.690101623535156, "learning_rate": 1.5476190476190479e-06, "loss": 48.1095, "step": 725 }, { "epoch": 17.286567164179104, "grad_norm": 17.945192337036133, "learning_rate": 1.535714285714286e-06, "loss": 47.8941, "step": 726 }, { "epoch": 17.31044776119403, "grad_norm": 13.878116607666016, "learning_rate": 1.523809523809524e-06, "loss": 47.6422, "step": 727 }, { "epoch": 17.334328358208957, "grad_norm": 15.942928314208984, "learning_rate": 1.511904761904762e-06, "loss": 46.9964, "step": 728 }, { "epoch": 17.35820895522388, "grad_norm": 13.57482624053955, "learning_rate": 1.5e-06, "loss": 47.1832, "step": 729 }, { "epoch": 17.382089552238806, "grad_norm": 13.781617164611816, "learning_rate": 1.4880952380952381e-06, "loss": 48.9621, "step": 730 }, { "epoch": 17.405970149253733, "grad_norm": 14.26857852935791, "learning_rate": 1.4761904761904762e-06, "loss": 48.6631, "step": 731 }, { "epoch": 17.429850746268656, "grad_norm": 16.23444938659668, "learning_rate": 1.4642857142857145e-06, "loss": 46.84, "step": 732 }, { "epoch": 17.453731343283582, "grad_norm": 17.442630767822266, "learning_rate": 1.4523809523809526e-06, "loss": 48.2996, "step": 733 }, { "epoch": 17.47761194029851, "grad_norm": 14.329082489013672, "learning_rate": 1.4404761904761905e-06, "loss": 47.396, "step": 734 }, { "epoch": 17.501492537313432, "grad_norm": 14.772257804870605, "learning_rate": 1.4285714285714286e-06, "loss": 48.5733, "step": 735 }, { "epoch": 17.52537313432836, "grad_norm": 14.331324577331543, "learning_rate": 1.4166666666666667e-06, "loss": 48.2969, "step": 736 }, { "epoch": 17.549253731343285, "grad_norm": 17.498600006103516, "learning_rate": 1.4047619047619048e-06, "loss": 48.0221, "step": 737 }, { "epoch": 17.573134328358208, "grad_norm": 16.155025482177734, "learning_rate": 1.392857142857143e-06, "loss": 47.9848, "step": 738 }, { "epoch": 17.597014925373134, "grad_norm": 15.552813529968262, "learning_rate": 1.3809523809523812e-06, "loss": 48.4413, "step": 739 }, { "epoch": 17.62089552238806, "grad_norm": 15.887310981750488, "learning_rate": 1.3690476190476193e-06, "loss": 47.6463, "step": 740 }, { "epoch": 17.644776119402984, "grad_norm": 17.783411026000977, "learning_rate": 1.3571428571428572e-06, "loss": 47.8009, "step": 741 }, { "epoch": 17.66865671641791, "grad_norm": 17.108932495117188, "learning_rate": 1.3452380952380953e-06, "loss": 47.9888, "step": 742 }, { "epoch": 17.692537313432837, "grad_norm": 19.79203224182129, "learning_rate": 1.3333333333333334e-06, "loss": 48.5732, "step": 743 }, { "epoch": 17.71641791044776, "grad_norm": 17.06324005126953, "learning_rate": 1.3214285714285715e-06, "loss": 48.4815, "step": 744 }, { "epoch": 17.740298507462686, "grad_norm": 17.399097442626953, "learning_rate": 1.3095238095238096e-06, "loss": 47.5591, "step": 745 }, { "epoch": 17.764179104477613, "grad_norm": 15.836935997009277, "learning_rate": 1.297619047619048e-06, "loss": 47.994, "step": 746 }, { "epoch": 17.788059701492536, "grad_norm": 18.20856475830078, "learning_rate": 1.2857142857142856e-06, "loss": 47.9979, "step": 747 }, { "epoch": 17.811940298507462, "grad_norm": 19.10239601135254, "learning_rate": 1.273809523809524e-06, "loss": 48.1196, "step": 748 }, { "epoch": 17.83582089552239, "grad_norm": 17.21087646484375, "learning_rate": 1.261904761904762e-06, "loss": 47.8816, "step": 749 }, { "epoch": 17.85970149253731, "grad_norm": 14.792268753051758, "learning_rate": 1.25e-06, "loss": 47.8182, "step": 750 }, { "epoch": 17.883582089552238, "grad_norm": 13.695488929748535, "learning_rate": 1.2380952380952382e-06, "loss": 47.4298, "step": 751 }, { "epoch": 17.907462686567165, "grad_norm": 15.197646141052246, "learning_rate": 1.2261904761904763e-06, "loss": 47.7132, "step": 752 }, { "epoch": 17.93134328358209, "grad_norm": 19.13431739807129, "learning_rate": 1.2142857142857144e-06, "loss": 48.431, "step": 753 }, { "epoch": 17.955223880597014, "grad_norm": 15.690411567687988, "learning_rate": 1.2023809523809525e-06, "loss": 47.4529, "step": 754 }, { "epoch": 17.97910447761194, "grad_norm": 14.75414752960205, "learning_rate": 1.1904761904761906e-06, "loss": 47.9668, "step": 755 }, { "epoch": 18.0, "grad_norm": 11.497115135192871, "learning_rate": 1.1785714285714287e-06, "loss": 41.8653, "step": 756 }, { "epoch": 18.023880597014927, "grad_norm": 16.20159339904785, "learning_rate": 1.1666666666666668e-06, "loss": 47.2871, "step": 757 }, { "epoch": 18.04776119402985, "grad_norm": 15.400497436523438, "learning_rate": 1.1547619047619049e-06, "loss": 46.3673, "step": 758 }, { "epoch": 18.071641791044776, "grad_norm": 12.16773509979248, "learning_rate": 1.142857142857143e-06, "loss": 47.7463, "step": 759 }, { "epoch": 18.095522388059702, "grad_norm": 19.978351593017578, "learning_rate": 1.130952380952381e-06, "loss": 47.4632, "step": 760 }, { "epoch": 18.119402985074625, "grad_norm": 14.090561866760254, "learning_rate": 1.1190476190476192e-06, "loss": 48.9356, "step": 761 }, { "epoch": 18.143283582089552, "grad_norm": 13.143173217773438, "learning_rate": 1.1071428571428573e-06, "loss": 48.1129, "step": 762 }, { "epoch": 18.16716417910448, "grad_norm": 15.609000205993652, "learning_rate": 1.0952380952380954e-06, "loss": 48.8554, "step": 763 }, { "epoch": 18.1910447761194, "grad_norm": 14.012611389160156, "learning_rate": 1.0833333333333335e-06, "loss": 47.6785, "step": 764 }, { "epoch": 18.214925373134328, "grad_norm": 13.417494773864746, "learning_rate": 1.0714285714285714e-06, "loss": 48.2733, "step": 765 }, { "epoch": 18.238805970149254, "grad_norm": 15.562864303588867, "learning_rate": 1.0595238095238097e-06, "loss": 48.0488, "step": 766 }, { "epoch": 18.262686567164177, "grad_norm": 17.083723068237305, "learning_rate": 1.0476190476190478e-06, "loss": 49.3136, "step": 767 }, { "epoch": 18.286567164179104, "grad_norm": 16.564395904541016, "learning_rate": 1.0357142857142859e-06, "loss": 48.1835, "step": 768 }, { "epoch": 18.31044776119403, "grad_norm": 14.694929122924805, "learning_rate": 1.023809523809524e-06, "loss": 48.8632, "step": 769 }, { "epoch": 18.334328358208957, "grad_norm": 13.4928560256958, "learning_rate": 1.011904761904762e-06, "loss": 47.3333, "step": 770 }, { "epoch": 18.35820895522388, "grad_norm": 12.980204582214355, "learning_rate": 1.0000000000000002e-06, "loss": 47.7961, "step": 771 }, { "epoch": 18.382089552238806, "grad_norm": 14.19666862487793, "learning_rate": 9.88095238095238e-07, "loss": 47.7806, "step": 772 }, { "epoch": 18.405970149253733, "grad_norm": 13.2017183303833, "learning_rate": 9.761904761904764e-07, "loss": 46.1119, "step": 773 }, { "epoch": 18.429850746268656, "grad_norm": 15.064650535583496, "learning_rate": 9.642857142857145e-07, "loss": 47.8105, "step": 774 }, { "epoch": 18.453731343283582, "grad_norm": 14.713834762573242, "learning_rate": 9.523809523809525e-07, "loss": 47.5723, "step": 775 }, { "epoch": 18.47761194029851, "grad_norm": 13.394201278686523, "learning_rate": 9.404761904761906e-07, "loss": 48.1632, "step": 776 }, { "epoch": 18.501492537313432, "grad_norm": 10.742532730102539, "learning_rate": 9.285714285714287e-07, "loss": 48.6118, "step": 777 }, { "epoch": 18.52537313432836, "grad_norm": 12.402650833129883, "learning_rate": 9.166666666666666e-07, "loss": 48.6597, "step": 778 }, { "epoch": 18.549253731343285, "grad_norm": 15.73616886138916, "learning_rate": 9.047619047619048e-07, "loss": 47.9931, "step": 779 }, { "epoch": 18.573134328358208, "grad_norm": 14.188780784606934, "learning_rate": 8.928571428571429e-07, "loss": 47.0081, "step": 780 }, { "epoch": 18.597014925373134, "grad_norm": 12.516701698303223, "learning_rate": 8.80952380952381e-07, "loss": 49.0287, "step": 781 }, { "epoch": 18.62089552238806, "grad_norm": 15.069429397583008, "learning_rate": 8.69047619047619e-07, "loss": 47.8721, "step": 782 }, { "epoch": 18.644776119402984, "grad_norm": 13.091047286987305, "learning_rate": 8.571428571428572e-07, "loss": 48.1678, "step": 783 }, { "epoch": 18.66865671641791, "grad_norm": 15.017065048217773, "learning_rate": 8.452380952380953e-07, "loss": 47.1277, "step": 784 }, { "epoch": 18.692537313432837, "grad_norm": 12.091531753540039, "learning_rate": 8.333333333333333e-07, "loss": 47.4962, "step": 785 }, { "epoch": 18.71641791044776, "grad_norm": 15.20182991027832, "learning_rate": 8.214285714285715e-07, "loss": 48.6224, "step": 786 }, { "epoch": 18.740298507462686, "grad_norm": 11.17827320098877, "learning_rate": 8.095238095238096e-07, "loss": 47.9759, "step": 787 }, { "epoch": 18.764179104477613, "grad_norm": 14.884525299072266, "learning_rate": 7.976190476190476e-07, "loss": 47.9749, "step": 788 }, { "epoch": 18.788059701492536, "grad_norm": 14.360984802246094, "learning_rate": 7.857142857142857e-07, "loss": 48.9952, "step": 789 }, { "epoch": 18.811940298507462, "grad_norm": 11.265621185302734, "learning_rate": 7.738095238095239e-07, "loss": 47.4274, "step": 790 }, { "epoch": 18.83582089552239, "grad_norm": 18.072290420532227, "learning_rate": 7.61904761904762e-07, "loss": 47.8815, "step": 791 }, { "epoch": 18.85970149253731, "grad_norm": 15.310029029846191, "learning_rate": 7.5e-07, "loss": 47.3907, "step": 792 }, { "epoch": 18.883582089552238, "grad_norm": 14.032752990722656, "learning_rate": 7.380952380952381e-07, "loss": 48.0883, "step": 793 }, { "epoch": 18.907462686567165, "grad_norm": 12.853668212890625, "learning_rate": 7.261904761904763e-07, "loss": 47.1452, "step": 794 }, { "epoch": 18.93134328358209, "grad_norm": 15.6067476272583, "learning_rate": 7.142857142857143e-07, "loss": 46.6303, "step": 795 }, { "epoch": 18.955223880597014, "grad_norm": 12.828201293945312, "learning_rate": 7.023809523809524e-07, "loss": 47.9885, "step": 796 }, { "epoch": 18.97910447761194, "grad_norm": 13.336589813232422, "learning_rate": 6.904761904761906e-07, "loss": 48.2315, "step": 797 }, { "epoch": 19.0, "grad_norm": 13.629434585571289, "learning_rate": 6.785714285714286e-07, "loss": 41.9374, "step": 798 }, { "epoch": 19.023880597014927, "grad_norm": 13.237930297851562, "learning_rate": 6.666666666666667e-07, "loss": 46.6802, "step": 799 }, { "epoch": 19.04776119402985, "grad_norm": 13.715863227844238, "learning_rate": 6.547619047619048e-07, "loss": 49.0494, "step": 800 }, { "epoch": 19.071641791044776, "grad_norm": 13.439970016479492, "learning_rate": 6.428571428571428e-07, "loss": 46.3647, "step": 801 }, { "epoch": 19.095522388059702, "grad_norm": 15.468942642211914, "learning_rate": 6.30952380952381e-07, "loss": 48.4725, "step": 802 }, { "epoch": 19.119402985074625, "grad_norm": 14.160257339477539, "learning_rate": 6.190476190476191e-07, "loss": 47.4033, "step": 803 }, { "epoch": 19.143283582089552, "grad_norm": 13.667155265808105, "learning_rate": 6.071428571428572e-07, "loss": 48.4729, "step": 804 }, { "epoch": 19.16716417910448, "grad_norm": 12.428313255310059, "learning_rate": 5.952380952380953e-07, "loss": 48.8939, "step": 805 }, { "epoch": 19.1910447761194, "grad_norm": 12.985882759094238, "learning_rate": 5.833333333333334e-07, "loss": 47.0663, "step": 806 }, { "epoch": 19.214925373134328, "grad_norm": 12.827404975891113, "learning_rate": 5.714285714285715e-07, "loss": 47.5614, "step": 807 }, { "epoch": 19.238805970149254, "grad_norm": 11.078653335571289, "learning_rate": 5.595238095238096e-07, "loss": 48.564, "step": 808 }, { "epoch": 19.262686567164177, "grad_norm": 13.346016883850098, "learning_rate": 5.476190476190477e-07, "loss": 48.0823, "step": 809 }, { "epoch": 19.286567164179104, "grad_norm": 14.523963928222656, "learning_rate": 5.357142857142857e-07, "loss": 48.4225, "step": 810 }, { "epoch": 19.31044776119403, "grad_norm": 12.598445892333984, "learning_rate": 5.238095238095239e-07, "loss": 47.2514, "step": 811 }, { "epoch": 19.334328358208957, "grad_norm": 12.203497886657715, "learning_rate": 5.11904761904762e-07, "loss": 47.217, "step": 812 }, { "epoch": 19.35820895522388, "grad_norm": 12.144754409790039, "learning_rate": 5.000000000000001e-07, "loss": 47.191, "step": 813 }, { "epoch": 19.382089552238806, "grad_norm": 12.585047721862793, "learning_rate": 4.880952380952382e-07, "loss": 48.4947, "step": 814 }, { "epoch": 19.405970149253733, "grad_norm": 11.295561790466309, "learning_rate": 4.7619047619047623e-07, "loss": 46.9444, "step": 815 }, { "epoch": 19.429850746268656, "grad_norm": 13.055256843566895, "learning_rate": 4.642857142857143e-07, "loss": 48.4469, "step": 816 }, { "epoch": 19.453731343283582, "grad_norm": 12.051807403564453, "learning_rate": 4.523809523809524e-07, "loss": 48.0547, "step": 817 }, { "epoch": 19.47761194029851, "grad_norm": 13.44185733795166, "learning_rate": 4.404761904761905e-07, "loss": 48.3155, "step": 818 }, { "epoch": 19.501492537313432, "grad_norm": 12.405723571777344, "learning_rate": 4.285714285714286e-07, "loss": 48.3982, "step": 819 }, { "epoch": 19.52537313432836, "grad_norm": 14.900402069091797, "learning_rate": 4.1666666666666667e-07, "loss": 48.2653, "step": 820 }, { "epoch": 19.549253731343285, "grad_norm": 10.70801067352295, "learning_rate": 4.047619047619048e-07, "loss": 48.0384, "step": 821 }, { "epoch": 19.573134328358208, "grad_norm": 12.318074226379395, "learning_rate": 3.9285714285714286e-07, "loss": 47.554, "step": 822 }, { "epoch": 19.597014925373134, "grad_norm": 12.898431777954102, "learning_rate": 3.80952380952381e-07, "loss": 48.3586, "step": 823 }, { "epoch": 19.62089552238806, "grad_norm": 15.45779800415039, "learning_rate": 3.6904761904761906e-07, "loss": 48.4193, "step": 824 }, { "epoch": 19.644776119402984, "grad_norm": 11.230570793151855, "learning_rate": 3.5714285714285716e-07, "loss": 48.5294, "step": 825 }, { "epoch": 19.66865671641791, "grad_norm": 13.647272109985352, "learning_rate": 3.452380952380953e-07, "loss": 47.2569, "step": 826 }, { "epoch": 19.692537313432837, "grad_norm": 11.521178245544434, "learning_rate": 3.3333333333333335e-07, "loss": 47.2899, "step": 827 }, { "epoch": 19.71641791044776, "grad_norm": 11.537907600402832, "learning_rate": 3.214285714285714e-07, "loss": 46.6462, "step": 828 }, { "epoch": 19.740298507462686, "grad_norm": 11.670267105102539, "learning_rate": 3.0952380952380955e-07, "loss": 47.9797, "step": 829 }, { "epoch": 19.764179104477613, "grad_norm": 11.660557746887207, "learning_rate": 2.9761904761904765e-07, "loss": 47.9744, "step": 830 }, { "epoch": 19.788059701492536, "grad_norm": 12.332269668579102, "learning_rate": 2.8571428571428575e-07, "loss": 48.6015, "step": 831 }, { "epoch": 19.811940298507462, "grad_norm": 12.228848457336426, "learning_rate": 2.7380952380952385e-07, "loss": 47.3215, "step": 832 }, { "epoch": 19.83582089552239, "grad_norm": 13.780754089355469, "learning_rate": 2.6190476190476194e-07, "loss": 48.853, "step": 833 }, { "epoch": 19.85970149253731, "grad_norm": 11.639240264892578, "learning_rate": 2.5000000000000004e-07, "loss": 48.8199, "step": 834 }, { "epoch": 19.883582089552238, "grad_norm": 10.796862602233887, "learning_rate": 2.3809523809523811e-07, "loss": 47.5373, "step": 835 }, { "epoch": 19.907462686567165, "grad_norm": 13.573180198669434, "learning_rate": 2.261904761904762e-07, "loss": 47.8368, "step": 836 }, { "epoch": 19.93134328358209, "grad_norm": 11.497776985168457, "learning_rate": 2.142857142857143e-07, "loss": 47.8226, "step": 837 }, { "epoch": 19.955223880597014, "grad_norm": 10.777889251708984, "learning_rate": 2.023809523809524e-07, "loss": 47.6424, "step": 838 }, { "epoch": 19.97910447761194, "grad_norm": 10.77852725982666, "learning_rate": 1.904761904761905e-07, "loss": 46.379, "step": 839 }, { "epoch": 20.0, "grad_norm": 13.582564353942871, "learning_rate": 1.7857142857142858e-07, "loss": 42.5239, "step": 840 }, { "epoch": 20.0, "step": 840, "total_flos": 4.130470305428237e+16, "train_loss": 49.47331008002872, "train_runtime": 26137.3223, "train_samples_per_second": 4.095, "train_steps_per_second": 0.032 }, { "epoch": 20.023880597014927, "grad_norm": 21.18770408630371, "learning_rate": 1e-05, "loss": 48.1474, "step": 841 }, { "epoch": 20.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99404761904762e-06, "loss": 60.2758, "step": 842 }, { "epoch": 20.071641791044776, "grad_norm": Infinity, "learning_rate": 9.99404761904762e-06, "loss": 61.7211, "step": 843 }, { "epoch": 20.095522388059702, "grad_norm": 504.4407958984375, "learning_rate": 9.99404761904762e-06, "loss": 60.8189, "step": 844 }, { "epoch": 20.119402985074625, "grad_norm": 221.96849060058594, "learning_rate": 9.988095238095239e-06, "loss": 54.4658, "step": 845 }, { "epoch": 20.143283582089552, "grad_norm": 110.4036865234375, "learning_rate": 9.982142857142858e-06, "loss": 52.4242, "step": 846 }, { "epoch": 20.16716417910448, "grad_norm": 82.75493621826172, "learning_rate": 9.976190476190477e-06, "loss": 50.3129, "step": 847 }, { "epoch": 20.1910447761194, "grad_norm": 62.56040573120117, "learning_rate": 9.970238095238096e-06, "loss": 49.7171, "step": 848 }, { "epoch": 20.214925373134328, "grad_norm": 70.04007720947266, "learning_rate": 9.964285714285714e-06, "loss": 48.185, "step": 849 }, { "epoch": 20.238805970149254, "grad_norm": 56.70342254638672, "learning_rate": 9.958333333333334e-06, "loss": 49.5787, "step": 850 }, { "epoch": 20.262686567164177, "grad_norm": 64.66405487060547, "learning_rate": 9.952380952380954e-06, "loss": 49.6106, "step": 851 }, { "epoch": 20.286567164179104, "grad_norm": 43.37612533569336, "learning_rate": 9.946428571428572e-06, "loss": 49.2966, "step": 852 }, { "epoch": 20.31044776119403, "grad_norm": 42.66206359863281, "learning_rate": 9.940476190476192e-06, "loss": 48.7073, "step": 853 }, { "epoch": 20.334328358208957, "grad_norm": 37.17741775512695, "learning_rate": 9.93452380952381e-06, "loss": 48.7592, "step": 854 }, { "epoch": 20.35820895522388, "grad_norm": 39.27332305908203, "learning_rate": 9.92857142857143e-06, "loss": 48.1181, "step": 855 }, { "epoch": 20.382089552238806, "grad_norm": 31.37261390686035, "learning_rate": 9.922619047619048e-06, "loss": 47.4873, "step": 856 }, { "epoch": 20.405970149253733, "grad_norm": 41.693809509277344, "learning_rate": 9.916666666666668e-06, "loss": 48.9428, "step": 857 }, { "epoch": 20.429850746268656, "grad_norm": 29.33939552307129, "learning_rate": 9.910714285714288e-06, "loss": 49.2928, "step": 858 }, { "epoch": 20.453731343283582, "grad_norm": 30.606157302856445, "learning_rate": 9.904761904761906e-06, "loss": 49.1506, "step": 859 }, { "epoch": 20.47761194029851, "grad_norm": 27.273784637451172, "learning_rate": 9.898809523809525e-06, "loss": 46.6136, "step": 860 }, { "epoch": 20.501492537313432, "grad_norm": 24.410682678222656, "learning_rate": 9.892857142857143e-06, "loss": 48.3989, "step": 861 }, { "epoch": 20.52537313432836, "grad_norm": 24.138607025146484, "learning_rate": 9.886904761904763e-06, "loss": 49.3858, "step": 862 }, { "epoch": 20.549253731343285, "grad_norm": 27.50669288635254, "learning_rate": 9.880952380952381e-06, "loss": 48.5058, "step": 863 }, { "epoch": 20.573134328358208, "grad_norm": 27.739347457885742, "learning_rate": 9.875000000000001e-06, "loss": 49.676, "step": 864 }, { "epoch": 20.597014925373134, "grad_norm": 22.63895034790039, "learning_rate": 9.869047619047621e-06, "loss": 47.6998, "step": 865 }, { "epoch": 20.62089552238806, "grad_norm": 26.80891990661621, "learning_rate": 9.863095238095239e-06, "loss": 47.9571, "step": 866 }, { "epoch": 20.644776119402984, "grad_norm": 26.259008407592773, "learning_rate": 9.857142857142859e-06, "loss": 48.8771, "step": 867 }, { "epoch": 20.66865671641791, "grad_norm": 23.716773986816406, "learning_rate": 9.851190476190477e-06, "loss": 47.1255, "step": 868 }, { "epoch": 20.692537313432837, "grad_norm": 26.96156120300293, "learning_rate": 9.845238095238097e-06, "loss": 47.2227, "step": 869 }, { "epoch": 20.71641791044776, "grad_norm": 25.1954345703125, "learning_rate": 9.839285714285715e-06, "loss": 47.6847, "step": 870 }, { "epoch": 20.740298507462686, "grad_norm": 21.56642723083496, "learning_rate": 9.833333333333333e-06, "loss": 47.7292, "step": 871 }, { "epoch": 20.764179104477613, "grad_norm": 25.091773986816406, "learning_rate": 9.827380952380953e-06, "loss": 46.6588, "step": 872 }, { "epoch": 20.788059701492536, "grad_norm": 26.45799446105957, "learning_rate": 9.821428571428573e-06, "loss": 47.3963, "step": 873 }, { "epoch": 20.811940298507462, "grad_norm": 25.865068435668945, "learning_rate": 9.81547619047619e-06, "loss": 48.8823, "step": 874 }, { "epoch": 20.83582089552239, "grad_norm": 27.056106567382812, "learning_rate": 9.80952380952381e-06, "loss": 47.2222, "step": 875 }, { "epoch": 20.85970149253731, "grad_norm": 27.02417755126953, "learning_rate": 9.803571428571428e-06, "loss": 47.4543, "step": 876 }, { "epoch": 20.883582089552238, "grad_norm": 23.681915283203125, "learning_rate": 9.797619047619048e-06, "loss": 47.7518, "step": 877 }, { "epoch": 20.907462686567165, "grad_norm": 20.77193260192871, "learning_rate": 9.791666666666666e-06, "loss": 46.3164, "step": 878 }, { "epoch": 20.93134328358209, "grad_norm": 24.61642837524414, "learning_rate": 9.785714285714286e-06, "loss": 48.6711, "step": 879 }, { "epoch": 20.955223880597014, "grad_norm": 20.59898567199707, "learning_rate": 9.779761904761906e-06, "loss": 49.114, "step": 880 }, { "epoch": 20.97910447761194, "grad_norm": 24.815736770629883, "learning_rate": 9.773809523809524e-06, "loss": 48.4315, "step": 881 }, { "epoch": 21.0, "grad_norm": 17.920352935791016, "learning_rate": 9.767857142857144e-06, "loss": 41.3634, "step": 882 }, { "epoch": 21.023880597014927, "grad_norm": 26.69571876525879, "learning_rate": 9.761904761904762e-06, "loss": 47.8968, "step": 883 }, { "epoch": 21.04776119402985, "grad_norm": 23.156524658203125, "learning_rate": 9.755952380952382e-06, "loss": 48.5914, "step": 884 }, { "epoch": 21.071641791044776, "grad_norm": 21.612483978271484, "learning_rate": 9.75e-06, "loss": 47.8711, "step": 885 }, { "epoch": 21.095522388059702, "grad_norm": 24.346399307250977, "learning_rate": 9.74404761904762e-06, "loss": 48.8689, "step": 886 }, { "epoch": 21.119402985074625, "grad_norm": 21.973896026611328, "learning_rate": 9.73809523809524e-06, "loss": 46.7465, "step": 887 }, { "epoch": 21.143283582089552, "grad_norm": 20.034557342529297, "learning_rate": 9.732142857142858e-06, "loss": 47.1505, "step": 888 }, { "epoch": 21.16716417910448, "grad_norm": 20.113008499145508, "learning_rate": 9.726190476190477e-06, "loss": 47.9234, "step": 889 }, { "epoch": 21.1910447761194, "grad_norm": 24.743249893188477, "learning_rate": 9.720238095238095e-06, "loss": 47.4432, "step": 890 }, { "epoch": 21.214925373134328, "grad_norm": 25.538530349731445, "learning_rate": 9.714285714285715e-06, "loss": 47.0015, "step": 891 }, { "epoch": 21.238805970149254, "grad_norm": 27.43077278137207, "learning_rate": 9.708333333333333e-06, "loss": 48.6757, "step": 892 }, { "epoch": 21.262686567164177, "grad_norm": 25.34470558166504, "learning_rate": 9.702380952380953e-06, "loss": 46.8118, "step": 893 }, { "epoch": 21.286567164179104, "grad_norm": 29.590490341186523, "learning_rate": 9.696428571428573e-06, "loss": 47.5079, "step": 894 }, { "epoch": 21.31044776119403, "grad_norm": 16.418222427368164, "learning_rate": 9.690476190476191e-06, "loss": 48.7679, "step": 895 }, { "epoch": 21.334328358208957, "grad_norm": 30.906719207763672, "learning_rate": 9.68452380952381e-06, "loss": 48.4926, "step": 896 }, { "epoch": 21.35820895522388, "grad_norm": 30.252347946166992, "learning_rate": 9.678571428571429e-06, "loss": 48.9318, "step": 897 }, { "epoch": 21.382089552238806, "grad_norm": 30.137592315673828, "learning_rate": 9.672619047619049e-06, "loss": 47.0388, "step": 898 }, { "epoch": 21.405970149253733, "grad_norm": 25.297151565551758, "learning_rate": 9.666666666666667e-06, "loss": 47.334, "step": 899 }, { "epoch": 21.429850746268656, "grad_norm": 31.72736358642578, "learning_rate": 9.660714285714287e-06, "loss": 47.8769, "step": 900 }, { "epoch": 21.453731343283582, "grad_norm": 24.4852294921875, "learning_rate": 9.654761904761906e-06, "loss": 47.4009, "step": 901 }, { "epoch": 21.47761194029851, "grad_norm": 31.223567962646484, "learning_rate": 9.648809523809524e-06, "loss": 48.4972, "step": 902 }, { "epoch": 21.501492537313432, "grad_norm": 24.1851806640625, "learning_rate": 9.642857142857144e-06, "loss": 46.1818, "step": 903 }, { "epoch": 21.52537313432836, "grad_norm": NaN, "learning_rate": 9.636904761904762e-06, "loss": 70.1176, "step": 904 }, { "epoch": 21.549253731343285, "grad_norm": 29.140161514282227, "learning_rate": 9.636904761904762e-06, "loss": 47.1614, "step": 905 }, { "epoch": 21.573134328358208, "grad_norm": 31.186546325683594, "learning_rate": 9.630952380952382e-06, "loss": 47.3643, "step": 906 }, { "epoch": 21.597014925373134, "grad_norm": 24.395353317260742, "learning_rate": 9.625e-06, "loss": 48.3591, "step": 907 }, { "epoch": 21.62089552238806, "grad_norm": 29.287492752075195, "learning_rate": 9.61904761904762e-06, "loss": 47.261, "step": 908 }, { "epoch": 21.644776119402984, "grad_norm": 26.76996612548828, "learning_rate": 9.61309523809524e-06, "loss": 48.7017, "step": 909 }, { "epoch": 21.66865671641791, "grad_norm": 29.820920944213867, "learning_rate": 9.607142857142858e-06, "loss": 48.5165, "step": 910 }, { "epoch": 21.692537313432837, "grad_norm": 30.011823654174805, "learning_rate": 9.601190476190478e-06, "loss": 46.5558, "step": 911 }, { "epoch": 21.71641791044776, "grad_norm": 32.796905517578125, "learning_rate": 9.595238095238096e-06, "loss": 47.276, "step": 912 }, { "epoch": 21.740298507462686, "grad_norm": 28.798233032226562, "learning_rate": 9.589285714285716e-06, "loss": 47.6033, "step": 913 }, { "epoch": 21.764179104477613, "grad_norm": 31.51072120666504, "learning_rate": 9.583333333333335e-06, "loss": 48.1236, "step": 914 }, { "epoch": 21.788059701492536, "grad_norm": 20.611305236816406, "learning_rate": 9.577380952380953e-06, "loss": 48.2839, "step": 915 }, { "epoch": 21.811940298507462, "grad_norm": 26.748571395874023, "learning_rate": 9.571428571428573e-06, "loss": 48.2225, "step": 916 }, { "epoch": 21.83582089552239, "grad_norm": 22.262859344482422, "learning_rate": 9.565476190476191e-06, "loss": 46.661, "step": 917 }, { "epoch": 21.85970149253731, "grad_norm": 34.15045166015625, "learning_rate": 9.559523809523811e-06, "loss": 47.3229, "step": 918 }, { "epoch": 21.883582089552238, "grad_norm": 24.26387596130371, "learning_rate": 9.55357142857143e-06, "loss": 47.4686, "step": 919 }, { "epoch": 21.907462686567165, "grad_norm": 29.463472366333008, "learning_rate": 9.547619047619049e-06, "loss": 47.6019, "step": 920 }, { "epoch": 21.93134328358209, "grad_norm": 31.184497833251953, "learning_rate": 9.541666666666669e-06, "loss": 47.3228, "step": 921 }, { "epoch": 21.955223880597014, "grad_norm": 26.506031036376953, "learning_rate": 9.535714285714287e-06, "loss": 47.9961, "step": 922 }, { "epoch": 21.97910447761194, "grad_norm": 30.547340393066406, "learning_rate": 9.529761904761905e-06, "loss": 47.8973, "step": 923 }, { "epoch": 22.0, "grad_norm": 22.91999053955078, "learning_rate": 9.523809523809525e-06, "loss": 41.3426, "step": 924 }, { "epoch": 22.023880597014927, "grad_norm": 28.242450714111328, "learning_rate": 9.517857142857143e-06, "loss": 47.2478, "step": 925 }, { "epoch": 22.04776119402985, "grad_norm": 33.07649612426758, "learning_rate": 9.511904761904763e-06, "loss": 47.6489, "step": 926 }, { "epoch": 22.071641791044776, "grad_norm": 28.14696502685547, "learning_rate": 9.50595238095238e-06, "loss": 46.521, "step": 927 }, { "epoch": 22.095522388059702, "grad_norm": 34.472206115722656, "learning_rate": 9.5e-06, "loss": 47.6476, "step": 928 }, { "epoch": 22.119402985074625, "grad_norm": 25.370718002319336, "learning_rate": 9.494047619047619e-06, "loss": 47.7215, "step": 929 }, { "epoch": 22.143283582089552, "grad_norm": 31.77129554748535, "learning_rate": 9.488095238095238e-06, "loss": 46.5566, "step": 930 }, { "epoch": 22.16716417910448, "grad_norm": 25.42667579650879, "learning_rate": 9.482142857142858e-06, "loss": 47.9832, "step": 931 }, { "epoch": 22.1910447761194, "grad_norm": 26.3134765625, "learning_rate": 9.476190476190476e-06, "loss": 47.9402, "step": 932 }, { "epoch": 22.214925373134328, "grad_norm": 31.683523178100586, "learning_rate": 9.470238095238096e-06, "loss": 47.4404, "step": 933 }, { "epoch": 22.238805970149254, "grad_norm": 31.90761375427246, "learning_rate": 9.464285714285714e-06, "loss": 47.7601, "step": 934 }, { "epoch": 22.262686567164177, "grad_norm": 24.635921478271484, "learning_rate": 9.458333333333334e-06, "loss": 46.2573, "step": 935 }, { "epoch": 22.286567164179104, "grad_norm": 25.32915496826172, "learning_rate": 9.452380952380952e-06, "loss": 48.4756, "step": 936 }, { "epoch": 22.31044776119403, "grad_norm": 28.117773056030273, "learning_rate": 9.446428571428572e-06, "loss": 48.6971, "step": 937 }, { "epoch": 22.334328358208957, "grad_norm": 22.504152297973633, "learning_rate": 9.440476190476192e-06, "loss": 47.4534, "step": 938 }, { "epoch": 22.35820895522388, "grad_norm": 31.765676498413086, "learning_rate": 9.43452380952381e-06, "loss": 48.0168, "step": 939 }, { "epoch": 22.382089552238806, "grad_norm": 27.647945404052734, "learning_rate": 9.42857142857143e-06, "loss": 48.0918, "step": 940 }, { "epoch": 22.405970149253733, "grad_norm": 33.35643005371094, "learning_rate": 9.422619047619048e-06, "loss": 48.295, "step": 941 }, { "epoch": 22.429850746268656, "grad_norm": 26.12603187561035, "learning_rate": 9.416666666666667e-06, "loss": 48.8921, "step": 942 }, { "epoch": 22.453731343283582, "grad_norm": 23.728809356689453, "learning_rate": 9.410714285714286e-06, "loss": 47.3206, "step": 943 }, { "epoch": 22.47761194029851, "grad_norm": 28.772401809692383, "learning_rate": 9.404761904761905e-06, "loss": 47.6536, "step": 944 }, { "epoch": 22.501492537313432, "grad_norm": 28.205202102661133, "learning_rate": 9.398809523809525e-06, "loss": 47.1952, "step": 945 }, { "epoch": 22.52537313432836, "grad_norm": 33.80730438232422, "learning_rate": 9.392857142857143e-06, "loss": 47.1336, "step": 946 }, { "epoch": 22.549253731343285, "grad_norm": 25.538846969604492, "learning_rate": 9.386904761904763e-06, "loss": 46.4229, "step": 947 }, { "epoch": 22.573134328358208, "grad_norm": 41.13503646850586, "learning_rate": 9.380952380952381e-06, "loss": 46.8325, "step": 948 }, { "epoch": 22.597014925373134, "grad_norm": 36.823001861572266, "learning_rate": 9.375000000000001e-06, "loss": 47.205, "step": 949 }, { "epoch": 22.62089552238806, "grad_norm": 29.992229461669922, "learning_rate": 9.36904761904762e-06, "loss": 46.683, "step": 950 }, { "epoch": 22.644776119402984, "grad_norm": 40.20172882080078, "learning_rate": 9.363095238095239e-06, "loss": 48.4859, "step": 951 }, { "epoch": 22.66865671641791, "grad_norm": 27.357097625732422, "learning_rate": 9.357142857142859e-06, "loss": 47.2987, "step": 952 }, { "epoch": 22.692537313432837, "grad_norm": 40.66689682006836, "learning_rate": 9.351190476190477e-06, "loss": 46.3579, "step": 953 }, { "epoch": 22.71641791044776, "grad_norm": 35.37788391113281, "learning_rate": 9.345238095238096e-06, "loss": 47.3369, "step": 954 }, { "epoch": 22.740298507462686, "grad_norm": 36.279151916503906, "learning_rate": 9.339285714285715e-06, "loss": 47.1137, "step": 955 }, { "epoch": 22.764179104477613, "grad_norm": 27.949628829956055, "learning_rate": 9.333333333333334e-06, "loss": 47.1438, "step": 956 }, { "epoch": 22.788059701492536, "grad_norm": 45.424556732177734, "learning_rate": 9.327380952380954e-06, "loss": 48.3171, "step": 957 }, { "epoch": 22.811940298507462, "grad_norm": 27.726537704467773, "learning_rate": 9.321428571428572e-06, "loss": 47.1718, "step": 958 }, { "epoch": 22.83582089552239, "grad_norm": 58.36731719970703, "learning_rate": 9.315476190476192e-06, "loss": 47.5895, "step": 959 }, { "epoch": 22.85970149253731, "grad_norm": 58.96028137207031, "learning_rate": 9.30952380952381e-06, "loss": 47.4109, "step": 960 }, { "epoch": 22.883582089552238, "grad_norm": 24.928117752075195, "learning_rate": 9.30357142857143e-06, "loss": 48.1841, "step": 961 }, { "epoch": 22.907462686567165, "grad_norm": 38.36846160888672, "learning_rate": 9.297619047619048e-06, "loss": 47.7438, "step": 962 }, { "epoch": 22.93134328358209, "grad_norm": 37.60481643676758, "learning_rate": 9.291666666666668e-06, "loss": 46.5067, "step": 963 }, { "epoch": 22.955223880597014, "grad_norm": NaN, "learning_rate": 9.285714285714288e-06, "loss": 78.3124, "step": 964 }, { "epoch": 22.97910447761194, "grad_norm": 28.587425231933594, "learning_rate": 9.285714285714288e-06, "loss": 47.1599, "step": 965 }, { "epoch": 23.0, "grad_norm": 41.493404388427734, "learning_rate": 9.279761904761906e-06, "loss": 41.2983, "step": 966 }, { "epoch": 23.023880597014927, "grad_norm": 41.00606918334961, "learning_rate": 9.273809523809525e-06, "loss": 46.8696, "step": 967 }, { "epoch": 23.04776119402985, "grad_norm": 31.043148040771484, "learning_rate": 9.267857142857144e-06, "loss": 46.4614, "step": 968 }, { "epoch": 23.071641791044776, "grad_norm": 36.815940856933594, "learning_rate": 9.261904761904763e-06, "loss": 47.5987, "step": 969 }, { "epoch": 23.095522388059702, "grad_norm": 35.73536682128906, "learning_rate": 9.255952380952381e-06, "loss": 47.8339, "step": 970 }, { "epoch": 23.119402985074625, "grad_norm": 26.95656967163086, "learning_rate": 9.250000000000001e-06, "loss": 48.0632, "step": 971 }, { "epoch": 23.143283582089552, "grad_norm": 40.408348083496094, "learning_rate": 9.244047619047621e-06, "loss": 47.5458, "step": 972 }, { "epoch": 23.16716417910448, "grad_norm": 41.97018051147461, "learning_rate": 9.238095238095239e-06, "loss": 48.8528, "step": 973 }, { "epoch": 23.1910447761194, "grad_norm": 23.809162139892578, "learning_rate": 9.232142857142859e-06, "loss": 47.663, "step": 974 }, { "epoch": 23.214925373134328, "grad_norm": 36.0232048034668, "learning_rate": 9.226190476190477e-06, "loss": 47.496, "step": 975 }, { "epoch": 23.238805970149254, "grad_norm": 32.06623077392578, "learning_rate": 9.220238095238097e-06, "loss": 47.4472, "step": 976 }, { "epoch": 23.262686567164177, "grad_norm": 30.663307189941406, "learning_rate": 9.214285714285715e-06, "loss": 47.1342, "step": 977 }, { "epoch": 23.286567164179104, "grad_norm": 39.121437072753906, "learning_rate": 9.208333333333333e-06, "loss": 47.9977, "step": 978 }, { "epoch": 23.31044776119403, "grad_norm": 31.75649642944336, "learning_rate": 9.202380952380953e-06, "loss": 49.2196, "step": 979 }, { "epoch": 23.334328358208957, "grad_norm": 50.10381317138672, "learning_rate": 9.196428571428571e-06, "loss": 47.6487, "step": 980 }, { "epoch": 23.35820895522388, "grad_norm": 36.412906646728516, "learning_rate": 9.19047619047619e-06, "loss": 47.0012, "step": 981 }, { "epoch": 23.382089552238806, "grad_norm": 40.47570037841797, "learning_rate": 9.18452380952381e-06, "loss": 45.4449, "step": 982 }, { "epoch": 23.405970149253733, "grad_norm": 43.92324447631836, "learning_rate": 9.178571428571429e-06, "loss": 47.8727, "step": 983 }, { "epoch": 23.429850746268656, "grad_norm": 28.896121978759766, "learning_rate": 9.172619047619048e-06, "loss": 47.8489, "step": 984 }, { "epoch": 23.453731343283582, "grad_norm": 37.02536392211914, "learning_rate": 9.166666666666666e-06, "loss": 48.4484, "step": 985 }, { "epoch": 23.47761194029851, "grad_norm": 26.289518356323242, "learning_rate": 9.160714285714286e-06, "loss": 47.0221, "step": 986 }, { "epoch": 23.501492537313432, "grad_norm": 33.60945129394531, "learning_rate": 9.154761904761906e-06, "loss": 47.7454, "step": 987 }, { "epoch": 23.52537313432836, "grad_norm": 31.284311294555664, "learning_rate": 9.148809523809524e-06, "loss": 47.0558, "step": 988 }, { "epoch": 23.549253731343285, "grad_norm": 30.488906860351562, "learning_rate": 9.142857142857144e-06, "loss": 46.4408, "step": 989 }, { "epoch": 23.573134328358208, "grad_norm": 34.29289245605469, "learning_rate": 9.136904761904762e-06, "loss": 46.2796, "step": 990 }, { "epoch": 23.597014925373134, "grad_norm": 22.803457260131836, "learning_rate": 9.130952380952382e-06, "loss": 47.2684, "step": 991 }, { "epoch": 23.62089552238806, "grad_norm": 33.18730926513672, "learning_rate": 9.125e-06, "loss": 47.5, "step": 992 }, { "epoch": 23.644776119402984, "grad_norm": 28.421592712402344, "learning_rate": 9.11904761904762e-06, "loss": 46.7508, "step": 993 }, { "epoch": 23.66865671641791, "grad_norm": 23.492319107055664, "learning_rate": 9.11309523809524e-06, "loss": 46.6042, "step": 994 }, { "epoch": 23.692537313432837, "grad_norm": 30.10877227783203, "learning_rate": 9.107142857142858e-06, "loss": 46.2632, "step": 995 }, { "epoch": 23.71641791044776, "grad_norm": 23.64444351196289, "learning_rate": 9.101190476190477e-06, "loss": 47.2817, "step": 996 }, { "epoch": 23.740298507462686, "grad_norm": 28.243606567382812, "learning_rate": 9.095238095238095e-06, "loss": 47.1196, "step": 997 }, { "epoch": 23.764179104477613, "grad_norm": 26.84208869934082, "learning_rate": 9.089285714285715e-06, "loss": 46.6631, "step": 998 }, { "epoch": 23.788059701492536, "grad_norm": 29.558794021606445, "learning_rate": 9.083333333333333e-06, "loss": 45.8711, "step": 999 }, { "epoch": 23.811940298507462, "grad_norm": 25.105928421020508, "learning_rate": 9.077380952380953e-06, "loss": 47.8253, "step": 1000 }, { "epoch": 23.83582089552239, "grad_norm": NaN, "learning_rate": 9.071428571428573e-06, "loss": 82.5048, "step": 1001 }, { "epoch": 23.85970149253731, "grad_norm": 25.548643112182617, "learning_rate": 9.071428571428573e-06, "loss": 47.5042, "step": 1002 }, { "epoch": 23.883582089552238, "grad_norm": 28.8011531829834, "learning_rate": 9.065476190476191e-06, "loss": 47.0084, "step": 1003 }, { "epoch": 23.907462686567165, "grad_norm": 31.907651901245117, "learning_rate": 9.05952380952381e-06, "loss": 48.287, "step": 1004 }, { "epoch": 23.93134328358209, "grad_norm": 32.044986724853516, "learning_rate": 9.053571428571429e-06, "loss": 47.276, "step": 1005 }, { "epoch": 23.955223880597014, "grad_norm": 31.224260330200195, "learning_rate": 9.047619047619049e-06, "loss": 47.4774, "step": 1006 }, { "epoch": 23.97910447761194, "grad_norm": 29.830835342407227, "learning_rate": 9.041666666666667e-06, "loss": 47.7031, "step": 1007 }, { "epoch": 24.0, "grad_norm": 25.12934112548828, "learning_rate": 9.035714285714287e-06, "loss": 41.8156, "step": 1008 }, { "epoch": 24.023880597014927, "grad_norm": 31.172348022460938, "learning_rate": 9.029761904761906e-06, "loss": 48.0591, "step": 1009 }, { "epoch": 24.04776119402985, "grad_norm": 26.59412956237793, "learning_rate": 9.023809523809524e-06, "loss": 47.6291, "step": 1010 }, { "epoch": 24.071641791044776, "grad_norm": 29.16905975341797, "learning_rate": 9.017857142857144e-06, "loss": 47.4587, "step": 1011 }, { "epoch": 24.095522388059702, "grad_norm": 33.05836868286133, "learning_rate": 9.011904761904762e-06, "loss": 47.8748, "step": 1012 }, { "epoch": 24.119402985074625, "grad_norm": 26.13016700744629, "learning_rate": 9.005952380952382e-06, "loss": 48.0003, "step": 1013 }, { "epoch": 24.143283582089552, "grad_norm": 29.883411407470703, "learning_rate": 9e-06, "loss": 47.0188, "step": 1014 }, { "epoch": 24.16716417910448, "grad_norm": 29.039255142211914, "learning_rate": 8.99404761904762e-06, "loss": 46.8844, "step": 1015 }, { "epoch": 24.1910447761194, "grad_norm": 26.532760620117188, "learning_rate": 8.98809523809524e-06, "loss": 47.1817, "step": 1016 }, { "epoch": 24.214925373134328, "grad_norm": 30.146087646484375, "learning_rate": 8.982142857142858e-06, "loss": 46.4863, "step": 1017 }, { "epoch": 24.238805970149254, "grad_norm": 27.027935028076172, "learning_rate": 8.976190476190478e-06, "loss": 45.7162, "step": 1018 }, { "epoch": 24.262686567164177, "grad_norm": 27.315515518188477, "learning_rate": 8.970238095238096e-06, "loss": 46.6337, "step": 1019 }, { "epoch": 24.286567164179104, "grad_norm": 25.63303565979004, "learning_rate": 8.964285714285716e-06, "loss": 46.5452, "step": 1020 }, { "epoch": 24.31044776119403, "grad_norm": 22.407268524169922, "learning_rate": 8.958333333333334e-06, "loss": 47.3262, "step": 1021 }, { "epoch": 24.334328358208957, "grad_norm": 19.4051513671875, "learning_rate": 8.952380952380953e-06, "loss": 46.8407, "step": 1022 }, { "epoch": 24.35820895522388, "grad_norm": NaN, "learning_rate": 8.946428571428573e-06, "loss": 77.1735, "step": 1023 }, { "epoch": 24.382089552238806, "grad_norm": 26.870166778564453, "learning_rate": 8.946428571428573e-06, "loss": 46.9395, "step": 1024 }, { "epoch": 24.405970149253733, "grad_norm": 21.54165267944336, "learning_rate": 8.940476190476191e-06, "loss": 47.2505, "step": 1025 }, { "epoch": 24.429850746268656, "grad_norm": 29.317501068115234, "learning_rate": 8.934523809523811e-06, "loss": 48.0554, "step": 1026 }, { "epoch": 24.453731343283582, "grad_norm": 27.45809555053711, "learning_rate": 8.92857142857143e-06, "loss": 47.0495, "step": 1027 }, { "epoch": 24.47761194029851, "grad_norm": NaN, "learning_rate": 8.922619047619049e-06, "loss": 66.51, "step": 1028 }, { "epoch": 24.501492537313432, "grad_norm": 23.169204711914062, "learning_rate": 8.922619047619049e-06, "loss": 47.5902, "step": 1029 }, { "epoch": 24.52537313432836, "grad_norm": 31.986774444580078, "learning_rate": 8.916666666666667e-06, "loss": 47.4281, "step": 1030 }, { "epoch": 24.549253731343285, "grad_norm": 27.15190315246582, "learning_rate": 8.910714285714287e-06, "loss": 46.3638, "step": 1031 }, { "epoch": 24.573134328358208, "grad_norm": 26.88028335571289, "learning_rate": 8.904761904761905e-06, "loss": 45.0491, "step": 1032 }, { "epoch": 24.597014925373134, "grad_norm": 27.693952560424805, "learning_rate": 8.898809523809525e-06, "loss": 47.6471, "step": 1033 }, { "epoch": 24.62089552238806, "grad_norm": 33.45442581176758, "learning_rate": 8.892857142857143e-06, "loss": 47.1459, "step": 1034 }, { "epoch": 24.644776119402984, "grad_norm": 29.933320999145508, "learning_rate": 8.886904761904763e-06, "loss": 46.9218, "step": 1035 }, { "epoch": 24.66865671641791, "grad_norm": 26.401569366455078, "learning_rate": 8.88095238095238e-06, "loss": 47.7027, "step": 1036 }, { "epoch": 24.692537313432837, "grad_norm": 26.92498016357422, "learning_rate": 8.875e-06, "loss": 47.2302, "step": 1037 }, { "epoch": 24.71641791044776, "grad_norm": 28.368043899536133, "learning_rate": 8.869047619047619e-06, "loss": 47.4479, "step": 1038 }, { "epoch": 24.740298507462686, "grad_norm": 27.319650650024414, "learning_rate": 8.863095238095238e-06, "loss": 47.4652, "step": 1039 }, { "epoch": 24.764179104477613, "grad_norm": 37.10929870605469, "learning_rate": 8.857142857142858e-06, "loss": 47.3543, "step": 1040 }, { "epoch": 24.788059701492536, "grad_norm": 32.430416107177734, "learning_rate": 8.851190476190476e-06, "loss": 46.1406, "step": 1041 }, { "epoch": 24.811940298507462, "grad_norm": 33.29399108886719, "learning_rate": 8.845238095238096e-06, "loss": 47.1917, "step": 1042 }, { "epoch": 24.83582089552239, "grad_norm": 32.72507095336914, "learning_rate": 8.839285714285714e-06, "loss": 47.677, "step": 1043 }, { "epoch": 24.85970149253731, "grad_norm": 25.997148513793945, "learning_rate": 8.833333333333334e-06, "loss": 47.6851, "step": 1044 }, { "epoch": 24.883582089552238, "grad_norm": 33.00047302246094, "learning_rate": 8.827380952380952e-06, "loss": 47.5326, "step": 1045 }, { "epoch": 24.907462686567165, "grad_norm": 33.95719528198242, "learning_rate": 8.821428571428572e-06, "loss": 47.2836, "step": 1046 }, { "epoch": 24.93134328358209, "grad_norm": 31.353008270263672, "learning_rate": 8.815476190476192e-06, "loss": 47.8318, "step": 1047 }, { "epoch": 24.955223880597014, "grad_norm": 27.4250545501709, "learning_rate": 8.80952380952381e-06, "loss": 47.0066, "step": 1048 }, { "epoch": 24.97910447761194, "grad_norm": 30.22010612487793, "learning_rate": 8.80357142857143e-06, "loss": 46.6991, "step": 1049 }, { "epoch": 25.0, "grad_norm": 19.693180084228516, "learning_rate": 8.797619047619048e-06, "loss": 41.6055, "step": 1050 }, { "epoch": 25.023880597014927, "grad_norm": 24.590662002563477, "learning_rate": 8.791666666666667e-06, "loss": 46.3536, "step": 1051 }, { "epoch": 25.04776119402985, "grad_norm": 28.37199592590332, "learning_rate": 8.785714285714286e-06, "loss": 47.8334, "step": 1052 }, { "epoch": 25.071641791044776, "grad_norm": 26.38755226135254, "learning_rate": 8.779761904761905e-06, "loss": 47.6486, "step": 1053 }, { "epoch": 25.095522388059702, "grad_norm": 27.338485717773438, "learning_rate": 8.773809523809525e-06, "loss": 47.3044, "step": 1054 }, { "epoch": 25.119402985074625, "grad_norm": 25.308486938476562, "learning_rate": 8.767857142857143e-06, "loss": 45.6873, "step": 1055 }, { "epoch": 25.143283582089552, "grad_norm": 30.886962890625, "learning_rate": 8.761904761904763e-06, "loss": 46.8938, "step": 1056 }, { "epoch": 25.16716417910448, "grad_norm": 25.25688934326172, "learning_rate": 8.755952380952381e-06, "loss": 47.4858, "step": 1057 }, { "epoch": 25.1910447761194, "grad_norm": 30.462963104248047, "learning_rate": 8.750000000000001e-06, "loss": 46.6334, "step": 1058 }, { "epoch": 25.214925373134328, "grad_norm": 22.87471580505371, "learning_rate": 8.744047619047619e-06, "loss": 46.0966, "step": 1059 }, { "epoch": 25.238805970149254, "grad_norm": 23.413904190063477, "learning_rate": 8.738095238095239e-06, "loss": 46.8938, "step": 1060 }, { "epoch": 25.262686567164177, "grad_norm": 26.926279067993164, "learning_rate": 8.732142857142859e-06, "loss": 46.3773, "step": 1061 }, { "epoch": 25.286567164179104, "grad_norm": 27.595348358154297, "learning_rate": 8.726190476190477e-06, "loss": 48.0235, "step": 1062 }, { "epoch": 25.31044776119403, "grad_norm": 26.124523162841797, "learning_rate": 8.720238095238096e-06, "loss": 46.6863, "step": 1063 }, { "epoch": 25.334328358208957, "grad_norm": 28.308120727539062, "learning_rate": 8.714285714285715e-06, "loss": 47.7158, "step": 1064 }, { "epoch": 25.35820895522388, "grad_norm": 23.434846878051758, "learning_rate": 8.708333333333334e-06, "loss": 47.2951, "step": 1065 }, { "epoch": 25.382089552238806, "grad_norm": 26.917911529541016, "learning_rate": 8.702380952380952e-06, "loss": 45.7266, "step": 1066 }, { "epoch": 25.405970149253733, "grad_norm": 19.7725772857666, "learning_rate": 8.696428571428572e-06, "loss": 46.5458, "step": 1067 }, { "epoch": 25.429850746268656, "grad_norm": 27.18629264831543, "learning_rate": 8.690476190476192e-06, "loss": 46.3133, "step": 1068 }, { "epoch": 25.453731343283582, "grad_norm": 26.112865447998047, "learning_rate": 8.68452380952381e-06, "loss": 46.6383, "step": 1069 }, { "epoch": 25.47761194029851, "grad_norm": 19.385990142822266, "learning_rate": 8.67857142857143e-06, "loss": 46.5541, "step": 1070 }, { "epoch": 25.501492537313432, "grad_norm": 26.713350296020508, "learning_rate": 8.672619047619048e-06, "loss": 48.045, "step": 1071 }, { "epoch": 25.52537313432836, "grad_norm": 29.80147933959961, "learning_rate": 8.666666666666668e-06, "loss": 47.4443, "step": 1072 }, { "epoch": 25.549253731343285, "grad_norm": 23.674266815185547, "learning_rate": 8.660714285714286e-06, "loss": 46.6662, "step": 1073 }, { "epoch": 25.573134328358208, "grad_norm": 46.435401916503906, "learning_rate": 8.654761904761906e-06, "loss": 46.9276, "step": 1074 }, { "epoch": 25.597014925373134, "grad_norm": 35.016502380371094, "learning_rate": 8.648809523809526e-06, "loss": 47.6811, "step": 1075 }, { "epoch": 25.62089552238806, "grad_norm": 42.57990646362305, "learning_rate": 8.642857142857144e-06, "loss": 46.5684, "step": 1076 }, { "epoch": 25.644776119402984, "grad_norm": 36.2376708984375, "learning_rate": 8.636904761904763e-06, "loss": 46.1807, "step": 1077 }, { "epoch": 25.66865671641791, "grad_norm": 41.44023895263672, "learning_rate": 8.630952380952381e-06, "loss": 46.0823, "step": 1078 }, { "epoch": 25.692537313432837, "grad_norm": 43.62863540649414, "learning_rate": 8.625000000000001e-06, "loss": 47.9958, "step": 1079 }, { "epoch": 25.71641791044776, "grad_norm": 34.232120513916016, "learning_rate": 8.61904761904762e-06, "loss": 47.9585, "step": 1080 }, { "epoch": 25.740298507462686, "grad_norm": 38.023197174072266, "learning_rate": 8.61309523809524e-06, "loss": 47.7344, "step": 1081 }, { "epoch": 25.764179104477613, "grad_norm": 37.89833068847656, "learning_rate": 8.607142857142859e-06, "loss": 47.8956, "step": 1082 }, { "epoch": 25.788059701492536, "grad_norm": 33.03269958496094, "learning_rate": 8.601190476190477e-06, "loss": 47.7408, "step": 1083 }, { "epoch": 25.811940298507462, "grad_norm": 44.31171798706055, "learning_rate": 8.595238095238097e-06, "loss": 47.6232, "step": 1084 }, { "epoch": 25.83582089552239, "grad_norm": 42.54961395263672, "learning_rate": 8.589285714285715e-06, "loss": 47.9757, "step": 1085 }, { "epoch": 25.85970149253731, "grad_norm": 27.695526123046875, "learning_rate": 8.583333333333333e-06, "loss": 47.0934, "step": 1086 }, { "epoch": 25.883582089552238, "grad_norm": 32.62801742553711, "learning_rate": 8.577380952380953e-06, "loss": 47.1186, "step": 1087 }, { "epoch": 25.907462686567165, "grad_norm": 26.777305603027344, "learning_rate": 8.571428571428571e-06, "loss": 47.2931, "step": 1088 }, { "epoch": 25.93134328358209, "grad_norm": 24.382678985595703, "learning_rate": 8.56547619047619e-06, "loss": 46.4698, "step": 1089 }, { "epoch": 25.955223880597014, "grad_norm": 34.310150146484375, "learning_rate": 8.55952380952381e-06, "loss": 46.0509, "step": 1090 }, { "epoch": 25.97910447761194, "grad_norm": 27.468976974487305, "learning_rate": 8.553571428571429e-06, "loss": 46.9123, "step": 1091 }, { "epoch": 26.0, "grad_norm": 27.90901756286621, "learning_rate": 8.547619047619048e-06, "loss": 41.8265, "step": 1092 }, { "epoch": 26.023880597014927, "grad_norm": 28.853416442871094, "learning_rate": 8.541666666666666e-06, "loss": 47.9612, "step": 1093 }, { "epoch": 26.04776119402985, "grad_norm": 31.96144676208496, "learning_rate": 8.535714285714286e-06, "loss": 46.8167, "step": 1094 }, { "epoch": 26.071641791044776, "grad_norm": 33.179141998291016, "learning_rate": 8.529761904761904e-06, "loss": 48.0464, "step": 1095 }, { "epoch": 26.095522388059702, "grad_norm": 32.18705368041992, "learning_rate": 8.523809523809524e-06, "loss": 45.6743, "step": 1096 }, { "epoch": 26.119402985074625, "grad_norm": 26.125934600830078, "learning_rate": 8.517857142857144e-06, "loss": 46.4944, "step": 1097 }, { "epoch": 26.143283582089552, "grad_norm": 31.666461944580078, "learning_rate": 8.511904761904762e-06, "loss": 47.6152, "step": 1098 }, { "epoch": 26.16716417910448, "grad_norm": 29.90437889099121, "learning_rate": 8.505952380952382e-06, "loss": 46.4497, "step": 1099 }, { "epoch": 26.1910447761194, "grad_norm": 32.59938430786133, "learning_rate": 8.5e-06, "loss": 47.1877, "step": 1100 }, { "epoch": 26.214925373134328, "grad_norm": 28.368562698364258, "learning_rate": 8.49404761904762e-06, "loss": 46.4898, "step": 1101 }, { "epoch": 26.238805970149254, "grad_norm": 31.274070739746094, "learning_rate": 8.488095238095238e-06, "loss": 47.1507, "step": 1102 }, { "epoch": 26.262686567164177, "grad_norm": 24.63444709777832, "learning_rate": 8.482142857142858e-06, "loss": 47.3659, "step": 1103 }, { "epoch": 26.286567164179104, "grad_norm": 35.413970947265625, "learning_rate": 8.476190476190477e-06, "loss": 46.6459, "step": 1104 }, { "epoch": 26.31044776119403, "grad_norm": 27.774656295776367, "learning_rate": 8.470238095238095e-06, "loss": 47.4369, "step": 1105 }, { "epoch": 26.334328358208957, "grad_norm": 32.258155822753906, "learning_rate": 8.464285714285715e-06, "loss": 47.487, "step": 1106 }, { "epoch": 26.35820895522388, "grad_norm": 18.22418785095215, "learning_rate": 8.458333333333333e-06, "loss": 46.948, "step": 1107 }, { "epoch": 26.382089552238806, "grad_norm": 24.50945472717285, "learning_rate": 8.452380952380953e-06, "loss": 45.7291, "step": 1108 }, { "epoch": 26.405970149253733, "grad_norm": 23.20486831665039, "learning_rate": 8.446428571428571e-06, "loss": 46.8704, "step": 1109 }, { "epoch": 26.429850746268656, "grad_norm": 20.810514450073242, "learning_rate": 8.440476190476191e-06, "loss": 47.3419, "step": 1110 }, { "epoch": 26.453731343283582, "grad_norm": 27.68440818786621, "learning_rate": 8.434523809523811e-06, "loss": 47.0641, "step": 1111 }, { "epoch": 26.47761194029851, "grad_norm": 26.989046096801758, "learning_rate": 8.428571428571429e-06, "loss": 46.9324, "step": 1112 }, { "epoch": 26.501492537313432, "grad_norm": 25.986888885498047, "learning_rate": 8.422619047619049e-06, "loss": 48.6179, "step": 1113 }, { "epoch": 26.52537313432836, "grad_norm": 28.111356735229492, "learning_rate": 8.416666666666667e-06, "loss": 46.4251, "step": 1114 }, { "epoch": 26.549253731343285, "grad_norm": 24.306228637695312, "learning_rate": 8.410714285714287e-06, "loss": 46.4379, "step": 1115 }, { "epoch": 26.573134328358208, "grad_norm": 23.894895553588867, "learning_rate": 8.404761904761905e-06, "loss": 46.665, "step": 1116 }, { "epoch": 26.597014925373134, "grad_norm": 25.917400360107422, "learning_rate": 8.398809523809525e-06, "loss": 46.6619, "step": 1117 }, { "epoch": 26.62089552238806, "grad_norm": 21.423585891723633, "learning_rate": 8.392857142857144e-06, "loss": 46.3447, "step": 1118 }, { "epoch": 26.644776119402984, "grad_norm": 29.13437271118164, "learning_rate": 8.386904761904762e-06, "loss": 46.4292, "step": 1119 }, { "epoch": 26.66865671641791, "grad_norm": 25.711469650268555, "learning_rate": 8.380952380952382e-06, "loss": 46.6156, "step": 1120 }, { "epoch": 26.692537313432837, "grad_norm": 26.55695915222168, "learning_rate": 8.375e-06, "loss": 46.7429, "step": 1121 }, { "epoch": 26.71641791044776, "grad_norm": 27.66262435913086, "learning_rate": 8.36904761904762e-06, "loss": 47.1275, "step": 1122 }, { "epoch": 26.740298507462686, "grad_norm": 33.85395050048828, "learning_rate": 8.36309523809524e-06, "loss": 46.7244, "step": 1123 }, { "epoch": 26.764179104477613, "grad_norm": 29.51833152770996, "learning_rate": 8.357142857142858e-06, "loss": 47.2072, "step": 1124 }, { "epoch": 26.788059701492536, "grad_norm": 26.21416664123535, "learning_rate": 8.351190476190478e-06, "loss": 47.8298, "step": 1125 }, { "epoch": 26.811940298507462, "grad_norm": 31.24039649963379, "learning_rate": 8.345238095238096e-06, "loss": 46.8069, "step": 1126 }, { "epoch": 26.83582089552239, "grad_norm": 32.19520568847656, "learning_rate": 8.339285714285716e-06, "loss": 47.1494, "step": 1127 }, { "epoch": 26.85970149253731, "grad_norm": 29.194063186645508, "learning_rate": 8.333333333333334e-06, "loss": 46.1827, "step": 1128 }, { "epoch": 26.883582089552238, "grad_norm": 28.723541259765625, "learning_rate": 8.327380952380954e-06, "loss": 46.8513, "step": 1129 }, { "epoch": 26.907462686567165, "grad_norm": 26.91135597229004, "learning_rate": 8.321428571428573e-06, "loss": 46.561, "step": 1130 }, { "epoch": 26.93134328358209, "grad_norm": 25.60898208618164, "learning_rate": 8.315476190476191e-06, "loss": 46.4706, "step": 1131 }, { "epoch": 26.955223880597014, "grad_norm": 23.72539520263672, "learning_rate": 8.309523809523811e-06, "loss": 46.7871, "step": 1132 }, { "epoch": 26.97910447761194, "grad_norm": 25.030731201171875, "learning_rate": 8.30357142857143e-06, "loss": 46.2433, "step": 1133 }, { "epoch": 27.0, "grad_norm": 25.439281463623047, "learning_rate": 8.297619047619049e-06, "loss": 40.9101, "step": 1134 }, { "epoch": 27.023880597014927, "grad_norm": 29.52981948852539, "learning_rate": 8.291666666666667e-06, "loss": 46.9385, "step": 1135 }, { "epoch": 27.04776119402985, "grad_norm": 22.007299423217773, "learning_rate": 8.285714285714287e-06, "loss": 47.2751, "step": 1136 }, { "epoch": 27.071641791044776, "grad_norm": 31.965675354003906, "learning_rate": 8.279761904761905e-06, "loss": 47.7763, "step": 1137 }, { "epoch": 27.095522388059702, "grad_norm": 23.38637351989746, "learning_rate": 8.273809523809523e-06, "loss": 46.0412, "step": 1138 }, { "epoch": 27.119402985074625, "grad_norm": 24.295711517333984, "learning_rate": 8.267857142857143e-06, "loss": 46.8552, "step": 1139 }, { "epoch": 27.143283582089552, "grad_norm": 20.915624618530273, "learning_rate": 8.261904761904763e-06, "loss": 46.3041, "step": 1140 }, { "epoch": 27.16716417910448, "grad_norm": 28.25569725036621, "learning_rate": 8.25595238095238e-06, "loss": 45.8736, "step": 1141 }, { "epoch": 27.1910447761194, "grad_norm": 24.8399658203125, "learning_rate": 8.25e-06, "loss": 46.0629, "step": 1142 }, { "epoch": 27.214925373134328, "grad_norm": 21.237272262573242, "learning_rate": 8.244047619047619e-06, "loss": 47.1252, "step": 1143 }, { "epoch": 27.238805970149254, "grad_norm": 24.35887336730957, "learning_rate": 8.238095238095239e-06, "loss": 46.9687, "step": 1144 }, { "epoch": 27.262686567164177, "grad_norm": 26.583545684814453, "learning_rate": 8.232142857142857e-06, "loss": 47.1486, "step": 1145 }, { "epoch": 27.286567164179104, "grad_norm": 23.712989807128906, "learning_rate": 8.226190476190476e-06, "loss": 46.4769, "step": 1146 }, { "epoch": 27.31044776119403, "grad_norm": 18.466094970703125, "learning_rate": 8.220238095238096e-06, "loss": 46.6911, "step": 1147 }, { "epoch": 27.334328358208957, "grad_norm": 31.812236785888672, "learning_rate": 8.214285714285714e-06, "loss": 47.6261, "step": 1148 }, { "epoch": 27.35820895522388, "grad_norm": 23.437780380249023, "learning_rate": 8.208333333333334e-06, "loss": 46.1776, "step": 1149 }, { "epoch": 27.382089552238806, "grad_norm": 27.252187728881836, "learning_rate": 8.202380952380952e-06, "loss": 44.8499, "step": 1150 }, { "epoch": 27.405970149253733, "grad_norm": 24.425500869750977, "learning_rate": 8.196428571428572e-06, "loss": 46.5397, "step": 1151 }, { "epoch": 27.429850746268656, "grad_norm": 28.237712860107422, "learning_rate": 8.190476190476192e-06, "loss": 46.8922, "step": 1152 }, { "epoch": 27.453731343283582, "grad_norm": 23.262300491333008, "learning_rate": 8.18452380952381e-06, "loss": 47.4204, "step": 1153 }, { "epoch": 27.47761194029851, "grad_norm": 20.69318389892578, "learning_rate": 8.17857142857143e-06, "loss": 47.1507, "step": 1154 }, { "epoch": 27.501492537313432, "grad_norm": 30.182701110839844, "learning_rate": 8.172619047619048e-06, "loss": 46.198, "step": 1155 }, { "epoch": 27.52537313432836, "grad_norm": 28.804855346679688, "learning_rate": 8.166666666666668e-06, "loss": 46.1366, "step": 1156 }, { "epoch": 27.549253731343285, "grad_norm": 26.992097854614258, "learning_rate": 8.160714285714286e-06, "loss": 47.3639, "step": 1157 }, { "epoch": 27.573134328358208, "grad_norm": 27.22978401184082, "learning_rate": 8.154761904761905e-06, "loss": 46.7295, "step": 1158 }, { "epoch": 27.597014925373134, "grad_norm": 24.036380767822266, "learning_rate": 8.148809523809525e-06, "loss": 46.7824, "step": 1159 }, { "epoch": 27.62089552238806, "grad_norm": 28.67648696899414, "learning_rate": 8.142857142857143e-06, "loss": 46.9712, "step": 1160 }, { "epoch": 27.644776119402984, "grad_norm": 27.389991760253906, "learning_rate": 8.136904761904763e-06, "loss": 47.6263, "step": 1161 }, { "epoch": 27.66865671641791, "grad_norm": 28.887022018432617, "learning_rate": 8.130952380952381e-06, "loss": 47.7553, "step": 1162 }, { "epoch": 27.692537313432837, "grad_norm": 17.354753494262695, "learning_rate": 8.125000000000001e-06, "loss": 46.1359, "step": 1163 }, { "epoch": 27.71641791044776, "grad_norm": 26.72220230102539, "learning_rate": 8.119047619047619e-06, "loss": 46.3653, "step": 1164 }, { "epoch": 27.740298507462686, "grad_norm": 22.09579849243164, "learning_rate": 8.113095238095239e-06, "loss": 46.856, "step": 1165 }, { "epoch": 27.764179104477613, "grad_norm": 30.197189331054688, "learning_rate": 8.107142857142859e-06, "loss": 46.4435, "step": 1166 }, { "epoch": 27.788059701492536, "grad_norm": 28.597610473632812, "learning_rate": 8.101190476190477e-06, "loss": 48.3097, "step": 1167 }, { "epoch": 27.811940298507462, "grad_norm": 22.391801834106445, "learning_rate": 8.095238095238097e-06, "loss": 47.2598, "step": 1168 }, { "epoch": 27.83582089552239, "grad_norm": 28.523584365844727, "learning_rate": 8.089285714285715e-06, "loss": 46.3123, "step": 1169 }, { "epoch": 27.85970149253731, "grad_norm": 21.646997451782227, "learning_rate": 8.083333333333334e-06, "loss": 46.2853, "step": 1170 }, { "epoch": 27.883582089552238, "grad_norm": 22.68369483947754, "learning_rate": 8.077380952380953e-06, "loss": 46.1355, "step": 1171 }, { "epoch": 27.907462686567165, "grad_norm": 25.581987380981445, "learning_rate": 8.071428571428572e-06, "loss": 45.5431, "step": 1172 }, { "epoch": 27.93134328358209, "grad_norm": 26.512523651123047, "learning_rate": 8.065476190476192e-06, "loss": 47.1898, "step": 1173 }, { "epoch": 27.955223880597014, "grad_norm": 26.89809226989746, "learning_rate": 8.05952380952381e-06, "loss": 46.8108, "step": 1174 }, { "epoch": 27.97910447761194, "grad_norm": 23.638704299926758, "learning_rate": 8.05357142857143e-06, "loss": 47.1454, "step": 1175 }, { "epoch": 28.0, "grad_norm": 28.61042022705078, "learning_rate": 8.047619047619048e-06, "loss": 41.7161, "step": 1176 }, { "epoch": 28.023880597014927, "grad_norm": 31.2153377532959, "learning_rate": 8.041666666666668e-06, "loss": 46.8925, "step": 1177 }, { "epoch": 28.04776119402985, "grad_norm": 26.284482955932617, "learning_rate": 8.035714285714286e-06, "loss": 45.7065, "step": 1178 }, { "epoch": 28.071641791044776, "grad_norm": 30.96581268310547, "learning_rate": 8.029761904761906e-06, "loss": 46.5612, "step": 1179 }, { "epoch": 28.095522388059702, "grad_norm": 24.686336517333984, "learning_rate": 8.023809523809526e-06, "loss": 46.4376, "step": 1180 }, { "epoch": 28.119402985074625, "grad_norm": 27.783416748046875, "learning_rate": 8.017857142857144e-06, "loss": 47.247, "step": 1181 }, { "epoch": 28.143283582089552, "grad_norm": 33.3108024597168, "learning_rate": 8.011904761904763e-06, "loss": 47.3171, "step": 1182 }, { "epoch": 28.16716417910448, "grad_norm": 30.010536193847656, "learning_rate": 8.005952380952382e-06, "loss": 45.9761, "step": 1183 }, { "epoch": 28.1910447761194, "grad_norm": 29.399965286254883, "learning_rate": 8.000000000000001e-06, "loss": 47.3345, "step": 1184 }, { "epoch": 28.214925373134328, "grad_norm": 25.835142135620117, "learning_rate": 7.99404761904762e-06, "loss": 46.3395, "step": 1185 }, { "epoch": 28.238805970149254, "grad_norm": 26.06570053100586, "learning_rate": 7.98809523809524e-06, "loss": 45.842, "step": 1186 }, { "epoch": 28.262686567164177, "grad_norm": 28.64603042602539, "learning_rate": 7.982142857142859e-06, "loss": 46.4802, "step": 1187 }, { "epoch": 28.286567164179104, "grad_norm": 27.157583236694336, "learning_rate": 7.976190476190477e-06, "loss": 45.7376, "step": 1188 }, { "epoch": 28.31044776119403, "grad_norm": 22.073328018188477, "learning_rate": 7.970238095238097e-06, "loss": 47.0787, "step": 1189 }, { "epoch": 28.334328358208957, "grad_norm": 21.545568466186523, "learning_rate": 7.964285714285715e-06, "loss": 45.6691, "step": 1190 }, { "epoch": 28.35820895522388, "grad_norm": 26.17327880859375, "learning_rate": 7.958333333333333e-06, "loss": 46.2058, "step": 1191 }, { "epoch": 28.382089552238806, "grad_norm": 24.443920135498047, "learning_rate": 7.952380952380953e-06, "loss": 45.3531, "step": 1192 }, { "epoch": 28.405970149253733, "grad_norm": 27.207778930664062, "learning_rate": 7.946428571428571e-06, "loss": 46.5519, "step": 1193 }, { "epoch": 28.429850746268656, "grad_norm": 23.15156364440918, "learning_rate": 7.94047619047619e-06, "loss": 46.9284, "step": 1194 }, { "epoch": 28.453731343283582, "grad_norm": 27.96567153930664, "learning_rate": 7.93452380952381e-06, "loss": 46.3696, "step": 1195 }, { "epoch": 28.47761194029851, "grad_norm": 25.828689575195312, "learning_rate": 7.928571428571429e-06, "loss": 46.2933, "step": 1196 }, { "epoch": 28.501492537313432, "grad_norm": 30.69676971435547, "learning_rate": 7.922619047619048e-06, "loss": 46.7471, "step": 1197 }, { "epoch": 28.52537313432836, "grad_norm": 24.977018356323242, "learning_rate": 7.916666666666667e-06, "loss": 47.045, "step": 1198 }, { "epoch": 28.549253731343285, "grad_norm": 26.286821365356445, "learning_rate": 7.910714285714286e-06, "loss": 47.0185, "step": 1199 }, { "epoch": 28.573134328358208, "grad_norm": 25.324783325195312, "learning_rate": 7.904761904761904e-06, "loss": 46.4036, "step": 1200 }, { "epoch": 28.597014925373134, "grad_norm": 34.25847625732422, "learning_rate": 7.898809523809524e-06, "loss": 46.8307, "step": 1201 }, { "epoch": 28.62089552238806, "grad_norm": 24.739521026611328, "learning_rate": 7.892857142857144e-06, "loss": 46.515, "step": 1202 }, { "epoch": 28.644776119402984, "grad_norm": 36.694252014160156, "learning_rate": 7.886904761904762e-06, "loss": 45.7078, "step": 1203 }, { "epoch": 28.66865671641791, "grad_norm": 34.95314025878906, "learning_rate": 7.880952380952382e-06, "loss": 47.6302, "step": 1204 }, { "epoch": 28.692537313432837, "grad_norm": 26.474821090698242, "learning_rate": 7.875e-06, "loss": 47.2158, "step": 1205 }, { "epoch": 28.71641791044776, "grad_norm": 30.19892692565918, "learning_rate": 7.86904761904762e-06, "loss": 46.1515, "step": 1206 }, { "epoch": 28.740298507462686, "grad_norm": 32.16860580444336, "learning_rate": 7.863095238095238e-06, "loss": 46.3963, "step": 1207 }, { "epoch": 28.764179104477613, "grad_norm": 27.323444366455078, "learning_rate": 7.857142857142858e-06, "loss": 47.6511, "step": 1208 }, { "epoch": 28.788059701492536, "grad_norm": 24.073701858520508, "learning_rate": 7.851190476190477e-06, "loss": 46.7283, "step": 1209 }, { "epoch": 28.811940298507462, "grad_norm": 25.73206329345703, "learning_rate": 7.845238095238096e-06, "loss": 47.3957, "step": 1210 }, { "epoch": 28.83582089552239, "grad_norm": 23.368709564208984, "learning_rate": 7.839285714285715e-06, "loss": 46.1844, "step": 1211 }, { "epoch": 28.85970149253731, "grad_norm": 24.563371658325195, "learning_rate": 7.833333333333333e-06, "loss": 47.0752, "step": 1212 }, { "epoch": 28.883582089552238, "grad_norm": 20.747081756591797, "learning_rate": 7.827380952380953e-06, "loss": 45.9425, "step": 1213 }, { "epoch": 28.907462686567165, "grad_norm": 22.27573013305664, "learning_rate": 7.821428571428571e-06, "loss": 46.746, "step": 1214 }, { "epoch": 28.93134328358209, "grad_norm": 23.162179946899414, "learning_rate": 7.815476190476191e-06, "loss": 46.808, "step": 1215 }, { "epoch": 28.955223880597014, "grad_norm": 23.585325241088867, "learning_rate": 7.809523809523811e-06, "loss": 47.2584, "step": 1216 }, { "epoch": 28.97910447761194, "grad_norm": 29.979564666748047, "learning_rate": 7.803571428571429e-06, "loss": 47.3433, "step": 1217 }, { "epoch": 29.0, "grad_norm": 25.872072219848633, "learning_rate": 7.797619047619049e-06, "loss": 41.1844, "step": 1218 }, { "epoch": 29.023880597014927, "grad_norm": 25.673351287841797, "learning_rate": 7.791666666666667e-06, "loss": 46.4546, "step": 1219 }, { "epoch": 29.04776119402985, "grad_norm": 29.831058502197266, "learning_rate": 7.785714285714287e-06, "loss": 46.8122, "step": 1220 }, { "epoch": 29.071641791044776, "grad_norm": 27.548316955566406, "learning_rate": 7.779761904761905e-06, "loss": 47.748, "step": 1221 }, { "epoch": 29.095522388059702, "grad_norm": 26.399370193481445, "learning_rate": 7.773809523809525e-06, "loss": 46.736, "step": 1222 }, { "epoch": 29.119402985074625, "grad_norm": 25.127031326293945, "learning_rate": 7.767857142857144e-06, "loss": 46.8307, "step": 1223 }, { "epoch": 29.143283582089552, "grad_norm": 26.624732971191406, "learning_rate": 7.761904761904762e-06, "loss": 46.2401, "step": 1224 }, { "epoch": 29.16716417910448, "grad_norm": 30.770824432373047, "learning_rate": 7.755952380952382e-06, "loss": 46.7194, "step": 1225 }, { "epoch": 29.1910447761194, "grad_norm": 23.830007553100586, "learning_rate": 7.75e-06, "loss": 46.4737, "step": 1226 }, { "epoch": 29.214925373134328, "grad_norm": 32.90129470825195, "learning_rate": 7.74404761904762e-06, "loss": 47.4361, "step": 1227 }, { "epoch": 29.238805970149254, "grad_norm": 23.381397247314453, "learning_rate": 7.738095238095238e-06, "loss": 45.3297, "step": 1228 }, { "epoch": 29.262686567164177, "grad_norm": 32.836387634277344, "learning_rate": 7.732142857142858e-06, "loss": 46.0574, "step": 1229 }, { "epoch": 29.286567164179104, "grad_norm": 25.803264617919922, "learning_rate": 7.726190476190478e-06, "loss": 45.757, "step": 1230 }, { "epoch": 29.31044776119403, "grad_norm": 29.38982391357422, "learning_rate": 7.720238095238096e-06, "loss": 46.7099, "step": 1231 }, { "epoch": 29.334328358208957, "grad_norm": 26.39947509765625, "learning_rate": 7.714285714285716e-06, "loss": 47.5944, "step": 1232 }, { "epoch": 29.35820895522388, "grad_norm": 25.958354949951172, "learning_rate": 7.708333333333334e-06, "loss": 46.1395, "step": 1233 }, { "epoch": 29.382089552238806, "grad_norm": 28.697542190551758, "learning_rate": 7.702380952380954e-06, "loss": 46.1713, "step": 1234 }, { "epoch": 29.405970149253733, "grad_norm": 19.471586227416992, "learning_rate": 7.696428571428572e-06, "loss": 47.7724, "step": 1235 }, { "epoch": 29.429850746268656, "grad_norm": 29.924991607666016, "learning_rate": 7.690476190476191e-06, "loss": 47.2203, "step": 1236 }, { "epoch": 29.453731343283582, "grad_norm": 20.516891479492188, "learning_rate": 7.684523809523811e-06, "loss": 46.2945, "step": 1237 }, { "epoch": 29.47761194029851, "grad_norm": 30.605262756347656, "learning_rate": 7.67857142857143e-06, "loss": 47.1786, "step": 1238 }, { "epoch": 29.501492537313432, "grad_norm": 16.288013458251953, "learning_rate": 7.672619047619049e-06, "loss": 45.3413, "step": 1239 }, { "epoch": 29.52537313432836, "grad_norm": 23.54091453552246, "learning_rate": 7.666666666666667e-06, "loss": 45.9196, "step": 1240 }, { "epoch": 29.549253731343285, "grad_norm": 20.33724021911621, "learning_rate": 7.660714285714287e-06, "loss": 47.0275, "step": 1241 }, { "epoch": 29.573134328358208, "grad_norm": 27.460975646972656, "learning_rate": 7.654761904761905e-06, "loss": 44.8995, "step": 1242 }, { "epoch": 29.597014925373134, "grad_norm": 25.58623695373535, "learning_rate": 7.648809523809523e-06, "loss": 46.0706, "step": 1243 }, { "epoch": 29.62089552238806, "grad_norm": 27.997203826904297, "learning_rate": 7.642857142857143e-06, "loss": 47.2368, "step": 1244 }, { "epoch": 29.644776119402984, "grad_norm": 31.361181259155273, "learning_rate": 7.636904761904763e-06, "loss": 46.8056, "step": 1245 }, { "epoch": 29.66865671641791, "grad_norm": 29.266433715820312, "learning_rate": 7.630952380952381e-06, "loss": 45.323, "step": 1246 }, { "epoch": 29.692537313432837, "grad_norm": 24.066415786743164, "learning_rate": 7.625e-06, "loss": 46.9221, "step": 1247 }, { "epoch": 29.71641791044776, "grad_norm": 25.790491104125977, "learning_rate": 7.61904761904762e-06, "loss": 45.7051, "step": 1248 }, { "epoch": 29.740298507462686, "grad_norm": 24.202716827392578, "learning_rate": 7.6130952380952386e-06, "loss": 47.1067, "step": 1249 }, { "epoch": 29.764179104477613, "grad_norm": 27.302003860473633, "learning_rate": 7.6071428571428575e-06, "loss": 46.6039, "step": 1250 }, { "epoch": 29.788059701492536, "grad_norm": 22.75196075439453, "learning_rate": 7.6011904761904765e-06, "loss": 46.3265, "step": 1251 }, { "epoch": 29.811940298507462, "grad_norm": 30.963153839111328, "learning_rate": 7.595238095238095e-06, "loss": 46.3283, "step": 1252 }, { "epoch": 29.83582089552239, "grad_norm": 21.538162231445312, "learning_rate": 7.589285714285714e-06, "loss": 46.5345, "step": 1253 }, { "epoch": 29.85970149253731, "grad_norm": 28.09955596923828, "learning_rate": 7.583333333333333e-06, "loss": 47.1, "step": 1254 }, { "epoch": 29.883582089552238, "grad_norm": 20.88216781616211, "learning_rate": 7.577380952380953e-06, "loss": 45.3354, "step": 1255 }, { "epoch": 29.907462686567165, "grad_norm": 24.15240478515625, "learning_rate": 7.571428571428572e-06, "loss": 46.299, "step": 1256 }, { "epoch": 29.93134328358209, "grad_norm": 22.839298248291016, "learning_rate": 7.565476190476191e-06, "loss": 46.3436, "step": 1257 }, { "epoch": 29.955223880597014, "grad_norm": 26.582752227783203, "learning_rate": 7.55952380952381e-06, "loss": 45.9107, "step": 1258 }, { "epoch": 29.97910447761194, "grad_norm": 24.98562240600586, "learning_rate": 7.553571428571429e-06, "loss": 46.7134, "step": 1259 }, { "epoch": 30.0, "grad_norm": 23.327436447143555, "learning_rate": 7.547619047619048e-06, "loss": 41.2325, "step": 1260 }, { "epoch": 30.023880597014927, "grad_norm": 20.400623321533203, "learning_rate": 7.541666666666667e-06, "loss": 46.9564, "step": 1261 }, { "epoch": 30.04776119402985, "grad_norm": NaN, "learning_rate": 7.5357142857142865e-06, "loss": 68.2215, "step": 1262 }, { "epoch": 30.071641791044776, "grad_norm": 22.870811462402344, "learning_rate": 7.5357142857142865e-06, "loss": 46.5712, "step": 1263 }, { "epoch": 30.095522388059702, "grad_norm": 24.057098388671875, "learning_rate": 7.5297619047619055e-06, "loss": 46.8943, "step": 1264 }, { "epoch": 30.119402985074625, "grad_norm": 25.820720672607422, "learning_rate": 7.523809523809524e-06, "loss": 46.0747, "step": 1265 }, { "epoch": 30.143283582089552, "grad_norm": 28.460693359375, "learning_rate": 7.517857142857143e-06, "loss": 47.0127, "step": 1266 }, { "epoch": 30.16716417910448, "grad_norm": 21.60432243347168, "learning_rate": 7.511904761904762e-06, "loss": 45.8081, "step": 1267 }, { "epoch": 30.1910447761194, "grad_norm": 29.013648986816406, "learning_rate": 7.505952380952381e-06, "loss": 46.6712, "step": 1268 }, { "epoch": 30.214925373134328, "grad_norm": 24.865493774414062, "learning_rate": 7.500000000000001e-06, "loss": 46.6816, "step": 1269 }, { "epoch": 30.238805970149254, "grad_norm": 23.676206588745117, "learning_rate": 7.49404761904762e-06, "loss": 46.2663, "step": 1270 }, { "epoch": 30.262686567164177, "grad_norm": 27.889135360717773, "learning_rate": 7.488095238095239e-06, "loss": 45.7052, "step": 1271 }, { "epoch": 30.286567164179104, "grad_norm": 29.024211883544922, "learning_rate": 7.482142857142858e-06, "loss": 45.5005, "step": 1272 }, { "epoch": 30.31044776119403, "grad_norm": 25.8428955078125, "learning_rate": 7.476190476190477e-06, "loss": 46.788, "step": 1273 }, { "epoch": 30.334328358208957, "grad_norm": 26.765539169311523, "learning_rate": 7.470238095238096e-06, "loss": 46.454, "step": 1274 }, { "epoch": 30.35820895522388, "grad_norm": 34.80079650878906, "learning_rate": 7.464285714285715e-06, "loss": 47.6929, "step": 1275 }, { "epoch": 30.382089552238806, "grad_norm": 25.589618682861328, "learning_rate": 7.4583333333333345e-06, "loss": 46.0104, "step": 1276 }, { "epoch": 30.405970149253733, "grad_norm": 27.0733699798584, "learning_rate": 7.4523809523809534e-06, "loss": 45.4742, "step": 1277 }, { "epoch": 30.429850746268656, "grad_norm": 26.662338256835938, "learning_rate": 7.446428571428572e-06, "loss": 46.8066, "step": 1278 }, { "epoch": 30.453731343283582, "grad_norm": 28.389951705932617, "learning_rate": 7.440476190476191e-06, "loss": 46.9716, "step": 1279 }, { "epoch": 30.47761194029851, "grad_norm": NaN, "learning_rate": 7.43452380952381e-06, "loss": 52.2915, "step": 1280 }, { "epoch": 30.501492537313432, "grad_norm": 26.77708625793457, "learning_rate": 7.43452380952381e-06, "loss": 44.919, "step": 1281 }, { "epoch": 30.52537313432836, "grad_norm": 25.423444747924805, "learning_rate": 7.428571428571429e-06, "loss": 46.5057, "step": 1282 }, { "epoch": 30.549253731343285, "grad_norm": 24.04167366027832, "learning_rate": 7.422619047619048e-06, "loss": 46.3685, "step": 1283 }, { "epoch": 30.573134328358208, "grad_norm": 23.51607894897461, "learning_rate": 7.416666666666668e-06, "loss": 45.9694, "step": 1284 }, { "epoch": 30.597014925373134, "grad_norm": 26.216157913208008, "learning_rate": 7.410714285714287e-06, "loss": 47.2582, "step": 1285 }, { "epoch": 30.62089552238806, "grad_norm": 24.339780807495117, "learning_rate": 7.404761904761906e-06, "loss": 44.8052, "step": 1286 }, { "epoch": 30.644776119402984, "grad_norm": 19.203577041625977, "learning_rate": 7.398809523809525e-06, "loss": 47.0301, "step": 1287 }, { "epoch": 30.66865671641791, "grad_norm": 22.252805709838867, "learning_rate": 7.392857142857144e-06, "loss": 45.5993, "step": 1288 }, { "epoch": 30.692537313432837, "grad_norm": 25.316205978393555, "learning_rate": 7.386904761904763e-06, "loss": 46.1157, "step": 1289 }, { "epoch": 30.71641791044776, "grad_norm": 18.311643600463867, "learning_rate": 7.380952380952382e-06, "loss": 46.5986, "step": 1290 }, { "epoch": 30.740298507462686, "grad_norm": 31.84505271911621, "learning_rate": 7.375000000000001e-06, "loss": 46.9177, "step": 1291 }, { "epoch": 30.764179104477613, "grad_norm": 26.221525192260742, "learning_rate": 7.36904761904762e-06, "loss": 47.0897, "step": 1292 }, { "epoch": 30.788059701492536, "grad_norm": 27.029104232788086, "learning_rate": 7.363095238095239e-06, "loss": 45.3724, "step": 1293 }, { "epoch": 30.811940298507462, "grad_norm": 33.51012420654297, "learning_rate": 7.357142857142858e-06, "loss": 46.7046, "step": 1294 }, { "epoch": 30.83582089552239, "grad_norm": 26.42972183227539, "learning_rate": 7.351190476190477e-06, "loss": 46.7606, "step": 1295 }, { "epoch": 30.85970149253731, "grad_norm": 30.91115951538086, "learning_rate": 7.345238095238096e-06, "loss": 47.5485, "step": 1296 }, { "epoch": 30.883582089552238, "grad_norm": 28.296560287475586, "learning_rate": 7.339285714285714e-06, "loss": 46.4997, "step": 1297 }, { "epoch": 30.907462686567165, "grad_norm": 32.054561614990234, "learning_rate": 7.333333333333333e-06, "loss": 46.4953, "step": 1298 }, { "epoch": 30.93134328358209, "grad_norm": 31.635595321655273, "learning_rate": 7.327380952380952e-06, "loss": 46.5325, "step": 1299 }, { "epoch": 30.955223880597014, "grad_norm": 25.557523727416992, "learning_rate": 7.321428571428572e-06, "loss": 45.56, "step": 1300 }, { "epoch": 30.97910447761194, "grad_norm": 30.01810073852539, "learning_rate": 7.315476190476191e-06, "loss": 46.6149, "step": 1301 }, { "epoch": 31.0, "grad_norm": 24.6826114654541, "learning_rate": 7.30952380952381e-06, "loss": 40.8651, "step": 1302 }, { "epoch": 31.023880597014927, "grad_norm": 24.378164291381836, "learning_rate": 7.303571428571429e-06, "loss": 46.0721, "step": 1303 }, { "epoch": 31.04776119402985, "grad_norm": 20.247482299804688, "learning_rate": 7.297619047619048e-06, "loss": 45.8819, "step": 1304 }, { "epoch": 31.071641791044776, "grad_norm": 25.636112213134766, "learning_rate": 7.291666666666667e-06, "loss": 47.1987, "step": 1305 }, { "epoch": 31.095522388059702, "grad_norm": 30.428096771240234, "learning_rate": 7.285714285714286e-06, "loss": 46.6961, "step": 1306 }, { "epoch": 31.119402985074625, "grad_norm": 21.404991149902344, "learning_rate": 7.279761904761905e-06, "loss": 46.6841, "step": 1307 }, { "epoch": 31.143283582089552, "grad_norm": 31.655052185058594, "learning_rate": 7.273809523809524e-06, "loss": 47.7781, "step": 1308 }, { "epoch": 31.16716417910448, "grad_norm": 24.327327728271484, "learning_rate": 7.267857142857143e-06, "loss": 46.002, "step": 1309 }, { "epoch": 31.1910447761194, "grad_norm": 26.230745315551758, "learning_rate": 7.261904761904762e-06, "loss": 47.3903, "step": 1310 }, { "epoch": 31.214925373134328, "grad_norm": 27.337961196899414, "learning_rate": 7.255952380952381e-06, "loss": 46.0999, "step": 1311 }, { "epoch": 31.238805970149254, "grad_norm": 35.14864730834961, "learning_rate": 7.25e-06, "loss": 46.5187, "step": 1312 }, { "epoch": 31.262686567164177, "grad_norm": 26.60109519958496, "learning_rate": 7.24404761904762e-06, "loss": 44.5864, "step": 1313 }, { "epoch": 31.286567164179104, "grad_norm": 33.15165710449219, "learning_rate": 7.238095238095239e-06, "loss": 46.4779, "step": 1314 }, { "epoch": 31.31044776119403, "grad_norm": 26.3510684967041, "learning_rate": 7.232142857142858e-06, "loss": 47.0845, "step": 1315 }, { "epoch": 31.334328358208957, "grad_norm": 47.12569046020508, "learning_rate": 7.226190476190477e-06, "loss": 47.2947, "step": 1316 }, { "epoch": 31.35820895522388, "grad_norm": 40.15263748168945, "learning_rate": 7.220238095238096e-06, "loss": 45.8788, "step": 1317 }, { "epoch": 31.382089552238806, "grad_norm": 36.59072494506836, "learning_rate": 7.2142857142857145e-06, "loss": 45.991, "step": 1318 }, { "epoch": 31.405970149253733, "grad_norm": 36.895408630371094, "learning_rate": 7.2083333333333335e-06, "loss": 46.197, "step": 1319 }, { "epoch": 31.429850746268656, "grad_norm": NaN, "learning_rate": 7.202380952380953e-06, "loss": 38.9024, "step": 1320 }, { "epoch": 31.453731343283582, "grad_norm": 27.446247100830078, "learning_rate": 7.202380952380953e-06, "loss": 45.5293, "step": 1321 }, { "epoch": 31.47761194029851, "grad_norm": 27.48939323425293, "learning_rate": 7.196428571428572e-06, "loss": 46.8754, "step": 1322 }, { "epoch": 31.501492537313432, "grad_norm": 22.736833572387695, "learning_rate": 7.190476190476191e-06, "loss": 44.4905, "step": 1323 }, { "epoch": 31.52537313432836, "grad_norm": 23.413612365722656, "learning_rate": 7.18452380952381e-06, "loss": 47.0714, "step": 1324 }, { "epoch": 31.549253731343285, "grad_norm": 29.154848098754883, "learning_rate": 7.178571428571429e-06, "loss": 46.393, "step": 1325 }, { "epoch": 31.573134328358208, "grad_norm": 28.130638122558594, "learning_rate": 7.172619047619048e-06, "loss": 46.1857, "step": 1326 }, { "epoch": 31.597014925373134, "grad_norm": 19.745920181274414, "learning_rate": 7.166666666666667e-06, "loss": 45.2873, "step": 1327 }, { "epoch": 31.62089552238806, "grad_norm": 27.630279541015625, "learning_rate": 7.160714285714287e-06, "loss": 46.5475, "step": 1328 }, { "epoch": 31.644776119402984, "grad_norm": 20.568862915039062, "learning_rate": 7.154761904761906e-06, "loss": 46.231, "step": 1329 }, { "epoch": 31.66865671641791, "grad_norm": 17.769695281982422, "learning_rate": 7.148809523809525e-06, "loss": 46.8431, "step": 1330 }, { "epoch": 31.692537313432837, "grad_norm": 29.941057205200195, "learning_rate": 7.1428571428571436e-06, "loss": 44.842, "step": 1331 }, { "epoch": 31.71641791044776, "grad_norm": 21.054975509643555, "learning_rate": 7.1369047619047625e-06, "loss": 45.1147, "step": 1332 }, { "epoch": 31.740298507462686, "grad_norm": 23.80388069152832, "learning_rate": 7.1309523809523814e-06, "loss": 46.1839, "step": 1333 }, { "epoch": 31.764179104477613, "grad_norm": 30.561933517456055, "learning_rate": 7.125e-06, "loss": 46.2703, "step": 1334 }, { "epoch": 31.788059701492536, "grad_norm": 23.752151489257812, "learning_rate": 7.11904761904762e-06, "loss": 46.7347, "step": 1335 }, { "epoch": 31.811940298507462, "grad_norm": 32.00548553466797, "learning_rate": 7.113095238095239e-06, "loss": 46.1236, "step": 1336 }, { "epoch": 31.83582089552239, "grad_norm": 26.685504913330078, "learning_rate": 7.107142857142858e-06, "loss": 47.8881, "step": 1337 }, { "epoch": 31.85970149253731, "grad_norm": 26.5799503326416, "learning_rate": 7.101190476190477e-06, "loss": 46.1187, "step": 1338 }, { "epoch": 31.883582089552238, "grad_norm": 28.78062629699707, "learning_rate": 7.095238095238096e-06, "loss": 46.8058, "step": 1339 }, { "epoch": 31.907462686567165, "grad_norm": 26.98428726196289, "learning_rate": 7.089285714285715e-06, "loss": 46.3602, "step": 1340 }, { "epoch": 31.93134328358209, "grad_norm": 32.5291633605957, "learning_rate": 7.083333333333335e-06, "loss": 46.464, "step": 1341 }, { "epoch": 31.955223880597014, "grad_norm": 25.088685989379883, "learning_rate": 7.077380952380954e-06, "loss": 47.0542, "step": 1342 }, { "epoch": 31.97910447761194, "grad_norm": 32.58052444458008, "learning_rate": 7.0714285714285726e-06, "loss": 46.3364, "step": 1343 }, { "epoch": 32.0, "grad_norm": 22.65249252319336, "learning_rate": 7.0654761904761915e-06, "loss": 39.8201, "step": 1344 }, { "epoch": 32.02388059701492, "grad_norm": 27.03556251525879, "learning_rate": 7.0595238095238105e-06, "loss": 47.7819, "step": 1345 }, { "epoch": 32.04776119402985, "grad_norm": 25.712047576904297, "learning_rate": 7.053571428571429e-06, "loss": 46.1116, "step": 1346 }, { "epoch": 32.071641791044776, "grad_norm": 21.99336051940918, "learning_rate": 7.047619047619048e-06, "loss": 46.3745, "step": 1347 }, { "epoch": 32.0955223880597, "grad_norm": 28.53151512145996, "learning_rate": 7.041666666666668e-06, "loss": 46.5998, "step": 1348 }, { "epoch": 32.11940298507463, "grad_norm": 20.151912689208984, "learning_rate": 7.035714285714287e-06, "loss": 45.4197, "step": 1349 }, { "epoch": 32.14328358208955, "grad_norm": 21.491193771362305, "learning_rate": 7.029761904761905e-06, "loss": 46.0246, "step": 1350 }, { "epoch": 32.167164179104475, "grad_norm": 20.057588577270508, "learning_rate": 7.023809523809524e-06, "loss": 46.2149, "step": 1351 }, { "epoch": 32.191044776119405, "grad_norm": 16.675336837768555, "learning_rate": 7.017857142857143e-06, "loss": 46.5231, "step": 1352 }, { "epoch": 32.21492537313433, "grad_norm": 22.007305145263672, "learning_rate": 7.011904761904762e-06, "loss": 44.8665, "step": 1353 }, { "epoch": 32.23880597014925, "grad_norm": 22.947837829589844, "learning_rate": 7.005952380952381e-06, "loss": 45.0394, "step": 1354 }, { "epoch": 32.26268656716418, "grad_norm": 25.444522857666016, "learning_rate": 7e-06, "loss": 46.0367, "step": 1355 }, { "epoch": 32.286567164179104, "grad_norm": 22.319833755493164, "learning_rate": 6.994047619047619e-06, "loss": 47.0455, "step": 1356 }, { "epoch": 32.31044776119403, "grad_norm": 20.41710090637207, "learning_rate": 6.988095238095239e-06, "loss": 45.5119, "step": 1357 }, { "epoch": 32.33432835820896, "grad_norm": 29.03120994567871, "learning_rate": 6.9821428571428576e-06, "loss": 45.1962, "step": 1358 }, { "epoch": 32.35820895522388, "grad_norm": 22.10372543334961, "learning_rate": 6.9761904761904765e-06, "loss": 47.379, "step": 1359 }, { "epoch": 32.3820895522388, "grad_norm": 29.49492073059082, "learning_rate": 6.9702380952380955e-06, "loss": 48.2375, "step": 1360 }, { "epoch": 32.40597014925373, "grad_norm": 26.655149459838867, "learning_rate": 6.964285714285714e-06, "loss": 45.8468, "step": 1361 }, { "epoch": 32.429850746268656, "grad_norm": 27.994979858398438, "learning_rate": 6.958333333333333e-06, "loss": 46.4883, "step": 1362 }, { "epoch": 32.45373134328358, "grad_norm": 25.787900924682617, "learning_rate": 6.952380952380952e-06, "loss": 47.0159, "step": 1363 }, { "epoch": 32.47761194029851, "grad_norm": 29.429485321044922, "learning_rate": 6.946428571428572e-06, "loss": 45.182, "step": 1364 }, { "epoch": 32.50149253731343, "grad_norm": 21.825122833251953, "learning_rate": 6.940476190476191e-06, "loss": 47.4224, "step": 1365 }, { "epoch": 32.525373134328355, "grad_norm": 26.284622192382812, "learning_rate": 6.93452380952381e-06, "loss": 45.7025, "step": 1366 }, { "epoch": 32.549253731343285, "grad_norm": 21.384979248046875, "learning_rate": 6.928571428571429e-06, "loss": 45.6267, "step": 1367 }, { "epoch": 32.57313432835821, "grad_norm": 21.64442253112793, "learning_rate": 6.922619047619048e-06, "loss": 46.8577, "step": 1368 }, { "epoch": 32.59701492537313, "grad_norm": 22.377302169799805, "learning_rate": 6.916666666666667e-06, "loss": 46.5022, "step": 1369 }, { "epoch": 32.62089552238806, "grad_norm": 18.1933536529541, "learning_rate": 6.910714285714286e-06, "loss": 46.7098, "step": 1370 }, { "epoch": 32.644776119402984, "grad_norm": NaN, "learning_rate": 6.9047619047619055e-06, "loss": 59.6159, "step": 1371 }, { "epoch": 32.668656716417914, "grad_norm": 20.35690689086914, "learning_rate": 6.9047619047619055e-06, "loss": 47.4638, "step": 1372 }, { "epoch": 32.69253731343284, "grad_norm": 29.140775680541992, "learning_rate": 6.8988095238095245e-06, "loss": 46.242, "step": 1373 }, { "epoch": 32.71641791044776, "grad_norm": 25.27906608581543, "learning_rate": 6.892857142857143e-06, "loss": 45.7122, "step": 1374 }, { "epoch": 32.74029850746269, "grad_norm": 19.000076293945312, "learning_rate": 6.886904761904762e-06, "loss": 46.4813, "step": 1375 }, { "epoch": 32.76417910447761, "grad_norm": 25.048797607421875, "learning_rate": 6.880952380952381e-06, "loss": 45.5569, "step": 1376 }, { "epoch": 32.788059701492536, "grad_norm": 24.078060150146484, "learning_rate": 6.875e-06, "loss": 45.9708, "step": 1377 }, { "epoch": 32.811940298507466, "grad_norm": 23.822643280029297, "learning_rate": 6.86904761904762e-06, "loss": 47.5914, "step": 1378 }, { "epoch": 32.83582089552239, "grad_norm": 29.267864227294922, "learning_rate": 6.863095238095239e-06, "loss": 45.2741, "step": 1379 }, { "epoch": 32.85970149253731, "grad_norm": 19.477649688720703, "learning_rate": 6.857142857142858e-06, "loss": 46.3849, "step": 1380 }, { "epoch": 32.88358208955224, "grad_norm": 33.31391525268555, "learning_rate": 6.851190476190477e-06, "loss": 44.9609, "step": 1381 }, { "epoch": 32.907462686567165, "grad_norm": 23.064956665039062, "learning_rate": 6.845238095238096e-06, "loss": 45.8295, "step": 1382 }, { "epoch": 32.93134328358209, "grad_norm": 30.366653442382812, "learning_rate": 6.839285714285715e-06, "loss": 44.3142, "step": 1383 }, { "epoch": 32.95522388059702, "grad_norm": 25.059572219848633, "learning_rate": 6.833333333333334e-06, "loss": 46.5768, "step": 1384 }, { "epoch": 32.97910447761194, "grad_norm": 23.186697006225586, "learning_rate": 6.8273809523809535e-06, "loss": 45.185, "step": 1385 }, { "epoch": 33.0, "grad_norm": 21.550168991088867, "learning_rate": 6.8214285714285724e-06, "loss": 39.1732, "step": 1386 }, { "epoch": 33.02388059701492, "grad_norm": 22.417282104492188, "learning_rate": 6.815476190476191e-06, "loss": 47.6667, "step": 1387 }, { "epoch": 33.04776119402985, "grad_norm": 26.805702209472656, "learning_rate": 6.80952380952381e-06, "loss": 46.4091, "step": 1388 }, { "epoch": 33.071641791044776, "grad_norm": 23.723695755004883, "learning_rate": 6.803571428571429e-06, "loss": 46.3798, "step": 1389 }, { "epoch": 33.0955223880597, "grad_norm": 30.029897689819336, "learning_rate": 6.797619047619048e-06, "loss": 45.9736, "step": 1390 }, { "epoch": 33.11940298507463, "grad_norm": 19.387653350830078, "learning_rate": 6.791666666666667e-06, "loss": 45.1998, "step": 1391 }, { "epoch": 33.14328358208955, "grad_norm": 33.68477249145508, "learning_rate": 6.785714285714287e-06, "loss": 45.4435, "step": 1392 }, { "epoch": 33.167164179104475, "grad_norm": 26.001699447631836, "learning_rate": 6.779761904761906e-06, "loss": 45.6725, "step": 1393 }, { "epoch": 33.191044776119405, "grad_norm": 34.19535827636719, "learning_rate": 6.773809523809525e-06, "loss": 46.6387, "step": 1394 }, { "epoch": 33.21492537313433, "grad_norm": 24.243515014648438, "learning_rate": 6.767857142857144e-06, "loss": 46.4235, "step": 1395 }, { "epoch": 33.23880597014925, "grad_norm": 33.013675689697266, "learning_rate": 6.761904761904763e-06, "loss": 46.7151, "step": 1396 }, { "epoch": 33.26268656716418, "grad_norm": 30.15135955810547, "learning_rate": 6.755952380952382e-06, "loss": 46.3002, "step": 1397 }, { "epoch": 33.286567164179104, "grad_norm": 31.58100128173828, "learning_rate": 6.750000000000001e-06, "loss": 46.6084, "step": 1398 }, { "epoch": 33.31044776119403, "grad_norm": 26.23592185974121, "learning_rate": 6.74404761904762e-06, "loss": 45.5745, "step": 1399 }, { "epoch": 33.33432835820896, "grad_norm": 32.273311614990234, "learning_rate": 6.738095238095239e-06, "loss": 45.1131, "step": 1400 }, { "epoch": 33.35820895522388, "grad_norm": 29.7532958984375, "learning_rate": 6.732142857142858e-06, "loss": 45.9739, "step": 1401 }, { "epoch": 33.3820895522388, "grad_norm": 32.648704528808594, "learning_rate": 6.726190476190477e-06, "loss": 46.6293, "step": 1402 }, { "epoch": 33.40597014925373, "grad_norm": 26.455778121948242, "learning_rate": 6.720238095238096e-06, "loss": 46.5187, "step": 1403 }, { "epoch": 33.429850746268656, "grad_norm": 30.5809326171875, "learning_rate": 6.714285714285714e-06, "loss": 46.5477, "step": 1404 }, { "epoch": 33.45373134328358, "grad_norm": 29.604442596435547, "learning_rate": 6.708333333333333e-06, "loss": 45.462, "step": 1405 }, { "epoch": 33.47761194029851, "grad_norm": 36.19733428955078, "learning_rate": 6.702380952380952e-06, "loss": 46.7046, "step": 1406 }, { "epoch": 33.50149253731343, "grad_norm": 37.733619689941406, "learning_rate": 6.696428571428571e-06, "loss": 46.2156, "step": 1407 }, { "epoch": 33.525373134328355, "grad_norm": 26.49405288696289, "learning_rate": 6.690476190476191e-06, "loss": 45.373, "step": 1408 }, { "epoch": 33.549253731343285, "grad_norm": 30.09432601928711, "learning_rate": 6.68452380952381e-06, "loss": 46.3868, "step": 1409 }, { "epoch": 33.57313432835821, "grad_norm": 25.85702896118164, "learning_rate": 6.678571428571429e-06, "loss": 45.805, "step": 1410 }, { "epoch": 33.59701492537313, "grad_norm": 28.564380645751953, "learning_rate": 6.672619047619048e-06, "loss": 46.4158, "step": 1411 }, { "epoch": 33.62089552238806, "grad_norm": 19.878551483154297, "learning_rate": 6.666666666666667e-06, "loss": 46.5922, "step": 1412 }, { "epoch": 33.644776119402984, "grad_norm": 22.83441734313965, "learning_rate": 6.660714285714286e-06, "loss": 45.1216, "step": 1413 }, { "epoch": 33.668656716417914, "grad_norm": 31.372957229614258, "learning_rate": 6.654761904761905e-06, "loss": 47.111, "step": 1414 }, { "epoch": 33.69253731343284, "grad_norm": 23.98666763305664, "learning_rate": 6.648809523809524e-06, "loss": 47.1762, "step": 1415 }, { "epoch": 33.71641791044776, "grad_norm": 27.895401000976562, "learning_rate": 6.642857142857143e-06, "loss": 45.6151, "step": 1416 }, { "epoch": 33.74029850746269, "grad_norm": 21.776100158691406, "learning_rate": 6.636904761904762e-06, "loss": 45.7198, "step": 1417 }, { "epoch": 33.76417910447761, "grad_norm": 30.373878479003906, "learning_rate": 6.630952380952381e-06, "loss": 45.2212, "step": 1418 }, { "epoch": 33.788059701492536, "grad_norm": 26.604324340820312, "learning_rate": 6.625e-06, "loss": 45.2001, "step": 1419 }, { "epoch": 33.811940298507466, "grad_norm": 29.38104248046875, "learning_rate": 6.619047619047619e-06, "loss": 46.711, "step": 1420 }, { "epoch": 33.83582089552239, "grad_norm": 24.36806869506836, "learning_rate": 6.613095238095239e-06, "loss": 46.3608, "step": 1421 }, { "epoch": 33.85970149253731, "grad_norm": 33.40534210205078, "learning_rate": 6.607142857142858e-06, "loss": 45.5189, "step": 1422 }, { "epoch": 33.88358208955224, "grad_norm": 25.91522789001465, "learning_rate": 6.601190476190477e-06, "loss": 47.3604, "step": 1423 }, { "epoch": 33.907462686567165, "grad_norm": 25.26549530029297, "learning_rate": 6.595238095238096e-06, "loss": 46.483, "step": 1424 }, { "epoch": 33.93134328358209, "grad_norm": 26.101816177368164, "learning_rate": 6.589285714285715e-06, "loss": 45.7998, "step": 1425 }, { "epoch": 33.95522388059702, "grad_norm": 27.942903518676758, "learning_rate": 6.5833333333333335e-06, "loss": 46.4593, "step": 1426 }, { "epoch": 33.97910447761194, "grad_norm": 21.551429748535156, "learning_rate": 6.5773809523809525e-06, "loss": 45.458, "step": 1427 }, { "epoch": 34.0, "grad_norm": 32.26907730102539, "learning_rate": 6.571428571428572e-06, "loss": 38.5718, "step": 1428 }, { "epoch": 34.02388059701492, "grad_norm": 32.16934585571289, "learning_rate": 6.565476190476191e-06, "loss": 45.5812, "step": 1429 }, { "epoch": 34.04776119402985, "grad_norm": 19.646459579467773, "learning_rate": 6.55952380952381e-06, "loss": 44.9032, "step": 1430 }, { "epoch": 34.071641791044776, "grad_norm": 28.886430740356445, "learning_rate": 6.553571428571429e-06, "loss": 45.4187, "step": 1431 }, { "epoch": 34.0955223880597, "grad_norm": 22.722471237182617, "learning_rate": 6.547619047619048e-06, "loss": 45.468, "step": 1432 }, { "epoch": 34.11940298507463, "grad_norm": 25.334766387939453, "learning_rate": 6.541666666666667e-06, "loss": 47.3534, "step": 1433 }, { "epoch": 34.14328358208955, "grad_norm": 28.49740982055664, "learning_rate": 6.535714285714286e-06, "loss": 47.4733, "step": 1434 }, { "epoch": 34.167164179104475, "grad_norm": 27.773820877075195, "learning_rate": 6.529761904761906e-06, "loss": 45.3215, "step": 1435 }, { "epoch": 34.191044776119405, "grad_norm": 24.25234031677246, "learning_rate": 6.523809523809525e-06, "loss": 46.0011, "step": 1436 }, { "epoch": 34.21492537313433, "grad_norm": 28.666475296020508, "learning_rate": 6.517857142857144e-06, "loss": 45.9091, "step": 1437 }, { "epoch": 34.23880597014925, "grad_norm": 24.367712020874023, "learning_rate": 6.5119047619047626e-06, "loss": 46.5004, "step": 1438 }, { "epoch": 34.26268656716418, "grad_norm": 23.11983299255371, "learning_rate": 6.5059523809523815e-06, "loss": 47.3335, "step": 1439 }, { "epoch": 34.286567164179104, "grad_norm": 20.672304153442383, "learning_rate": 6.5000000000000004e-06, "loss": 47.1491, "step": 1440 }, { "epoch": 34.31044776119403, "grad_norm": 23.815290451049805, "learning_rate": 6.49404761904762e-06, "loss": 46.7084, "step": 1441 }, { "epoch": 34.33432835820896, "grad_norm": 20.582489013671875, "learning_rate": 6.488095238095239e-06, "loss": 46.9707, "step": 1442 }, { "epoch": 34.35820895522388, "grad_norm": 18.315673828125, "learning_rate": 6.482142857142858e-06, "loss": 47.5359, "step": 1443 }, { "epoch": 34.3820895522388, "grad_norm": 24.396499633789062, "learning_rate": 6.476190476190477e-06, "loss": 46.052, "step": 1444 }, { "epoch": 34.40597014925373, "grad_norm": 21.200523376464844, "learning_rate": 6.470238095238096e-06, "loss": 46.5843, "step": 1445 }, { "epoch": 34.429850746268656, "grad_norm": 17.59020233154297, "learning_rate": 6.464285714285715e-06, "loss": 46.0017, "step": 1446 }, { "epoch": 34.45373134328358, "grad_norm": 21.810382843017578, "learning_rate": 6.458333333333334e-06, "loss": 46.4232, "step": 1447 }, { "epoch": 34.47761194029851, "grad_norm": 27.78464126586914, "learning_rate": 6.452380952380954e-06, "loss": 46.0973, "step": 1448 }, { "epoch": 34.50149253731343, "grad_norm": 29.360275268554688, "learning_rate": 6.446428571428573e-06, "loss": 45.4821, "step": 1449 }, { "epoch": 34.525373134328355, "grad_norm": 26.914587020874023, "learning_rate": 6.4404761904761916e-06, "loss": 45.2982, "step": 1450 }, { "epoch": 34.549253731343285, "grad_norm": 22.19925880432129, "learning_rate": 6.4345238095238105e-06, "loss": 46.6693, "step": 1451 }, { "epoch": 34.57313432835821, "grad_norm": 25.39541244506836, "learning_rate": 6.4285714285714295e-06, "loss": 45.8936, "step": 1452 }, { "epoch": 34.59701492537313, "grad_norm": 20.633222579956055, "learning_rate": 6.422619047619048e-06, "loss": 44.6061, "step": 1453 }, { "epoch": 34.62089552238806, "grad_norm": 22.513790130615234, "learning_rate": 6.416666666666667e-06, "loss": 45.5503, "step": 1454 }, { "epoch": 34.644776119402984, "grad_norm": 25.715484619140625, "learning_rate": 6.410714285714287e-06, "loss": 45.7485, "step": 1455 }, { "epoch": 34.668656716417914, "grad_norm": 21.964609146118164, "learning_rate": 6.404761904761904e-06, "loss": 46.3223, "step": 1456 }, { "epoch": 34.69253731343284, "grad_norm": 20.32435417175293, "learning_rate": 6.398809523809524e-06, "loss": 45.1507, "step": 1457 }, { "epoch": 34.71641791044776, "grad_norm": 24.32924461364746, "learning_rate": 6.392857142857143e-06, "loss": 45.8221, "step": 1458 }, { "epoch": 34.74029850746269, "grad_norm": 19.200895309448242, "learning_rate": 6.386904761904762e-06, "loss": 45.0915, "step": 1459 }, { "epoch": 34.76417910447761, "grad_norm": 24.436569213867188, "learning_rate": 6.380952380952381e-06, "loss": 45.5892, "step": 1460 }, { "epoch": 34.788059701492536, "grad_norm": 24.381568908691406, "learning_rate": 6.375e-06, "loss": 45.5295, "step": 1461 }, { "epoch": 34.811940298507466, "grad_norm": 19.64159393310547, "learning_rate": 6.369047619047619e-06, "loss": 46.244, "step": 1462 }, { "epoch": 34.83582089552239, "grad_norm": 27.420351028442383, "learning_rate": 6.363095238095238e-06, "loss": 45.9723, "step": 1463 }, { "epoch": 34.85970149253731, "grad_norm": 18.136165618896484, "learning_rate": 6.357142857142858e-06, "loss": 45.5106, "step": 1464 }, { "epoch": 34.88358208955224, "grad_norm": 21.70622444152832, "learning_rate": 6.3511904761904766e-06, "loss": 46.4965, "step": 1465 }, { "epoch": 34.907462686567165, "grad_norm": 23.573131561279297, "learning_rate": 6.3452380952380955e-06, "loss": 46.0698, "step": 1466 }, { "epoch": 34.93134328358209, "grad_norm": 21.20003890991211, "learning_rate": 6.3392857142857145e-06, "loss": 45.6992, "step": 1467 }, { "epoch": 34.95522388059702, "grad_norm": 23.745859146118164, "learning_rate": 6.333333333333333e-06, "loss": 45.8431, "step": 1468 }, { "epoch": 34.97910447761194, "grad_norm": 21.26241683959961, "learning_rate": 6.327380952380952e-06, "loss": 45.6577, "step": 1469 }, { "epoch": 35.0, "grad_norm": 22.033447265625, "learning_rate": 6.321428571428571e-06, "loss": 39.8491, "step": 1470 }, { "epoch": 35.02388059701492, "grad_norm": NaN, "learning_rate": 6.315476190476191e-06, "loss": 68.4405, "step": 1471 }, { "epoch": 35.04776119402985, "grad_norm": 22.06501007080078, "learning_rate": 6.315476190476191e-06, "loss": 44.971, "step": 1472 }, { "epoch": 35.071641791044776, "grad_norm": 23.923011779785156, "learning_rate": 6.30952380952381e-06, "loss": 45.4865, "step": 1473 }, { "epoch": 35.0955223880597, "grad_norm": 18.272428512573242, "learning_rate": 6.303571428571429e-06, "loss": 46.6551, "step": 1474 }, { "epoch": 35.11940298507463, "grad_norm": 23.046764373779297, "learning_rate": 6.297619047619048e-06, "loss": 46.3486, "step": 1475 }, { "epoch": 35.14328358208955, "grad_norm": 23.790733337402344, "learning_rate": 6.291666666666667e-06, "loss": 46.7032, "step": 1476 }, { "epoch": 35.167164179104475, "grad_norm": 23.891183853149414, "learning_rate": 6.285714285714286e-06, "loss": 44.9916, "step": 1477 }, { "epoch": 35.191044776119405, "grad_norm": 25.107316970825195, "learning_rate": 6.279761904761906e-06, "loss": 46.2358, "step": 1478 }, { "epoch": 35.21492537313433, "grad_norm": 20.48590660095215, "learning_rate": 6.2738095238095245e-06, "loss": 46.0048, "step": 1479 }, { "epoch": 35.23880597014925, "grad_norm": 25.425119400024414, "learning_rate": 6.2678571428571435e-06, "loss": 44.0941, "step": 1480 }, { "epoch": 35.26268656716418, "grad_norm": 28.264352798461914, "learning_rate": 6.261904761904762e-06, "loss": 46.5301, "step": 1481 }, { "epoch": 35.286567164179104, "grad_norm": 23.869232177734375, "learning_rate": 6.255952380952381e-06, "loss": 45.681, "step": 1482 }, { "epoch": 35.31044776119403, "grad_norm": 28.840408325195312, "learning_rate": 6.25e-06, "loss": 43.7517, "step": 1483 }, { "epoch": 35.33432835820896, "grad_norm": 26.768037796020508, "learning_rate": 6.244047619047619e-06, "loss": 46.1423, "step": 1484 }, { "epoch": 35.35820895522388, "grad_norm": 23.532470703125, "learning_rate": 6.238095238095239e-06, "loss": 45.6669, "step": 1485 }, { "epoch": 35.3820895522388, "grad_norm": 25.94774055480957, "learning_rate": 6.232142857142858e-06, "loss": 45.7672, "step": 1486 }, { "epoch": 35.40597014925373, "grad_norm": 23.215801239013672, "learning_rate": 6.226190476190477e-06, "loss": 45.6991, "step": 1487 }, { "epoch": 35.429850746268656, "grad_norm": 22.13661003112793, "learning_rate": 6.220238095238096e-06, "loss": 44.5214, "step": 1488 }, { "epoch": 35.45373134328358, "grad_norm": 24.596481323242188, "learning_rate": 6.214285714285715e-06, "loss": 46.1515, "step": 1489 }, { "epoch": 35.47761194029851, "grad_norm": 19.416872024536133, "learning_rate": 6.208333333333334e-06, "loss": 45.7596, "step": 1490 }, { "epoch": 35.50149253731343, "grad_norm": 23.993833541870117, "learning_rate": 6.202380952380953e-06, "loss": 46.1668, "step": 1491 }, { "epoch": 35.525373134328355, "grad_norm": 21.481637954711914, "learning_rate": 6.1964285714285725e-06, "loss": 45.1812, "step": 1492 }, { "epoch": 35.549253731343285, "grad_norm": 19.26917839050293, "learning_rate": 6.1904761904761914e-06, "loss": 45.9316, "step": 1493 }, { "epoch": 35.57313432835821, "grad_norm": 22.80115509033203, "learning_rate": 6.18452380952381e-06, "loss": 45.9088, "step": 1494 }, { "epoch": 35.59701492537313, "grad_norm": 21.33648109436035, "learning_rate": 6.178571428571429e-06, "loss": 46.7602, "step": 1495 }, { "epoch": 35.62089552238806, "grad_norm": 28.059947967529297, "learning_rate": 6.172619047619048e-06, "loss": 46.1767, "step": 1496 }, { "epoch": 35.644776119402984, "grad_norm": 21.1577205657959, "learning_rate": 6.166666666666667e-06, "loss": 45.6847, "step": 1497 }, { "epoch": 35.668656716417914, "grad_norm": 23.277509689331055, "learning_rate": 6.160714285714286e-06, "loss": 45.6145, "step": 1498 }, { "epoch": 35.69253731343284, "grad_norm": 16.815677642822266, "learning_rate": 6.154761904761906e-06, "loss": 45.515, "step": 1499 }, { "epoch": 35.71641791044776, "grad_norm": 24.218280792236328, "learning_rate": 6.148809523809525e-06, "loss": 47.6329, "step": 1500 }, { "epoch": 35.74029850746269, "grad_norm": 20.943737030029297, "learning_rate": 6.142857142857144e-06, "loss": 45.7388, "step": 1501 }, { "epoch": 35.76417910447761, "grad_norm": 20.344369888305664, "learning_rate": 6.136904761904763e-06, "loss": 45.9404, "step": 1502 }, { "epoch": 35.788059701492536, "grad_norm": 25.980487823486328, "learning_rate": 6.130952380952382e-06, "loss": 46.6928, "step": 1503 }, { "epoch": 35.811940298507466, "grad_norm": 19.285552978515625, "learning_rate": 6.125000000000001e-06, "loss": 46.4614, "step": 1504 }, { "epoch": 35.83582089552239, "grad_norm": 27.701011657714844, "learning_rate": 6.11904761904762e-06, "loss": 45.258, "step": 1505 }, { "epoch": 35.85970149253731, "grad_norm": 24.963760375976562, "learning_rate": 6.113095238095239e-06, "loss": 47.0721, "step": 1506 }, { "epoch": 35.88358208955224, "grad_norm": 25.08616828918457, "learning_rate": 6.107142857142858e-06, "loss": 45.9668, "step": 1507 }, { "epoch": 35.907462686567165, "grad_norm": 18.00580406188965, "learning_rate": 6.101190476190477e-06, "loss": 46.1049, "step": 1508 }, { "epoch": 35.93134328358209, "grad_norm": 24.686004638671875, "learning_rate": 6.095238095238096e-06, "loss": 46.6996, "step": 1509 }, { "epoch": 35.95522388059702, "grad_norm": 18.304157257080078, "learning_rate": 6.089285714285714e-06, "loss": 46.694, "step": 1510 }, { "epoch": 35.97910447761194, "grad_norm": 23.10132598876953, "learning_rate": 6.083333333333333e-06, "loss": 46.3807, "step": 1511 }, { "epoch": 36.0, "grad_norm": 19.077655792236328, "learning_rate": 6.077380952380952e-06, "loss": 41.1702, "step": 1512 }, { "epoch": 36.02388059701492, "grad_norm": 26.49584197998047, "learning_rate": 6.071428571428571e-06, "loss": 45.382, "step": 1513 }, { "epoch": 36.04776119402985, "grad_norm": 24.438323974609375, "learning_rate": 6.065476190476191e-06, "loss": 45.9433, "step": 1514 }, { "epoch": 36.071641791044776, "grad_norm": 30.8107852935791, "learning_rate": 6.05952380952381e-06, "loss": 45.6688, "step": 1515 }, { "epoch": 36.0955223880597, "grad_norm": 31.754154205322266, "learning_rate": 6.053571428571429e-06, "loss": 45.9768, "step": 1516 }, { "epoch": 36.11940298507463, "grad_norm": 26.034778594970703, "learning_rate": 6.047619047619048e-06, "loss": 46.022, "step": 1517 }, { "epoch": 36.14328358208955, "grad_norm": 31.643035888671875, "learning_rate": 6.041666666666667e-06, "loss": 44.5987, "step": 1518 }, { "epoch": 36.167164179104475, "grad_norm": 24.322874069213867, "learning_rate": 6.035714285714286e-06, "loss": 45.3774, "step": 1519 }, { "epoch": 36.191044776119405, "grad_norm": 29.067466735839844, "learning_rate": 6.029761904761905e-06, "loss": 46.1784, "step": 1520 }, { "epoch": 36.21492537313433, "grad_norm": 30.415788650512695, "learning_rate": 6.023809523809524e-06, "loss": 46.7259, "step": 1521 }, { "epoch": 36.23880597014925, "grad_norm": 19.417943954467773, "learning_rate": 6.017857142857143e-06, "loss": 46.0544, "step": 1522 }, { "epoch": 36.26268656716418, "grad_norm": 27.239500045776367, "learning_rate": 6.011904761904762e-06, "loss": 46.9344, "step": 1523 }, { "epoch": 36.286567164179104, "grad_norm": 27.671018600463867, "learning_rate": 6.005952380952381e-06, "loss": 45.78, "step": 1524 }, { "epoch": 36.31044776119403, "grad_norm": 25.103811264038086, "learning_rate": 6e-06, "loss": 45.7153, "step": 1525 }, { "epoch": 36.33432835820896, "grad_norm": 26.25937843322754, "learning_rate": 5.994047619047619e-06, "loss": 45.3151, "step": 1526 }, { "epoch": 36.35820895522388, "grad_norm": 18.400033950805664, "learning_rate": 5.988095238095238e-06, "loss": 46.5614, "step": 1527 }, { "epoch": 36.3820895522388, "grad_norm": 35.505374908447266, "learning_rate": 5.982142857142858e-06, "loss": 45.8805, "step": 1528 }, { "epoch": 36.40597014925373, "grad_norm": 31.476438522338867, "learning_rate": 5.976190476190477e-06, "loss": 46.189, "step": 1529 }, { "epoch": 36.429850746268656, "grad_norm": 26.192047119140625, "learning_rate": 5.970238095238096e-06, "loss": 45.7026, "step": 1530 }, { "epoch": 36.45373134328358, "grad_norm": 29.712961196899414, "learning_rate": 5.964285714285715e-06, "loss": 44.86, "step": 1531 }, { "epoch": 36.47761194029851, "grad_norm": 28.22374153137207, "learning_rate": 5.958333333333334e-06, "loss": 45.7644, "step": 1532 }, { "epoch": 36.50149253731343, "grad_norm": 23.614940643310547, "learning_rate": 5.9523809523809525e-06, "loss": 45.0373, "step": 1533 }, { "epoch": 36.525373134328355, "grad_norm": 27.78896141052246, "learning_rate": 5.9464285714285715e-06, "loss": 46.9277, "step": 1534 }, { "epoch": 36.549253731343285, "grad_norm": 18.64702606201172, "learning_rate": 5.940476190476191e-06, "loss": 45.277, "step": 1535 }, { "epoch": 36.57313432835821, "grad_norm": 27.2061710357666, "learning_rate": 5.93452380952381e-06, "loss": 46.8394, "step": 1536 }, { "epoch": 36.59701492537313, "grad_norm": 26.296287536621094, "learning_rate": 5.928571428571429e-06, "loss": 44.8519, "step": 1537 }, { "epoch": 36.62089552238806, "grad_norm": 26.594314575195312, "learning_rate": 5.922619047619048e-06, "loss": 45.1743, "step": 1538 }, { "epoch": 36.644776119402984, "grad_norm": 24.076461791992188, "learning_rate": 5.916666666666667e-06, "loss": 45.4145, "step": 1539 }, { "epoch": 36.668656716417914, "grad_norm": 23.31978416442871, "learning_rate": 5.910714285714286e-06, "loss": 45.7526, "step": 1540 }, { "epoch": 36.69253731343284, "grad_norm": 22.630998611450195, "learning_rate": 5.904761904761905e-06, "loss": 46.4197, "step": 1541 }, { "epoch": 36.71641791044776, "grad_norm": 32.66592025756836, "learning_rate": 5.898809523809525e-06, "loss": 45.0123, "step": 1542 }, { "epoch": 36.74029850746269, "grad_norm": 24.478839874267578, "learning_rate": 5.892857142857144e-06, "loss": 46.1418, "step": 1543 }, { "epoch": 36.76417910447761, "grad_norm": 33.325775146484375, "learning_rate": 5.886904761904763e-06, "loss": 45.8228, "step": 1544 }, { "epoch": 36.788059701492536, "grad_norm": 29.264528274536133, "learning_rate": 5.8809523809523816e-06, "loss": 46.1921, "step": 1545 }, { "epoch": 36.811940298507466, "grad_norm": 31.78297233581543, "learning_rate": 5.8750000000000005e-06, "loss": 45.4564, "step": 1546 }, { "epoch": 36.83582089552239, "grad_norm": 27.223127365112305, "learning_rate": 5.8690476190476194e-06, "loss": 45.5277, "step": 1547 }, { "epoch": 36.85970149253731, "grad_norm": 26.29422950744629, "learning_rate": 5.863095238095239e-06, "loss": 46.2285, "step": 1548 }, { "epoch": 36.88358208955224, "grad_norm": 27.933652877807617, "learning_rate": 5.857142857142858e-06, "loss": 46.4441, "step": 1549 }, { "epoch": 36.907462686567165, "grad_norm": 25.306129455566406, "learning_rate": 5.851190476190477e-06, "loss": 45.9724, "step": 1550 }, { "epoch": 36.93134328358209, "grad_norm": 23.481304168701172, "learning_rate": 5.845238095238096e-06, "loss": 46.2544, "step": 1551 }, { "epoch": 36.95522388059702, "grad_norm": 20.86615562438965, "learning_rate": 5.839285714285715e-06, "loss": 47.4502, "step": 1552 }, { "epoch": 36.97910447761194, "grad_norm": 21.519290924072266, "learning_rate": 5.833333333333334e-06, "loss": 45.0165, "step": 1553 }, { "epoch": 37.0, "grad_norm": 22.031705856323242, "learning_rate": 5.827380952380953e-06, "loss": 40.6199, "step": 1554 }, { "epoch": 37.02388059701492, "grad_norm": 29.273820877075195, "learning_rate": 5.821428571428573e-06, "loss": 46.5836, "step": 1555 }, { "epoch": 37.04776119402985, "grad_norm": 24.417945861816406, "learning_rate": 5.815476190476192e-06, "loss": 44.9549, "step": 1556 }, { "epoch": 37.071641791044776, "grad_norm": 24.60706901550293, "learning_rate": 5.8095238095238106e-06, "loss": 44.8607, "step": 1557 }, { "epoch": 37.0955223880597, "grad_norm": 24.76397132873535, "learning_rate": 5.8035714285714295e-06, "loss": 44.9875, "step": 1558 }, { "epoch": 37.11940298507463, "grad_norm": 24.380352020263672, "learning_rate": 5.7976190476190485e-06, "loss": 45.4835, "step": 1559 }, { "epoch": 37.14328358208955, "grad_norm": 19.852746963500977, "learning_rate": 5.791666666666667e-06, "loss": 45.1303, "step": 1560 }, { "epoch": 37.167164179104475, "grad_norm": 23.550888061523438, "learning_rate": 5.785714285714286e-06, "loss": 46.1086, "step": 1561 }, { "epoch": 37.191044776119405, "grad_norm": 24.31315803527832, "learning_rate": 5.7797619047619044e-06, "loss": 45.8181, "step": 1562 }, { "epoch": 37.21492537313433, "grad_norm": 19.324602127075195, "learning_rate": 5.773809523809523e-06, "loss": 44.8606, "step": 1563 }, { "epoch": 37.23880597014925, "grad_norm": 26.747098922729492, "learning_rate": 5.767857142857143e-06, "loss": 45.753, "step": 1564 }, { "epoch": 37.26268656716418, "grad_norm": 22.472572326660156, "learning_rate": 5.761904761904762e-06, "loss": 46.0156, "step": 1565 }, { "epoch": 37.286567164179104, "grad_norm": 20.813426971435547, "learning_rate": 5.755952380952381e-06, "loss": 46.7466, "step": 1566 }, { "epoch": 37.31044776119403, "grad_norm": 27.869413375854492, "learning_rate": 5.75e-06, "loss": 46.287, "step": 1567 }, { "epoch": 37.33432835820896, "grad_norm": 23.257444381713867, "learning_rate": 5.744047619047619e-06, "loss": 45.9862, "step": 1568 }, { "epoch": 37.35820895522388, "grad_norm": 24.715946197509766, "learning_rate": 5.738095238095238e-06, "loss": 47.3128, "step": 1569 }, { "epoch": 37.3820895522388, "grad_norm": 21.670385360717773, "learning_rate": 5.732142857142857e-06, "loss": 46.121, "step": 1570 }, { "epoch": 37.40597014925373, "grad_norm": 24.53063201904297, "learning_rate": 5.726190476190477e-06, "loss": 46.5441, "step": 1571 }, { "epoch": 37.429850746268656, "grad_norm": 19.584630966186523, "learning_rate": 5.7202380952380956e-06, "loss": 46.0683, "step": 1572 }, { "epoch": 37.45373134328358, "grad_norm": 26.179149627685547, "learning_rate": 5.7142857142857145e-06, "loss": 46.3294, "step": 1573 }, { "epoch": 37.47761194029851, "grad_norm": 21.13595199584961, "learning_rate": 5.7083333333333335e-06, "loss": 45.7853, "step": 1574 }, { "epoch": 37.50149253731343, "grad_norm": 28.440006256103516, "learning_rate": 5.702380952380952e-06, "loss": 46.5029, "step": 1575 }, { "epoch": 37.525373134328355, "grad_norm": 27.941879272460938, "learning_rate": 5.696428571428571e-06, "loss": 45.6132, "step": 1576 }, { "epoch": 37.549253731343285, "grad_norm": 25.952688217163086, "learning_rate": 5.690476190476191e-06, "loss": 45.6803, "step": 1577 }, { "epoch": 37.57313432835821, "grad_norm": 23.551633834838867, "learning_rate": 5.68452380952381e-06, "loss": 45.1563, "step": 1578 }, { "epoch": 37.59701492537313, "grad_norm": 23.119415283203125, "learning_rate": 5.678571428571429e-06, "loss": 47.2717, "step": 1579 }, { "epoch": 37.62089552238806, "grad_norm": 27.995214462280273, "learning_rate": 5.672619047619048e-06, "loss": 46.1847, "step": 1580 }, { "epoch": 37.644776119402984, "grad_norm": 28.0698299407959, "learning_rate": 5.666666666666667e-06, "loss": 46.4639, "step": 1581 }, { "epoch": 37.668656716417914, "grad_norm": 23.09457015991211, "learning_rate": 5.660714285714286e-06, "loss": 45.0939, "step": 1582 }, { "epoch": 37.69253731343284, "grad_norm": 25.94692611694336, "learning_rate": 5.654761904761905e-06, "loss": 45.216, "step": 1583 }, { "epoch": 37.71641791044776, "grad_norm": 20.192176818847656, "learning_rate": 5.648809523809525e-06, "loss": 45.7997, "step": 1584 }, { "epoch": 37.74029850746269, "grad_norm": 26.115283966064453, "learning_rate": 5.6428571428571435e-06, "loss": 44.8405, "step": 1585 }, { "epoch": 37.76417910447761, "grad_norm": 24.431346893310547, "learning_rate": 5.6369047619047625e-06, "loss": 46.5067, "step": 1586 }, { "epoch": 37.788059701492536, "grad_norm": 25.838623046875, "learning_rate": 5.630952380952381e-06, "loss": 46.1806, "step": 1587 }, { "epoch": 37.811940298507466, "grad_norm": 20.44222640991211, "learning_rate": 5.625e-06, "loss": 45.7445, "step": 1588 }, { "epoch": 37.83582089552239, "grad_norm": 19.459331512451172, "learning_rate": 5.619047619047619e-06, "loss": 45.7875, "step": 1589 }, { "epoch": 37.85970149253731, "grad_norm": 17.49920082092285, "learning_rate": 5.613095238095238e-06, "loss": 44.2889, "step": 1590 }, { "epoch": 37.88358208955224, "grad_norm": 18.541828155517578, "learning_rate": 5.607142857142858e-06, "loss": 46.7668, "step": 1591 }, { "epoch": 37.907462686567165, "grad_norm": 16.22308349609375, "learning_rate": 5.601190476190477e-06, "loss": 45.0406, "step": 1592 }, { "epoch": 37.93134328358209, "grad_norm": 21.068069458007812, "learning_rate": 5.595238095238096e-06, "loss": 44.0997, "step": 1593 }, { "epoch": 37.95522388059702, "grad_norm": 18.877992630004883, "learning_rate": 5.589285714285715e-06, "loss": 46.5816, "step": 1594 }, { "epoch": 37.97910447761194, "grad_norm": 20.14031410217285, "learning_rate": 5.583333333333334e-06, "loss": 44.8537, "step": 1595 }, { "epoch": 38.0, "grad_norm": 19.989953994750977, "learning_rate": 5.577380952380953e-06, "loss": 39.8501, "step": 1596 }, { "epoch": 38.02388059701492, "grad_norm": 23.484283447265625, "learning_rate": 5.571428571428572e-06, "loss": 46.3864, "step": 1597 }, { "epoch": 38.04776119402985, "grad_norm": 20.579587936401367, "learning_rate": 5.5654761904761915e-06, "loss": 46.1473, "step": 1598 }, { "epoch": 38.071641791044776, "grad_norm": 19.48423194885254, "learning_rate": 5.5595238095238104e-06, "loss": 45.3255, "step": 1599 }, { "epoch": 38.0955223880597, "grad_norm": 23.766077041625977, "learning_rate": 5.553571428571429e-06, "loss": 45.4387, "step": 1600 }, { "epoch": 38.11940298507463, "grad_norm": 17.605247497558594, "learning_rate": 5.547619047619048e-06, "loss": 46.1065, "step": 1601 }, { "epoch": 38.14328358208955, "grad_norm": 20.179826736450195, "learning_rate": 5.541666666666667e-06, "loss": 45.974, "step": 1602 }, { "epoch": 38.167164179104475, "grad_norm": 28.50605583190918, "learning_rate": 5.535714285714286e-06, "loss": 46.0505, "step": 1603 }, { "epoch": 38.191044776119405, "grad_norm": 16.770771026611328, "learning_rate": 5.529761904761905e-06, "loss": 46.4403, "step": 1604 }, { "epoch": 38.21492537313433, "grad_norm": NaN, "learning_rate": 5.523809523809525e-06, "loss": 69.3153, "step": 1605 }, { "epoch": 38.23880597014925, "grad_norm": 25.01431655883789, "learning_rate": 5.523809523809525e-06, "loss": 46.8119, "step": 1606 }, { "epoch": 38.26268656716418, "grad_norm": 20.459747314453125, "learning_rate": 5.517857142857144e-06, "loss": 47.7687, "step": 1607 }, { "epoch": 38.286567164179104, "grad_norm": 21.603086471557617, "learning_rate": 5.511904761904763e-06, "loss": 44.6093, "step": 1608 }, { "epoch": 38.31044776119403, "grad_norm": 25.284805297851562, "learning_rate": 5.505952380952382e-06, "loss": 45.0834, "step": 1609 }, { "epoch": 38.33432835820896, "grad_norm": 21.638917922973633, "learning_rate": 5.500000000000001e-06, "loss": 45.3904, "step": 1610 }, { "epoch": 38.35820895522388, "grad_norm": 22.443374633789062, "learning_rate": 5.49404761904762e-06, "loss": 43.7163, "step": 1611 }, { "epoch": 38.3820895522388, "grad_norm": 23.427288055419922, "learning_rate": 5.4880952380952394e-06, "loss": 44.7692, "step": 1612 }, { "epoch": 38.40597014925373, "grad_norm": 22.346813201904297, "learning_rate": 5.482142857142858e-06, "loss": 45.0674, "step": 1613 }, { "epoch": 38.429850746268656, "grad_norm": 20.567325592041016, "learning_rate": 5.476190476190477e-06, "loss": 45.5367, "step": 1614 }, { "epoch": 38.45373134328358, "grad_norm": 23.872394561767578, "learning_rate": 5.470238095238096e-06, "loss": 46.2728, "step": 1615 }, { "epoch": 38.47761194029851, "grad_norm": 23.790176391601562, "learning_rate": 5.464285714285714e-06, "loss": 46.3734, "step": 1616 }, { "epoch": 38.50149253731343, "grad_norm": 22.707136154174805, "learning_rate": 5.458333333333333e-06, "loss": 44.577, "step": 1617 }, { "epoch": 38.525373134328355, "grad_norm": 26.203781127929688, "learning_rate": 5.452380952380952e-06, "loss": 45.6794, "step": 1618 }, { "epoch": 38.549253731343285, "grad_norm": 22.935991287231445, "learning_rate": 5.446428571428571e-06, "loss": 45.7815, "step": 1619 }, { "epoch": 38.57313432835821, "grad_norm": 28.275053024291992, "learning_rate": 5.44047619047619e-06, "loss": 45.0312, "step": 1620 }, { "epoch": 38.59701492537313, "grad_norm": 23.848264694213867, "learning_rate": 5.43452380952381e-06, "loss": 46.7093, "step": 1621 }, { "epoch": 38.62089552238806, "grad_norm": 25.240819931030273, "learning_rate": 5.428571428571429e-06, "loss": 46.6751, "step": 1622 }, { "epoch": 38.644776119402984, "grad_norm": 26.2618350982666, "learning_rate": 5.422619047619048e-06, "loss": 47.5501, "step": 1623 }, { "epoch": 38.668656716417914, "grad_norm": 23.986392974853516, "learning_rate": 5.416666666666667e-06, "loss": 45.6208, "step": 1624 }, { "epoch": 38.69253731343284, "grad_norm": 22.11539077758789, "learning_rate": 5.410714285714286e-06, "loss": 44.4163, "step": 1625 }, { "epoch": 38.71641791044776, "grad_norm": 22.9071044921875, "learning_rate": 5.404761904761905e-06, "loss": 45.5715, "step": 1626 }, { "epoch": 38.74029850746269, "grad_norm": 22.759733200073242, "learning_rate": 5.398809523809524e-06, "loss": 45.1706, "step": 1627 }, { "epoch": 38.76417910447761, "grad_norm": 23.66644287109375, "learning_rate": 5.392857142857143e-06, "loss": 45.4343, "step": 1628 }, { "epoch": 38.788059701492536, "grad_norm": 20.179203033447266, "learning_rate": 5.386904761904762e-06, "loss": 45.9163, "step": 1629 }, { "epoch": 38.811940298507466, "grad_norm": 22.327817916870117, "learning_rate": 5.380952380952381e-06, "loss": 44.1558, "step": 1630 }, { "epoch": 38.83582089552239, "grad_norm": 22.10496711730957, "learning_rate": 5.375e-06, "loss": 45.764, "step": 1631 }, { "epoch": 38.85970149253731, "grad_norm": 24.25627326965332, "learning_rate": 5.369047619047619e-06, "loss": 46.6394, "step": 1632 }, { "epoch": 38.88358208955224, "grad_norm": 20.797740936279297, "learning_rate": 5.363095238095238e-06, "loss": 45.6251, "step": 1633 }, { "epoch": 38.907462686567165, "grad_norm": 24.14659309387207, "learning_rate": 5.357142857142857e-06, "loss": 45.5603, "step": 1634 }, { "epoch": 38.93134328358209, "grad_norm": 23.259584426879883, "learning_rate": 5.351190476190477e-06, "loss": 46.055, "step": 1635 }, { "epoch": 38.95522388059702, "grad_norm": 23.72128677368164, "learning_rate": 5.345238095238096e-06, "loss": 45.9729, "step": 1636 }, { "epoch": 38.97910447761194, "grad_norm": 22.746183395385742, "learning_rate": 5.339285714285715e-06, "loss": 46.1893, "step": 1637 }, { "epoch": 39.0, "grad_norm": 22.067306518554688, "learning_rate": 5.333333333333334e-06, "loss": 39.8095, "step": 1638 }, { "epoch": 39.02388059701492, "grad_norm": 22.888097763061523, "learning_rate": 5.327380952380953e-06, "loss": 45.3095, "step": 1639 }, { "epoch": 39.04776119402985, "grad_norm": 23.86408233642578, "learning_rate": 5.3214285714285715e-06, "loss": 46.3774, "step": 1640 }, { "epoch": 39.071641791044776, "grad_norm": 21.418088912963867, "learning_rate": 5.3154761904761905e-06, "loss": 45.6404, "step": 1641 }, { "epoch": 39.0955223880597, "grad_norm": 21.521831512451172, "learning_rate": 5.30952380952381e-06, "loss": 46.4895, "step": 1642 }, { "epoch": 39.11940298507463, "grad_norm": 20.189105987548828, "learning_rate": 5.303571428571429e-06, "loss": 44.6538, "step": 1643 }, { "epoch": 39.14328358208955, "grad_norm": 19.73761558532715, "learning_rate": 5.297619047619048e-06, "loss": 45.5941, "step": 1644 }, { "epoch": 39.167164179104475, "grad_norm": 25.631227493286133, "learning_rate": 5.291666666666667e-06, "loss": 44.4105, "step": 1645 }, { "epoch": 39.191044776119405, "grad_norm": 19.47798728942871, "learning_rate": 5.285714285714286e-06, "loss": 46.3286, "step": 1646 }, { "epoch": 39.21492537313433, "grad_norm": 19.627609252929688, "learning_rate": 5.279761904761905e-06, "loss": 46.0707, "step": 1647 }, { "epoch": 39.23880597014925, "grad_norm": 19.668777465820312, "learning_rate": 5.273809523809525e-06, "loss": 44.8447, "step": 1648 }, { "epoch": 39.26268656716418, "grad_norm": 23.311546325683594, "learning_rate": 5.267857142857144e-06, "loss": 45.9081, "step": 1649 }, { "epoch": 39.286567164179104, "grad_norm": 21.426624298095703, "learning_rate": 5.261904761904763e-06, "loss": 45.4256, "step": 1650 }, { "epoch": 39.31044776119403, "grad_norm": 19.545969009399414, "learning_rate": 5.255952380952382e-06, "loss": 45.4264, "step": 1651 }, { "epoch": 39.33432835820896, "grad_norm": 22.78704833984375, "learning_rate": 5.2500000000000006e-06, "loss": 47.6822, "step": 1652 }, { "epoch": 39.35820895522388, "grad_norm": 18.759178161621094, "learning_rate": 5.2440476190476195e-06, "loss": 44.5254, "step": 1653 }, { "epoch": 39.3820895522388, "grad_norm": 19.855981826782227, "learning_rate": 5.2380952380952384e-06, "loss": 46.249, "step": 1654 }, { "epoch": 39.40597014925373, "grad_norm": 18.817089080810547, "learning_rate": 5.232142857142858e-06, "loss": 45.2813, "step": 1655 }, { "epoch": 39.429850746268656, "grad_norm": 19.587581634521484, "learning_rate": 5.226190476190477e-06, "loss": 45.0445, "step": 1656 }, { "epoch": 39.45373134328358, "grad_norm": 19.9105167388916, "learning_rate": 5.220238095238096e-06, "loss": 46.8658, "step": 1657 }, { "epoch": 39.47761194029851, "grad_norm": 19.529748916625977, "learning_rate": 5.214285714285715e-06, "loss": 46.6175, "step": 1658 }, { "epoch": 39.50149253731343, "grad_norm": 18.63764762878418, "learning_rate": 5.208333333333334e-06, "loss": 46.3122, "step": 1659 }, { "epoch": 39.525373134328355, "grad_norm": 19.58228874206543, "learning_rate": 5.202380952380953e-06, "loss": 44.6263, "step": 1660 }, { "epoch": 39.549253731343285, "grad_norm": 21.451528549194336, "learning_rate": 5.196428571428572e-06, "loss": 46.2707, "step": 1661 }, { "epoch": 39.57313432835821, "grad_norm": 22.756628036499023, "learning_rate": 5.190476190476192e-06, "loss": 45.0001, "step": 1662 }, { "epoch": 39.59701492537313, "grad_norm": 24.481945037841797, "learning_rate": 5.184523809523811e-06, "loss": 45.3038, "step": 1663 }, { "epoch": 39.62089552238806, "grad_norm": 19.3010196685791, "learning_rate": 5.1785714285714296e-06, "loss": 46.1894, "step": 1664 }, { "epoch": 39.644776119402984, "grad_norm": 24.840822219848633, "learning_rate": 5.1726190476190485e-06, "loss": 46.6593, "step": 1665 }, { "epoch": 39.668656716417914, "grad_norm": 20.712875366210938, "learning_rate": 5.1666666666666675e-06, "loss": 46.7594, "step": 1666 }, { "epoch": 39.69253731343284, "grad_norm": 20.431598663330078, "learning_rate": 5.160714285714286e-06, "loss": 46.4969, "step": 1667 }, { "epoch": 39.71641791044776, "grad_norm": 21.094484329223633, "learning_rate": 5.1547619047619045e-06, "loss": 44.5114, "step": 1668 }, { "epoch": 39.74029850746269, "grad_norm": 22.929946899414062, "learning_rate": 5.1488095238095234e-06, "loss": 44.5251, "step": 1669 }, { "epoch": 39.76417910447761, "grad_norm": 17.285877227783203, "learning_rate": 5.142857142857142e-06, "loss": 45.312, "step": 1670 }, { "epoch": 39.788059701492536, "grad_norm": 18.29960823059082, "learning_rate": 5.136904761904762e-06, "loss": 44.984, "step": 1671 }, { "epoch": 39.811940298507466, "grad_norm": 25.79044532775879, "learning_rate": 5.130952380952381e-06, "loss": 44.8192, "step": 1672 }, { "epoch": 39.83582089552239, "grad_norm": 21.014759063720703, "learning_rate": 5.125e-06, "loss": 46.9319, "step": 1673 }, { "epoch": 39.85970149253731, "grad_norm": 24.91911506652832, "learning_rate": 5.119047619047619e-06, "loss": 46.7778, "step": 1674 }, { "epoch": 39.88358208955224, "grad_norm": 25.174942016601562, "learning_rate": 5.113095238095238e-06, "loss": 44.948, "step": 1675 }, { "epoch": 39.907462686567165, "grad_norm": 22.642148971557617, "learning_rate": 5.107142857142857e-06, "loss": 45.5964, "step": 1676 }, { "epoch": 39.93134328358209, "grad_norm": 24.867389678955078, "learning_rate": 5.101190476190476e-06, "loss": 45.446, "step": 1677 }, { "epoch": 39.95522388059702, "grad_norm": 21.888269424438477, "learning_rate": 5.095238095238096e-06, "loss": 45.414, "step": 1678 }, { "epoch": 39.97910447761194, "grad_norm": 25.071487426757812, "learning_rate": 5.0892857142857146e-06, "loss": 44.9464, "step": 1679 }, { "epoch": 40.0, "grad_norm": 19.389556884765625, "learning_rate": 5.0833333333333335e-06, "loss": 39.5515, "step": 1680 }, { "epoch": 40.0, "step": 1680, "total_flos": 8.26172747445074e+16, "train_loss": 23.38366504396711, "train_runtime": 26137.4766, "train_samples_per_second": 8.191, "train_steps_per_second": 0.064 }, { "epoch": 40.02388059701492, "grad_norm": 18.99544334411621, "learning_rate": 1e-05, "loss": 46.1194, "step": 1681 }, { "epoch": 40.04776119402985, "grad_norm": Infinity, "learning_rate": 9.996031746031746e-06, "loss": 54.6718, "step": 1682 }, { "epoch": 40.071641791044776, "grad_norm": Infinity, "learning_rate": 9.996031746031746e-06, "loss": 54.4703, "step": 1683 }, { "epoch": 40.0955223880597, "grad_norm": 416.26324462890625, "learning_rate": 9.996031746031746e-06, "loss": 53.5676, "step": 1684 }, { "epoch": 40.11940298507463, "grad_norm": 147.0504608154297, "learning_rate": 9.992063492063493e-06, "loss": 50.2561, "step": 1685 }, { "epoch": 40.14328358208955, "grad_norm": 122.7557601928711, "learning_rate": 9.988095238095239e-06, "loss": 50.4153, "step": 1686 }, { "epoch": 40.167164179104475, "grad_norm": 97.062744140625, "learning_rate": 9.984126984126986e-06, "loss": 47.2739, "step": 1687 }, { "epoch": 40.191044776119405, "grad_norm": 73.37904357910156, "learning_rate": 9.980158730158731e-06, "loss": 48.0252, "step": 1688 }, { "epoch": 40.21492537313433, "grad_norm": 68.98373413085938, "learning_rate": 9.976190476190477e-06, "loss": 47.6782, "step": 1689 }, { "epoch": 40.23880597014925, "grad_norm": 56.258548736572266, "learning_rate": 9.972222222222224e-06, "loss": 47.5786, "step": 1690 }, { "epoch": 40.26268656716418, "grad_norm": 68.9515609741211, "learning_rate": 9.968253968253969e-06, "loss": 46.3938, "step": 1691 }, { "epoch": 40.286567164179104, "grad_norm": 39.17803955078125, "learning_rate": 9.964285714285714e-06, "loss": 45.9047, "step": 1692 }, { "epoch": 40.31044776119403, "grad_norm": 51.936981201171875, "learning_rate": 9.960317460317462e-06, "loss": 45.6047, "step": 1693 }, { "epoch": 40.33432835820896, "grad_norm": 43.64280700683594, "learning_rate": 9.956349206349207e-06, "loss": 46.6234, "step": 1694 }, { "epoch": 40.35820895522388, "grad_norm": 58.56443405151367, "learning_rate": 9.952380952380954e-06, "loss": 47.255, "step": 1695 }, { "epoch": 40.3820895522388, "grad_norm": 37.53863525390625, "learning_rate": 9.9484126984127e-06, "loss": 47.1183, "step": 1696 }, { "epoch": 40.40597014925373, "grad_norm": 35.800628662109375, "learning_rate": 9.944444444444445e-06, "loss": 46.3602, "step": 1697 }, { "epoch": 40.429850746268656, "grad_norm": 39.58418655395508, "learning_rate": 9.940476190476192e-06, "loss": 46.3082, "step": 1698 }, { "epoch": 40.45373134328358, "grad_norm": 30.6373233795166, "learning_rate": 9.936507936507937e-06, "loss": 45.2231, "step": 1699 }, { "epoch": 40.47761194029851, "grad_norm": 34.47962951660156, "learning_rate": 9.932539682539684e-06, "loss": 46.3243, "step": 1700 }, { "epoch": 40.50149253731343, "grad_norm": 23.599184036254883, "learning_rate": 9.92857142857143e-06, "loss": 46.3045, "step": 1701 }, { "epoch": 40.525373134328355, "grad_norm": 27.183767318725586, "learning_rate": 9.924603174603175e-06, "loss": 45.3216, "step": 1702 }, { "epoch": 40.549253731343285, "grad_norm": 27.263038635253906, "learning_rate": 9.920634920634922e-06, "loss": 46.8117, "step": 1703 }, { "epoch": 40.57313432835821, "grad_norm": 30.570518493652344, "learning_rate": 9.916666666666668e-06, "loss": 46.0951, "step": 1704 }, { "epoch": 40.59701492537313, "grad_norm": 23.30783462524414, "learning_rate": 9.912698412698413e-06, "loss": 45.5407, "step": 1705 }, { "epoch": 40.62089552238806, "grad_norm": 29.269088745117188, "learning_rate": 9.90873015873016e-06, "loss": 45.9624, "step": 1706 }, { "epoch": 40.644776119402984, "grad_norm": NaN, "learning_rate": 9.904761904761906e-06, "loss": 75.1575, "step": 1707 }, { "epoch": 40.668656716417914, "grad_norm": 25.785404205322266, "learning_rate": 9.904761904761906e-06, "loss": 45.9263, "step": 1708 }, { "epoch": 40.69253731343284, "grad_norm": 34.729549407958984, "learning_rate": 9.900793650793653e-06, "loss": 45.5276, "step": 1709 }, { "epoch": 40.71641791044776, "grad_norm": 28.62750816345215, "learning_rate": 9.896825396825398e-06, "loss": 46.2797, "step": 1710 }, { "epoch": 40.74029850746269, "grad_norm": 31.081378936767578, "learning_rate": 9.892857142857143e-06, "loss": 45.1643, "step": 1711 }, { "epoch": 40.76417910447761, "grad_norm": 28.92620086669922, "learning_rate": 9.88888888888889e-06, "loss": 46.3105, "step": 1712 }, { "epoch": 40.788059701492536, "grad_norm": 23.232866287231445, "learning_rate": 9.884920634920636e-06, "loss": 46.6131, "step": 1713 }, { "epoch": 40.811940298507466, "grad_norm": 25.97928810119629, "learning_rate": 9.880952380952381e-06, "loss": 45.5054, "step": 1714 }, { "epoch": 40.83582089552239, "grad_norm": 27.382034301757812, "learning_rate": 9.876984126984128e-06, "loss": 45.593, "step": 1715 }, { "epoch": 40.85970149253731, "grad_norm": 23.762460708618164, "learning_rate": 9.873015873015874e-06, "loss": 45.7414, "step": 1716 }, { "epoch": 40.88358208955224, "grad_norm": 29.6158390045166, "learning_rate": 9.869047619047621e-06, "loss": 45.0669, "step": 1717 }, { "epoch": 40.907462686567165, "grad_norm": 24.66147804260254, "learning_rate": 9.865079365079366e-06, "loss": 45.6125, "step": 1718 }, { "epoch": 40.93134328358209, "grad_norm": 28.167495727539062, "learning_rate": 9.861111111111112e-06, "loss": 46.099, "step": 1719 }, { "epoch": 40.95522388059702, "grad_norm": 27.325531005859375, "learning_rate": 9.857142857142859e-06, "loss": 45.1728, "step": 1720 }, { "epoch": 40.97910447761194, "grad_norm": 23.650911331176758, "learning_rate": 9.853174603174604e-06, "loss": 44.6743, "step": 1721 }, { "epoch": 41.0, "grad_norm": 22.53518295288086, "learning_rate": 9.849206349206351e-06, "loss": 39.1464, "step": 1722 }, { "epoch": 41.02388059701492, "grad_norm": 28.995275497436523, "learning_rate": 9.845238095238097e-06, "loss": 44.5823, "step": 1723 }, { "epoch": 41.04776119402985, "grad_norm": 28.680805206298828, "learning_rate": 9.841269841269842e-06, "loss": 44.7002, "step": 1724 }, { "epoch": 41.071641791044776, "grad_norm": 24.10047149658203, "learning_rate": 9.837301587301588e-06, "loss": 46.232, "step": 1725 }, { "epoch": 41.0955223880597, "grad_norm": 25.722291946411133, "learning_rate": 9.833333333333333e-06, "loss": 45.1447, "step": 1726 }, { "epoch": 41.11940298507463, "grad_norm": 22.944278717041016, "learning_rate": 9.82936507936508e-06, "loss": 46.57, "step": 1727 }, { "epoch": 41.14328358208955, "grad_norm": 25.734941482543945, "learning_rate": 9.825396825396825e-06, "loss": 45.8386, "step": 1728 }, { "epoch": 41.167164179104475, "grad_norm": 23.644197463989258, "learning_rate": 9.821428571428573e-06, "loss": 46.2608, "step": 1729 }, { "epoch": 41.191044776119405, "grad_norm": 22.163721084594727, "learning_rate": 9.817460317460318e-06, "loss": 45.2914, "step": 1730 }, { "epoch": 41.21492537313433, "grad_norm": 33.71270751953125, "learning_rate": 9.813492063492063e-06, "loss": 44.6372, "step": 1731 }, { "epoch": 41.23880597014925, "grad_norm": 28.478361129760742, "learning_rate": 9.80952380952381e-06, "loss": 45.472, "step": 1732 }, { "epoch": 41.26268656716418, "grad_norm": 27.120990753173828, "learning_rate": 9.805555555555556e-06, "loss": 46.445, "step": 1733 }, { "epoch": 41.286567164179104, "grad_norm": 25.342784881591797, "learning_rate": 9.801587301587301e-06, "loss": 45.8317, "step": 1734 }, { "epoch": 41.31044776119403, "grad_norm": 28.94765853881836, "learning_rate": 9.797619047619048e-06, "loss": 46.0677, "step": 1735 }, { "epoch": 41.33432835820896, "grad_norm": 22.983802795410156, "learning_rate": 9.793650793650794e-06, "loss": 45.8029, "step": 1736 }, { "epoch": 41.35820895522388, "grad_norm": 24.97469711303711, "learning_rate": 9.78968253968254e-06, "loss": 46.7215, "step": 1737 }, { "epoch": 41.3820895522388, "grad_norm": 26.136960983276367, "learning_rate": 9.785714285714286e-06, "loss": 45.8042, "step": 1738 }, { "epoch": 41.40597014925373, "grad_norm": 21.150083541870117, "learning_rate": 9.781746031746032e-06, "loss": 45.836, "step": 1739 }, { "epoch": 41.429850746268656, "grad_norm": 19.56538963317871, "learning_rate": 9.777777777777779e-06, "loss": 46.0126, "step": 1740 }, { "epoch": 41.45373134328358, "grad_norm": 26.608108520507812, "learning_rate": 9.773809523809524e-06, "loss": 45.3108, "step": 1741 }, { "epoch": 41.47761194029851, "grad_norm": 19.020097732543945, "learning_rate": 9.769841269841271e-06, "loss": 46.278, "step": 1742 }, { "epoch": 41.50149253731343, "grad_norm": 25.4818172454834, "learning_rate": 9.765873015873017e-06, "loss": 45.8142, "step": 1743 }, { "epoch": 41.525373134328355, "grad_norm": 21.7120304107666, "learning_rate": 9.761904761904762e-06, "loss": 46.0221, "step": 1744 }, { "epoch": 41.549253731343285, "grad_norm": 24.395984649658203, "learning_rate": 9.757936507936509e-06, "loss": 45.3654, "step": 1745 }, { "epoch": 41.57313432835821, "grad_norm": 26.8757381439209, "learning_rate": 9.753968253968254e-06, "loss": 46.0073, "step": 1746 }, { "epoch": 41.59701492537313, "grad_norm": 27.31254768371582, "learning_rate": 9.75e-06, "loss": 45.465, "step": 1747 }, { "epoch": 41.62089552238806, "grad_norm": 23.271629333496094, "learning_rate": 9.746031746031747e-06, "loss": 46.0739, "step": 1748 }, { "epoch": 41.644776119402984, "grad_norm": 24.240131378173828, "learning_rate": 9.742063492063492e-06, "loss": 45.6978, "step": 1749 }, { "epoch": 41.668656716417914, "grad_norm": 23.16962242126465, "learning_rate": 9.73809523809524e-06, "loss": 45.9961, "step": 1750 }, { "epoch": 41.69253731343284, "grad_norm": 29.63677406311035, "learning_rate": 9.734126984126985e-06, "loss": 45.0859, "step": 1751 }, { "epoch": 41.71641791044776, "grad_norm": 20.725126266479492, "learning_rate": 9.73015873015873e-06, "loss": 45.3546, "step": 1752 }, { "epoch": 41.74029850746269, "grad_norm": 23.172834396362305, "learning_rate": 9.726190476190477e-06, "loss": 45.3822, "step": 1753 }, { "epoch": 41.76417910447761, "grad_norm": 30.179182052612305, "learning_rate": 9.722222222222223e-06, "loss": 45.0901, "step": 1754 }, { "epoch": 41.788059701492536, "grad_norm": 17.276126861572266, "learning_rate": 9.71825396825397e-06, "loss": 45.4555, "step": 1755 }, { "epoch": 41.811940298507466, "grad_norm": 24.585174560546875, "learning_rate": 9.714285714285715e-06, "loss": 43.8513, "step": 1756 }, { "epoch": 41.83582089552239, "grad_norm": 23.242969512939453, "learning_rate": 9.71031746031746e-06, "loss": 45.7996, "step": 1757 }, { "epoch": 41.85970149253731, "grad_norm": 21.585342407226562, "learning_rate": 9.706349206349208e-06, "loss": 45.2616, "step": 1758 }, { "epoch": 41.88358208955224, "grad_norm": 28.802600860595703, "learning_rate": 9.702380952380953e-06, "loss": 45.6062, "step": 1759 }, { "epoch": 41.907462686567165, "grad_norm": 23.895822525024414, "learning_rate": 9.698412698412698e-06, "loss": 44.3029, "step": 1760 }, { "epoch": 41.93134328358209, "grad_norm": 26.175247192382812, "learning_rate": 9.694444444444446e-06, "loss": 45.6048, "step": 1761 }, { "epoch": 41.95522388059702, "grad_norm": 23.499914169311523, "learning_rate": 9.690476190476191e-06, "loss": 45.4891, "step": 1762 }, { "epoch": 41.97910447761194, "grad_norm": 22.244211196899414, "learning_rate": 9.686507936507938e-06, "loss": 44.1723, "step": 1763 }, { "epoch": 42.0, "grad_norm": 20.29228401184082, "learning_rate": 9.682539682539683e-06, "loss": 39.7896, "step": 1764 }, { "epoch": 42.02388059701492, "grad_norm": 27.773515701293945, "learning_rate": 9.678571428571429e-06, "loss": 45.7383, "step": 1765 }, { "epoch": 42.04776119402985, "grad_norm": 27.289716720581055, "learning_rate": 9.674603174603176e-06, "loss": 45.2073, "step": 1766 }, { "epoch": 42.071641791044776, "grad_norm": 21.16016387939453, "learning_rate": 9.670634920634921e-06, "loss": 45.3415, "step": 1767 }, { "epoch": 42.0955223880597, "grad_norm": 28.878597259521484, "learning_rate": 9.666666666666667e-06, "loss": 45.1139, "step": 1768 }, { "epoch": 42.11940298507463, "grad_norm": 29.504600524902344, "learning_rate": 9.662698412698414e-06, "loss": 46.185, "step": 1769 }, { "epoch": 42.14328358208955, "grad_norm": 20.372560501098633, "learning_rate": 9.65873015873016e-06, "loss": 46.4996, "step": 1770 }, { "epoch": 42.167164179104475, "grad_norm": 27.437274932861328, "learning_rate": 9.654761904761906e-06, "loss": 43.77, "step": 1771 }, { "epoch": 42.191044776119405, "grad_norm": 23.735233306884766, "learning_rate": 9.650793650793652e-06, "loss": 43.9415, "step": 1772 }, { "epoch": 42.21492537313433, "grad_norm": 26.434886932373047, "learning_rate": 9.646825396825397e-06, "loss": 46.6163, "step": 1773 }, { "epoch": 42.23880597014925, "grad_norm": 26.843782424926758, "learning_rate": 9.642857142857144e-06, "loss": 46.1987, "step": 1774 }, { "epoch": 42.26268656716418, "grad_norm": 25.86046600341797, "learning_rate": 9.63888888888889e-06, "loss": 46.635, "step": 1775 }, { "epoch": 42.286567164179104, "grad_norm": 25.95208740234375, "learning_rate": 9.634920634920637e-06, "loss": 44.6339, "step": 1776 }, { "epoch": 42.31044776119403, "grad_norm": 21.243392944335938, "learning_rate": 9.630952380952382e-06, "loss": 45.1151, "step": 1777 }, { "epoch": 42.33432835820896, "grad_norm": 22.445972442626953, "learning_rate": 9.626984126984127e-06, "loss": 45.1704, "step": 1778 }, { "epoch": 42.35820895522388, "grad_norm": 37.871681213378906, "learning_rate": 9.623015873015875e-06, "loss": 45.116, "step": 1779 }, { "epoch": 42.3820895522388, "grad_norm": 25.75882339477539, "learning_rate": 9.61904761904762e-06, "loss": 45.2748, "step": 1780 }, { "epoch": 42.40597014925373, "grad_norm": 32.44329071044922, "learning_rate": 9.615079365079365e-06, "loss": 45.0782, "step": 1781 }, { "epoch": 42.429850746268656, "grad_norm": 25.74696159362793, "learning_rate": 9.611111111111112e-06, "loss": 46.1405, "step": 1782 }, { "epoch": 42.45373134328358, "grad_norm": 44.88374710083008, "learning_rate": 9.607142857142858e-06, "loss": 45.7843, "step": 1783 }, { "epoch": 42.47761194029851, "grad_norm": 29.956615447998047, "learning_rate": 9.603174603174605e-06, "loss": 46.7361, "step": 1784 }, { "epoch": 42.50149253731343, "grad_norm": 41.191864013671875, "learning_rate": 9.59920634920635e-06, "loss": 45.7368, "step": 1785 }, { "epoch": 42.525373134328355, "grad_norm": 32.30370330810547, "learning_rate": 9.595238095238096e-06, "loss": 45.4091, "step": 1786 }, { "epoch": 42.549253731343285, "grad_norm": 32.65694046020508, "learning_rate": 9.591269841269843e-06, "loss": 44.8837, "step": 1787 }, { "epoch": 42.57313432835821, "grad_norm": 29.783634185791016, "learning_rate": 9.587301587301588e-06, "loss": 46.0239, "step": 1788 }, { "epoch": 42.59701492537313, "grad_norm": 32.415035247802734, "learning_rate": 9.583333333333335e-06, "loss": 44.7968, "step": 1789 }, { "epoch": 42.62089552238806, "grad_norm": 31.461589813232422, "learning_rate": 9.57936507936508e-06, "loss": 44.5408, "step": 1790 }, { "epoch": 42.644776119402984, "grad_norm": 27.083560943603516, "learning_rate": 9.575396825396826e-06, "loss": 44.9716, "step": 1791 }, { "epoch": 42.668656716417914, "grad_norm": 34.453102111816406, "learning_rate": 9.571428571428573e-06, "loss": 44.8527, "step": 1792 }, { "epoch": 42.69253731343284, "grad_norm": 24.403902053833008, "learning_rate": 9.567460317460319e-06, "loss": 44.6635, "step": 1793 }, { "epoch": 42.71641791044776, "grad_norm": 43.89455795288086, "learning_rate": 9.563492063492064e-06, "loss": 45.9798, "step": 1794 }, { "epoch": 42.74029850746269, "grad_norm": 33.704498291015625, "learning_rate": 9.559523809523811e-06, "loss": 45.8182, "step": 1795 }, { "epoch": 42.76417910447761, "grad_norm": 38.266357421875, "learning_rate": 9.555555555555556e-06, "loss": 44.8923, "step": 1796 }, { "epoch": 42.788059701492536, "grad_norm": 36.38774490356445, "learning_rate": 9.551587301587304e-06, "loss": 45.5987, "step": 1797 }, { "epoch": 42.811940298507466, "grad_norm": 33.449737548828125, "learning_rate": 9.547619047619049e-06, "loss": 46.2494, "step": 1798 }, { "epoch": 42.83582089552239, "grad_norm": 29.902509689331055, "learning_rate": 9.543650793650794e-06, "loss": 44.7438, "step": 1799 }, { "epoch": 42.85970149253731, "grad_norm": 35.025184631347656, "learning_rate": 9.539682539682541e-06, "loss": 44.7825, "step": 1800 }, { "epoch": 42.88358208955224, "grad_norm": 30.783037185668945, "learning_rate": 9.535714285714287e-06, "loss": 45.3493, "step": 1801 }, { "epoch": 42.907462686567165, "grad_norm": 28.61165428161621, "learning_rate": 9.531746031746032e-06, "loss": 46.5537, "step": 1802 }, { "epoch": 42.93134328358209, "grad_norm": 34.27008056640625, "learning_rate": 9.527777777777778e-06, "loss": 44.0439, "step": 1803 }, { "epoch": 42.95522388059702, "grad_norm": 31.05691146850586, "learning_rate": 9.523809523809525e-06, "loss": 46.1128, "step": 1804 }, { "epoch": 42.97910447761194, "grad_norm": 28.658565521240234, "learning_rate": 9.51984126984127e-06, "loss": 46.2442, "step": 1805 }, { "epoch": 43.0, "grad_norm": 20.02385139465332, "learning_rate": 9.515873015873016e-06, "loss": 39.8537, "step": 1806 }, { "epoch": 43.02388059701492, "grad_norm": 32.5422248840332, "learning_rate": 9.511904761904763e-06, "loss": 44.0765, "step": 1807 }, { "epoch": 43.04776119402985, "grad_norm": 22.364904403686523, "learning_rate": 9.507936507936508e-06, "loss": 45.6789, "step": 1808 }, { "epoch": 43.071641791044776, "grad_norm": 35.576072692871094, "learning_rate": 9.503968253968255e-06, "loss": 45.6707, "step": 1809 }, { "epoch": 43.0955223880597, "grad_norm": 27.892908096313477, "learning_rate": 9.5e-06, "loss": 46.348, "step": 1810 }, { "epoch": 43.11940298507463, "grad_norm": 22.283756256103516, "learning_rate": 9.496031746031746e-06, "loss": 44.8757, "step": 1811 }, { "epoch": 43.14328358208955, "grad_norm": 34.38758087158203, "learning_rate": 9.492063492063493e-06, "loss": 45.0544, "step": 1812 }, { "epoch": 43.167164179104475, "grad_norm": 26.720060348510742, "learning_rate": 9.488095238095238e-06, "loss": 46.2092, "step": 1813 }, { "epoch": 43.191044776119405, "grad_norm": 35.375362396240234, "learning_rate": 9.484126984126984e-06, "loss": 46.0173, "step": 1814 }, { "epoch": 43.21492537313433, "grad_norm": 24.92397117614746, "learning_rate": 9.480158730158731e-06, "loss": 45.5031, "step": 1815 }, { "epoch": 43.23880597014925, "grad_norm": 35.76795959472656, "learning_rate": 9.476190476190476e-06, "loss": 44.8149, "step": 1816 }, { "epoch": 43.26268656716418, "grad_norm": 29.861675262451172, "learning_rate": 9.472222222222223e-06, "loss": 45.4173, "step": 1817 }, { "epoch": 43.286567164179104, "grad_norm": 33.83314895629883, "learning_rate": 9.468253968253969e-06, "loss": 44.8036, "step": 1818 }, { "epoch": 43.31044776119403, "grad_norm": 32.994483947753906, "learning_rate": 9.464285714285714e-06, "loss": 46.5555, "step": 1819 }, { "epoch": 43.33432835820896, "grad_norm": 23.94085693359375, "learning_rate": 9.460317460317461e-06, "loss": 45.9566, "step": 1820 }, { "epoch": 43.35820895522388, "grad_norm": 34.10947799682617, "learning_rate": 9.456349206349207e-06, "loss": 45.2182, "step": 1821 }, { "epoch": 43.3820895522388, "grad_norm": 23.844850540161133, "learning_rate": 9.452380952380952e-06, "loss": 45.5904, "step": 1822 }, { "epoch": 43.40597014925373, "grad_norm": 46.643768310546875, "learning_rate": 9.4484126984127e-06, "loss": 46.6924, "step": 1823 }, { "epoch": 43.429850746268656, "grad_norm": 32.49457931518555, "learning_rate": 9.444444444444445e-06, "loss": 44.024, "step": 1824 }, { "epoch": 43.45373134328358, "grad_norm": 32.36979293823242, "learning_rate": 9.440476190476192e-06, "loss": 45.5752, "step": 1825 }, { "epoch": 43.47761194029851, "grad_norm": 28.21212387084961, "learning_rate": 9.436507936507937e-06, "loss": 43.3649, "step": 1826 }, { "epoch": 43.50149253731343, "grad_norm": 30.494169235229492, "learning_rate": 9.432539682539682e-06, "loss": 44.4726, "step": 1827 }, { "epoch": 43.525373134328355, "grad_norm": 29.817806243896484, "learning_rate": 9.42857142857143e-06, "loss": 45.4315, "step": 1828 }, { "epoch": 43.549253731343285, "grad_norm": 32.68490219116211, "learning_rate": 9.424603174603175e-06, "loss": 45.1985, "step": 1829 }, { "epoch": 43.57313432835821, "grad_norm": 28.48166275024414, "learning_rate": 9.420634920634922e-06, "loss": 45.6737, "step": 1830 }, { "epoch": 43.59701492537313, "grad_norm": 30.532995223999023, "learning_rate": 9.416666666666667e-06, "loss": 45.9931, "step": 1831 }, { "epoch": 43.62089552238806, "grad_norm": 24.953765869140625, "learning_rate": 9.412698412698413e-06, "loss": 44.4189, "step": 1832 }, { "epoch": 43.644776119402984, "grad_norm": 23.647258758544922, "learning_rate": 9.40873015873016e-06, "loss": 44.6757, "step": 1833 }, { "epoch": 43.668656716417914, "grad_norm": 28.41623878479004, "learning_rate": 9.404761904761905e-06, "loss": 45.5732, "step": 1834 }, { "epoch": 43.69253731343284, "grad_norm": 25.599082946777344, "learning_rate": 9.40079365079365e-06, "loss": 45.0382, "step": 1835 }, { "epoch": 43.71641791044776, "grad_norm": 39.584144592285156, "learning_rate": 9.396825396825398e-06, "loss": 45.1531, "step": 1836 }, { "epoch": 43.74029850746269, "grad_norm": 30.606550216674805, "learning_rate": 9.392857142857143e-06, "loss": 44.3696, "step": 1837 }, { "epoch": 43.76417910447761, "grad_norm": 37.25154495239258, "learning_rate": 9.38888888888889e-06, "loss": 45.4297, "step": 1838 }, { "epoch": 43.788059701492536, "grad_norm": 30.60915184020996, "learning_rate": 9.384920634920636e-06, "loss": 45.2441, "step": 1839 }, { "epoch": 43.811940298507466, "grad_norm": 32.886268615722656, "learning_rate": 9.380952380952381e-06, "loss": 45.3913, "step": 1840 }, { "epoch": 43.83582089552239, "grad_norm": 27.98761749267578, "learning_rate": 9.376984126984128e-06, "loss": 45.2191, "step": 1841 }, { "epoch": 43.85970149253731, "grad_norm": 33.787261962890625, "learning_rate": 9.373015873015874e-06, "loss": 45.0051, "step": 1842 }, { "epoch": 43.88358208955224, "grad_norm": 26.90253257751465, "learning_rate": 9.36904761904762e-06, "loss": 45.9333, "step": 1843 }, { "epoch": 43.907462686567165, "grad_norm": 29.78704261779785, "learning_rate": 9.365079365079366e-06, "loss": 45.3598, "step": 1844 }, { "epoch": 43.93134328358209, "grad_norm": 24.871315002441406, "learning_rate": 9.361111111111111e-06, "loss": 44.4159, "step": 1845 }, { "epoch": 43.95522388059702, "grad_norm": 22.998323440551758, "learning_rate": 9.357142857142859e-06, "loss": 44.9989, "step": 1846 }, { "epoch": 43.97910447761194, "grad_norm": 28.724388122558594, "learning_rate": 9.353174603174604e-06, "loss": 46.0748, "step": 1847 }, { "epoch": 44.0, "grad_norm": 19.870426177978516, "learning_rate": 9.34920634920635e-06, "loss": 41.7823, "step": 1848 }, { "epoch": 44.02388059701492, "grad_norm": 28.000728607177734, "learning_rate": 9.345238095238096e-06, "loss": 44.3686, "step": 1849 }, { "epoch": 44.04776119402985, "grad_norm": 25.330766677856445, "learning_rate": 9.341269841269842e-06, "loss": 44.8766, "step": 1850 }, { "epoch": 44.071641791044776, "grad_norm": 30.589149475097656, "learning_rate": 9.337301587301589e-06, "loss": 46.3777, "step": 1851 }, { "epoch": 44.0955223880597, "grad_norm": 27.803207397460938, "learning_rate": 9.333333333333334e-06, "loss": 45.7627, "step": 1852 }, { "epoch": 44.11940298507463, "grad_norm": 28.11823081970215, "learning_rate": 9.32936507936508e-06, "loss": 44.9612, "step": 1853 }, { "epoch": 44.14328358208955, "grad_norm": 24.24823570251465, "learning_rate": 9.325396825396827e-06, "loss": 45.6512, "step": 1854 }, { "epoch": 44.167164179104475, "grad_norm": 29.81229019165039, "learning_rate": 9.321428571428572e-06, "loss": 43.7322, "step": 1855 }, { "epoch": 44.191044776119405, "grad_norm": 24.88245964050293, "learning_rate": 9.317460317460318e-06, "loss": 45.288, "step": 1856 }, { "epoch": 44.21492537313433, "grad_norm": 31.246389389038086, "learning_rate": 9.313492063492065e-06, "loss": 44.6547, "step": 1857 }, { "epoch": 44.23880597014925, "grad_norm": 29.363845825195312, "learning_rate": 9.30952380952381e-06, "loss": 44.7851, "step": 1858 }, { "epoch": 44.26268656716418, "grad_norm": 32.35028839111328, "learning_rate": 9.305555555555557e-06, "loss": 44.5643, "step": 1859 }, { "epoch": 44.286567164179104, "grad_norm": 31.52218246459961, "learning_rate": 9.301587301587303e-06, "loss": 45.293, "step": 1860 }, { "epoch": 44.31044776119403, "grad_norm": 29.180295944213867, "learning_rate": 9.297619047619048e-06, "loss": 45.298, "step": 1861 }, { "epoch": 44.33432835820896, "grad_norm": 27.626508712768555, "learning_rate": 9.293650793650795e-06, "loss": 45.1187, "step": 1862 }, { "epoch": 44.35820895522388, "grad_norm": 28.44379425048828, "learning_rate": 9.28968253968254e-06, "loss": 45.0835, "step": 1863 }, { "epoch": 44.3820895522388, "grad_norm": 29.45343017578125, "learning_rate": 9.285714285714288e-06, "loss": 45.5642, "step": 1864 }, { "epoch": 44.40597014925373, "grad_norm": 21.64850425720215, "learning_rate": 9.281746031746033e-06, "loss": 45.6837, "step": 1865 }, { "epoch": 44.429850746268656, "grad_norm": 35.32088088989258, "learning_rate": 9.277777777777778e-06, "loss": 44.9266, "step": 1866 }, { "epoch": 44.45373134328358, "grad_norm": 28.638429641723633, "learning_rate": 9.273809523809525e-06, "loss": 45.9407, "step": 1867 }, { "epoch": 44.47761194029851, "grad_norm": 31.444725036621094, "learning_rate": 9.26984126984127e-06, "loss": 45.4442, "step": 1868 }, { "epoch": 44.50149253731343, "grad_norm": 26.114784240722656, "learning_rate": 9.265873015873016e-06, "loss": 45.1998, "step": 1869 }, { "epoch": 44.525373134328355, "grad_norm": 24.51571273803711, "learning_rate": 9.261904761904763e-06, "loss": 45.0705, "step": 1870 }, { "epoch": 44.549253731343285, "grad_norm": 24.52007293701172, "learning_rate": 9.257936507936509e-06, "loss": 43.9359, "step": 1871 }, { "epoch": 44.57313432835821, "grad_norm": 17.876834869384766, "learning_rate": 9.253968253968256e-06, "loss": 44.5254, "step": 1872 }, { "epoch": 44.59701492537313, "grad_norm": 21.0299015045166, "learning_rate": 9.250000000000001e-06, "loss": 46.0916, "step": 1873 }, { "epoch": 44.62089552238806, "grad_norm": 30.12071990966797, "learning_rate": 9.246031746031747e-06, "loss": 44.1769, "step": 1874 }, { "epoch": 44.644776119402984, "grad_norm": 23.94618797302246, "learning_rate": 9.242063492063494e-06, "loss": 45.628, "step": 1875 }, { "epoch": 44.668656716417914, "grad_norm": 29.615930557250977, "learning_rate": 9.238095238095239e-06, "loss": 45.2762, "step": 1876 }, { "epoch": 44.69253731343284, "grad_norm": 30.00957489013672, "learning_rate": 9.234126984126986e-06, "loss": 45.6399, "step": 1877 }, { "epoch": 44.71641791044776, "grad_norm": 26.414703369140625, "learning_rate": 9.230158730158732e-06, "loss": 44.6988, "step": 1878 }, { "epoch": 44.74029850746269, "grad_norm": 28.785755157470703, "learning_rate": 9.226190476190477e-06, "loss": 45.4551, "step": 1879 }, { "epoch": 44.76417910447761, "grad_norm": 23.4616756439209, "learning_rate": 9.222222222222224e-06, "loss": 44.8668, "step": 1880 }, { "epoch": 44.788059701492536, "grad_norm": 25.046113967895508, "learning_rate": 9.218253968253968e-06, "loss": 45.8905, "step": 1881 }, { "epoch": 44.811940298507466, "grad_norm": 31.216581344604492, "learning_rate": 9.214285714285715e-06, "loss": 46.6996, "step": 1882 }, { "epoch": 44.83582089552239, "grad_norm": 22.215465545654297, "learning_rate": 9.21031746031746e-06, "loss": 46.1791, "step": 1883 }, { "epoch": 44.85970149253731, "grad_norm": 33.831214904785156, "learning_rate": 9.206349206349207e-06, "loss": 45.3197, "step": 1884 }, { "epoch": 44.88358208955224, "grad_norm": 24.447084426879883, "learning_rate": 9.202380952380953e-06, "loss": 45.2949, "step": 1885 }, { "epoch": 44.907462686567165, "grad_norm": 31.735240936279297, "learning_rate": 9.198412698412698e-06, "loss": 46.4555, "step": 1886 }, { "epoch": 44.93134328358209, "grad_norm": 27.23394012451172, "learning_rate": 9.194444444444445e-06, "loss": 45.9441, "step": 1887 }, { "epoch": 44.95522388059702, "grad_norm": 27.79869270324707, "learning_rate": 9.19047619047619e-06, "loss": 45.387, "step": 1888 }, { "epoch": 44.97910447761194, "grad_norm": 24.329313278198242, "learning_rate": 9.186507936507936e-06, "loss": 44.2934, "step": 1889 }, { "epoch": 45.0, "grad_norm": 22.191181182861328, "learning_rate": 9.182539682539683e-06, "loss": 39.0195, "step": 1890 }, { "epoch": 45.02388059701492, "grad_norm": 32.1130256652832, "learning_rate": 9.178571428571429e-06, "loss": 45.9272, "step": 1891 }, { "epoch": 45.04776119402985, "grad_norm": 19.621145248413086, "learning_rate": 9.174603174603176e-06, "loss": 44.3447, "step": 1892 }, { "epoch": 45.071641791044776, "grad_norm": 39.10493087768555, "learning_rate": 9.170634920634921e-06, "loss": 46.001, "step": 1893 }, { "epoch": 45.0955223880597, "grad_norm": 23.7473201751709, "learning_rate": 9.166666666666666e-06, "loss": 45.5786, "step": 1894 }, { "epoch": 45.11940298507463, "grad_norm": 30.535781860351562, "learning_rate": 9.162698412698414e-06, "loss": 46.3373, "step": 1895 }, { "epoch": 45.14328358208955, "grad_norm": 26.53186798095703, "learning_rate": 9.158730158730159e-06, "loss": 44.6074, "step": 1896 }, { "epoch": 45.167164179104475, "grad_norm": 31.9615478515625, "learning_rate": 9.154761904761906e-06, "loss": 43.229, "step": 1897 }, { "epoch": 45.191044776119405, "grad_norm": 28.577655792236328, "learning_rate": 9.150793650793651e-06, "loss": 45.6407, "step": 1898 }, { "epoch": 45.21492537313433, "grad_norm": 18.897531509399414, "learning_rate": 9.146825396825397e-06, "loss": 45.4092, "step": 1899 }, { "epoch": 45.23880597014925, "grad_norm": 31.126819610595703, "learning_rate": 9.142857142857144e-06, "loss": 43.966, "step": 1900 }, { "epoch": 45.26268656716418, "grad_norm": 26.312490463256836, "learning_rate": 9.13888888888889e-06, "loss": 45.3819, "step": 1901 }, { "epoch": 45.286567164179104, "grad_norm": 28.255640029907227, "learning_rate": 9.134920634920635e-06, "loss": 43.9926, "step": 1902 }, { "epoch": 45.31044776119403, "grad_norm": 27.333642959594727, "learning_rate": 9.130952380952382e-06, "loss": 45.7229, "step": 1903 }, { "epoch": 45.33432835820896, "grad_norm": 27.024580001831055, "learning_rate": 9.126984126984127e-06, "loss": 45.24, "step": 1904 }, { "epoch": 45.35820895522388, "grad_norm": 31.131914138793945, "learning_rate": 9.123015873015874e-06, "loss": 44.4842, "step": 1905 }, { "epoch": 45.3820895522388, "grad_norm": 27.244861602783203, "learning_rate": 9.11904761904762e-06, "loss": 45.0392, "step": 1906 }, { "epoch": 45.40597014925373, "grad_norm": 30.606016159057617, "learning_rate": 9.115079365079365e-06, "loss": 44.4968, "step": 1907 }, { "epoch": 45.429850746268656, "grad_norm": 22.56324577331543, "learning_rate": 9.111111111111112e-06, "loss": 45.5149, "step": 1908 }, { "epoch": 45.45373134328358, "grad_norm": 31.586326599121094, "learning_rate": 9.107142857142858e-06, "loss": 45.9413, "step": 1909 }, { "epoch": 45.47761194029851, "grad_norm": 23.143661499023438, "learning_rate": 9.103174603174603e-06, "loss": 44.3301, "step": 1910 }, { "epoch": 45.50149253731343, "grad_norm": 33.158111572265625, "learning_rate": 9.09920634920635e-06, "loss": 45.3503, "step": 1911 }, { "epoch": 45.525373134328355, "grad_norm": 26.259010314941406, "learning_rate": 9.095238095238095e-06, "loss": 44.125, "step": 1912 }, { "epoch": 45.549253731343285, "grad_norm": 25.72600555419922, "learning_rate": 9.091269841269843e-06, "loss": 45.8252, "step": 1913 }, { "epoch": 45.57313432835821, "grad_norm": 29.651403427124023, "learning_rate": 9.087301587301588e-06, "loss": 44.7603, "step": 1914 }, { "epoch": 45.59701492537313, "grad_norm": 24.896892547607422, "learning_rate": 9.083333333333333e-06, "loss": 45.3582, "step": 1915 }, { "epoch": 45.62089552238806, "grad_norm": 26.172271728515625, "learning_rate": 9.07936507936508e-06, "loss": 45.418, "step": 1916 }, { "epoch": 45.644776119402984, "grad_norm": 31.333498001098633, "learning_rate": 9.075396825396826e-06, "loss": 45.5952, "step": 1917 }, { "epoch": 45.668656716417914, "grad_norm": 23.452194213867188, "learning_rate": 9.071428571428573e-06, "loss": 45.8141, "step": 1918 }, { "epoch": 45.69253731343284, "grad_norm": 30.300634384155273, "learning_rate": 9.067460317460318e-06, "loss": 46.1877, "step": 1919 }, { "epoch": 45.71641791044776, "grad_norm": 24.516042709350586, "learning_rate": 9.063492063492064e-06, "loss": 44.0542, "step": 1920 }, { "epoch": 45.74029850746269, "grad_norm": 26.41005516052246, "learning_rate": 9.05952380952381e-06, "loss": 44.2296, "step": 1921 }, { "epoch": 45.76417910447761, "grad_norm": 23.099822998046875, "learning_rate": 9.055555555555556e-06, "loss": 45.2567, "step": 1922 }, { "epoch": 45.788059701492536, "grad_norm": 18.7821044921875, "learning_rate": 9.051587301587302e-06, "loss": 44.5807, "step": 1923 }, { "epoch": 45.811940298507466, "grad_norm": 31.705181121826172, "learning_rate": 9.047619047619049e-06, "loss": 45.1571, "step": 1924 }, { "epoch": 45.83582089552239, "grad_norm": 25.712608337402344, "learning_rate": 9.043650793650794e-06, "loss": 44.9665, "step": 1925 }, { "epoch": 45.85970149253731, "grad_norm": 31.790864944458008, "learning_rate": 9.039682539682541e-06, "loss": 45.6095, "step": 1926 }, { "epoch": 45.88358208955224, "grad_norm": 27.735107421875, "learning_rate": 9.035714285714287e-06, "loss": 45.8988, "step": 1927 }, { "epoch": 45.907462686567165, "grad_norm": 30.94534683227539, "learning_rate": 9.031746031746032e-06, "loss": 45.8302, "step": 1928 }, { "epoch": 45.93134328358209, "grad_norm": 23.146005630493164, "learning_rate": 9.027777777777779e-06, "loss": 45.3911, "step": 1929 }, { "epoch": 45.95522388059702, "grad_norm": 24.59404945373535, "learning_rate": 9.023809523809524e-06, "loss": 45.1403, "step": 1930 }, { "epoch": 45.97910447761194, "grad_norm": 25.62955665588379, "learning_rate": 9.019841269841272e-06, "loss": 44.8934, "step": 1931 }, { "epoch": 46.0, "grad_norm": 20.037391662597656, "learning_rate": 9.015873015873017e-06, "loss": 39.4122, "step": 1932 }, { "epoch": 46.02388059701492, "grad_norm": 25.78251075744629, "learning_rate": 9.011904761904762e-06, "loss": 45.7163, "step": 1933 }, { "epoch": 46.04776119402985, "grad_norm": 28.0667781829834, "learning_rate": 9.00793650793651e-06, "loss": 44.6447, "step": 1934 }, { "epoch": 46.071641791044776, "grad_norm": NaN, "learning_rate": 9.003968253968255e-06, "loss": 61.1269, "step": 1935 }, { "epoch": 46.0955223880597, "grad_norm": NaN, "learning_rate": 9.003968253968255e-06, "loss": 57.8669, "step": 1936 }, { "epoch": 46.11940298507463, "grad_norm": 19.022104263305664, "learning_rate": 9.003968253968255e-06, "loss": 46.0546, "step": 1937 }, { "epoch": 46.14328358208955, "grad_norm": 28.844619750976562, "learning_rate": 9e-06, "loss": 44.5077, "step": 1938 }, { "epoch": 46.167164179104475, "grad_norm": 23.570850372314453, "learning_rate": 8.996031746031747e-06, "loss": 44.8965, "step": 1939 }, { "epoch": 46.191044776119405, "grad_norm": 27.71855354309082, "learning_rate": 8.992063492063493e-06, "loss": 45.3302, "step": 1940 }, { "epoch": 46.21492537313433, "grad_norm": 23.61193084716797, "learning_rate": 8.98809523809524e-06, "loss": 45.4048, "step": 1941 }, { "epoch": 46.23880597014925, "grad_norm": 27.16132926940918, "learning_rate": 8.984126984126985e-06, "loss": 44.535, "step": 1942 }, { "epoch": 46.26268656716418, "grad_norm": 25.254039764404297, "learning_rate": 8.98015873015873e-06, "loss": 45.2944, "step": 1943 }, { "epoch": 46.286567164179104, "grad_norm": 28.196325302124023, "learning_rate": 8.976190476190478e-06, "loss": 44.0106, "step": 1944 }, { "epoch": 46.31044776119403, "grad_norm": 24.75798988342285, "learning_rate": 8.972222222222223e-06, "loss": 44.949, "step": 1945 }, { "epoch": 46.33432835820896, "grad_norm": 30.992849349975586, "learning_rate": 8.968253968253968e-06, "loss": 44.6185, "step": 1946 }, { "epoch": 46.35820895522388, "grad_norm": 28.122825622558594, "learning_rate": 8.964285714285716e-06, "loss": 46.7498, "step": 1947 }, { "epoch": 46.3820895522388, "grad_norm": 25.130678176879883, "learning_rate": 8.960317460317461e-06, "loss": 45.7823, "step": 1948 }, { "epoch": 46.40597014925373, "grad_norm": 26.97332763671875, "learning_rate": 8.956349206349208e-06, "loss": 44.8217, "step": 1949 }, { "epoch": 46.429850746268656, "grad_norm": 21.403100967407227, "learning_rate": 8.952380952380953e-06, "loss": 45.4608, "step": 1950 }, { "epoch": 46.45373134328358, "grad_norm": 30.794330596923828, "learning_rate": 8.948412698412699e-06, "loss": 45.0327, "step": 1951 }, { "epoch": 46.47761194029851, "grad_norm": 26.035839080810547, "learning_rate": 8.944444444444446e-06, "loss": 44.6979, "step": 1952 }, { "epoch": 46.50149253731343, "grad_norm": 21.501266479492188, "learning_rate": 8.940476190476191e-06, "loss": 44.6421, "step": 1953 }, { "epoch": 46.525373134328355, "grad_norm": 27.67610740661621, "learning_rate": 8.936507936507938e-06, "loss": 44.5721, "step": 1954 }, { "epoch": 46.549253731343285, "grad_norm": 24.71251678466797, "learning_rate": 8.932539682539684e-06, "loss": 45.2891, "step": 1955 }, { "epoch": 46.57313432835821, "grad_norm": 32.72700500488281, "learning_rate": 8.92857142857143e-06, "loss": 45.0829, "step": 1956 }, { "epoch": 46.59701492537313, "grad_norm": 26.203643798828125, "learning_rate": 8.924603174603176e-06, "loss": 44.9264, "step": 1957 }, { "epoch": 46.62089552238806, "grad_norm": 25.362638473510742, "learning_rate": 8.920634920634922e-06, "loss": 45.1448, "step": 1958 }, { "epoch": 46.644776119402984, "grad_norm": 25.224456787109375, "learning_rate": 8.916666666666667e-06, "loss": 45.6017, "step": 1959 }, { "epoch": 46.668656716417914, "grad_norm": 29.02377700805664, "learning_rate": 8.912698412698414e-06, "loss": 45.5859, "step": 1960 }, { "epoch": 46.69253731343284, "grad_norm": 25.2493896484375, "learning_rate": 8.90873015873016e-06, "loss": 44.3262, "step": 1961 }, { "epoch": 46.71641791044776, "grad_norm": 24.432043075561523, "learning_rate": 8.904761904761905e-06, "loss": 44.0005, "step": 1962 }, { "epoch": 46.74029850746269, "grad_norm": 23.06245994567871, "learning_rate": 8.90079365079365e-06, "loss": 45.2406, "step": 1963 }, { "epoch": 46.76417910447761, "grad_norm": 27.603015899658203, "learning_rate": 8.896825396825398e-06, "loss": 45.2547, "step": 1964 }, { "epoch": 46.788059701492536, "grad_norm": 26.66181182861328, "learning_rate": 8.892857142857143e-06, "loss": 45.0288, "step": 1965 }, { "epoch": 46.811940298507466, "grad_norm": 19.665678024291992, "learning_rate": 8.888888888888888e-06, "loss": 45.1412, "step": 1966 }, { "epoch": 46.83582089552239, "grad_norm": 31.3046932220459, "learning_rate": 8.884920634920635e-06, "loss": 45.7144, "step": 1967 }, { "epoch": 46.85970149253731, "grad_norm": 24.661293029785156, "learning_rate": 8.88095238095238e-06, "loss": 43.9468, "step": 1968 }, { "epoch": 46.88358208955224, "grad_norm": 25.421525955200195, "learning_rate": 8.876984126984128e-06, "loss": 45.4404, "step": 1969 }, { "epoch": 46.907462686567165, "grad_norm": 30.11313247680664, "learning_rate": 8.873015873015873e-06, "loss": 44.4083, "step": 1970 }, { "epoch": 46.93134328358209, "grad_norm": 24.19677734375, "learning_rate": 8.869047619047619e-06, "loss": 45.5387, "step": 1971 }, { "epoch": 46.95522388059702, "grad_norm": 25.183414459228516, "learning_rate": 8.865079365079366e-06, "loss": 45.2725, "step": 1972 }, { "epoch": 46.97910447761194, "grad_norm": 22.570981979370117, "learning_rate": 8.861111111111111e-06, "loss": 44.1263, "step": 1973 }, { "epoch": 47.0, "grad_norm": 27.16869354248047, "learning_rate": 8.857142857142858e-06, "loss": 39.0382, "step": 1974 }, { "epoch": 47.02388059701492, "grad_norm": 27.326980590820312, "learning_rate": 8.853174603174604e-06, "loss": 45.0956, "step": 1975 }, { "epoch": 47.04776119402985, "grad_norm": 25.321685791015625, "learning_rate": 8.849206349206349e-06, "loss": 45.1531, "step": 1976 }, { "epoch": 47.071641791044776, "grad_norm": 29.480770111083984, "learning_rate": 8.845238095238096e-06, "loss": 44.7925, "step": 1977 }, { "epoch": 47.0955223880597, "grad_norm": 29.82880210876465, "learning_rate": 8.841269841269842e-06, "loss": 45.6435, "step": 1978 }, { "epoch": 47.11940298507463, "grad_norm": 31.852386474609375, "learning_rate": 8.837301587301587e-06, "loss": 45.0481, "step": 1979 }, { "epoch": 47.14328358208955, "grad_norm": 27.80265235900879, "learning_rate": 8.833333333333334e-06, "loss": 44.7472, "step": 1980 }, { "epoch": 47.167164179104475, "grad_norm": NaN, "learning_rate": 8.82936507936508e-06, "loss": 38.8619, "step": 1981 }, { "epoch": 47.191044776119405, "grad_norm": 24.525455474853516, "learning_rate": 8.82936507936508e-06, "loss": 44.8093, "step": 1982 }, { "epoch": 47.21492537313433, "grad_norm": 26.450302124023438, "learning_rate": 8.825396825396827e-06, "loss": 44.7615, "step": 1983 }, { "epoch": 47.23880597014925, "grad_norm": 22.493268966674805, "learning_rate": 8.821428571428572e-06, "loss": 44.5445, "step": 1984 }, { "epoch": 47.26268656716418, "grad_norm": 26.506013870239258, "learning_rate": 8.817460317460317e-06, "loss": 45.4412, "step": 1985 }, { "epoch": 47.286567164179104, "grad_norm": 23.09911346435547, "learning_rate": 8.813492063492064e-06, "loss": 44.8791, "step": 1986 }, { "epoch": 47.31044776119403, "grad_norm": 21.34832191467285, "learning_rate": 8.80952380952381e-06, "loss": 44.8867, "step": 1987 }, { "epoch": 47.33432835820896, "grad_norm": 25.69770050048828, "learning_rate": 8.805555555555557e-06, "loss": 45.0307, "step": 1988 }, { "epoch": 47.35820895522388, "grad_norm": 27.75917625427246, "learning_rate": 8.801587301587302e-06, "loss": 43.7733, "step": 1989 }, { "epoch": 47.3820895522388, "grad_norm": 24.314449310302734, "learning_rate": 8.797619047619048e-06, "loss": 44.8685, "step": 1990 }, { "epoch": 47.40597014925373, "grad_norm": 22.21106719970703, "learning_rate": 8.793650793650795e-06, "loss": 45.2589, "step": 1991 }, { "epoch": 47.429850746268656, "grad_norm": 28.61949920654297, "learning_rate": 8.78968253968254e-06, "loss": 45.7972, "step": 1992 }, { "epoch": 47.45373134328358, "grad_norm": 27.726839065551758, "learning_rate": 8.785714285714286e-06, "loss": 44.0989, "step": 1993 }, { "epoch": 47.47761194029851, "grad_norm": 24.9364013671875, "learning_rate": 8.781746031746033e-06, "loss": 44.9365, "step": 1994 }, { "epoch": 47.50149253731343, "grad_norm": 23.380905151367188, "learning_rate": 8.777777777777778e-06, "loss": 44.9662, "step": 1995 }, { "epoch": 47.525373134328355, "grad_norm": 22.02720832824707, "learning_rate": 8.773809523809525e-06, "loss": 45.1456, "step": 1996 }, { "epoch": 47.549253731343285, "grad_norm": NaN, "learning_rate": 8.76984126984127e-06, "loss": 60.0243, "step": 1997 }, { "epoch": 47.57313432835821, "grad_norm": 21.263904571533203, "learning_rate": 8.76984126984127e-06, "loss": 44.6697, "step": 1998 }, { "epoch": 47.59701492537313, "grad_norm": 25.381332397460938, "learning_rate": 8.765873015873016e-06, "loss": 44.9032, "step": 1999 }, { "epoch": 47.62089552238806, "grad_norm": 24.297027587890625, "learning_rate": 8.761904761904763e-06, "loss": 44.5833, "step": 2000 }, { "epoch": 47.644776119402984, "grad_norm": 26.303585052490234, "learning_rate": 8.757936507936508e-06, "loss": 45.252, "step": 2001 }, { "epoch": 47.668656716417914, "grad_norm": 23.310070037841797, "learning_rate": 8.753968253968254e-06, "loss": 45.0068, "step": 2002 }, { "epoch": 47.69253731343284, "grad_norm": 30.19032859802246, "learning_rate": 8.750000000000001e-06, "loss": 46.1286, "step": 2003 }, { "epoch": 47.71641791044776, "grad_norm": 27.43839454650879, "learning_rate": 8.746031746031746e-06, "loss": 46.5151, "step": 2004 }, { "epoch": 47.74029850746269, "grad_norm": 24.49736976623535, "learning_rate": 8.742063492063493e-06, "loss": 45.2309, "step": 2005 }, { "epoch": 47.76417910447761, "grad_norm": 32.9915885925293, "learning_rate": 8.738095238095239e-06, "loss": 44.221, "step": 2006 }, { "epoch": 47.788059701492536, "grad_norm": 27.080114364624023, "learning_rate": 8.734126984126984e-06, "loss": 44.4515, "step": 2007 }, { "epoch": 47.811940298507466, "grad_norm": 34.84925079345703, "learning_rate": 8.730158730158731e-06, "loss": 44.5223, "step": 2008 }, { "epoch": 47.83582089552239, "grad_norm": 28.061695098876953, "learning_rate": 8.726190476190477e-06, "loss": 45.6776, "step": 2009 }, { "epoch": 47.85970149253731, "grad_norm": 35.316009521484375, "learning_rate": 8.722222222222224e-06, "loss": 45.6784, "step": 2010 }, { "epoch": 47.88358208955224, "grad_norm": 29.395872116088867, "learning_rate": 8.71825396825397e-06, "loss": 46.054, "step": 2011 }, { "epoch": 47.907462686567165, "grad_norm": 31.359512329101562, "learning_rate": 8.714285714285715e-06, "loss": 44.6921, "step": 2012 }, { "epoch": 47.93134328358209, "grad_norm": 24.621870040893555, "learning_rate": 8.710317460317462e-06, "loss": 45.8119, "step": 2013 }, { "epoch": 47.95522388059702, "grad_norm": 30.466150283813477, "learning_rate": 8.706349206349207e-06, "loss": 44.5282, "step": 2014 }, { "epoch": 47.97910447761194, "grad_norm": 29.490886688232422, "learning_rate": 8.702380952380952e-06, "loss": 45.2275, "step": 2015 }, { "epoch": 48.0, "grad_norm": 18.86721420288086, "learning_rate": 8.6984126984127e-06, "loss": 38.1757, "step": 2016 }, { "epoch": 48.02388059701492, "grad_norm": 34.39149856567383, "learning_rate": 8.694444444444445e-06, "loss": 45.4931, "step": 2017 }, { "epoch": 48.04776119402985, "grad_norm": 28.87833023071289, "learning_rate": 8.690476190476192e-06, "loss": 45.3396, "step": 2018 }, { "epoch": 48.071641791044776, "grad_norm": 36.20280838012695, "learning_rate": 8.686507936507937e-06, "loss": 44.7758, "step": 2019 }, { "epoch": 48.0955223880597, "grad_norm": 30.76156234741211, "learning_rate": 8.682539682539683e-06, "loss": 44.2899, "step": 2020 }, { "epoch": 48.11940298507463, "grad_norm": 36.33967208862305, "learning_rate": 8.67857142857143e-06, "loss": 44.6879, "step": 2021 }, { "epoch": 48.14328358208955, "grad_norm": 30.22699737548828, "learning_rate": 8.674603174603175e-06, "loss": 45.8113, "step": 2022 }, { "epoch": 48.167164179104475, "grad_norm": 30.748640060424805, "learning_rate": 8.670634920634922e-06, "loss": 44.048, "step": 2023 }, { "epoch": 48.191044776119405, "grad_norm": 25.484418869018555, "learning_rate": 8.666666666666668e-06, "loss": 44.9645, "step": 2024 }, { "epoch": 48.21492537313433, "grad_norm": 33.34728240966797, "learning_rate": 8.662698412698413e-06, "loss": 44.2533, "step": 2025 }, { "epoch": 48.23880597014925, "grad_norm": 24.65802764892578, "learning_rate": 8.65873015873016e-06, "loss": 45.9453, "step": 2026 }, { "epoch": 48.26268656716418, "grad_norm": 30.4432373046875, "learning_rate": 8.654761904761906e-06, "loss": 45.8027, "step": 2027 }, { "epoch": 48.286567164179104, "grad_norm": 22.55684471130371, "learning_rate": 8.650793650793651e-06, "loss": 45.6855, "step": 2028 }, { "epoch": 48.31044776119403, "grad_norm": 22.167613983154297, "learning_rate": 8.646825396825398e-06, "loss": 44.3946, "step": 2029 }, { "epoch": 48.33432835820896, "grad_norm": 27.42496681213379, "learning_rate": 8.642857142857144e-06, "loss": 45.3506, "step": 2030 }, { "epoch": 48.35820895522388, "grad_norm": 24.647188186645508, "learning_rate": 8.63888888888889e-06, "loss": 44.3746, "step": 2031 }, { "epoch": 48.3820895522388, "grad_norm": 28.068981170654297, "learning_rate": 8.634920634920636e-06, "loss": 44.7821, "step": 2032 }, { "epoch": 48.40597014925373, "grad_norm": 22.093984603881836, "learning_rate": 8.630952380952381e-06, "loss": 43.8444, "step": 2033 }, { "epoch": 48.429850746268656, "grad_norm": 33.278778076171875, "learning_rate": 8.626984126984129e-06, "loss": 44.8849, "step": 2034 }, { "epoch": 48.45373134328358, "grad_norm": 23.357349395751953, "learning_rate": 8.623015873015874e-06, "loss": 44.8346, "step": 2035 }, { "epoch": 48.47761194029851, "grad_norm": 29.543947219848633, "learning_rate": 8.61904761904762e-06, "loss": 45.8072, "step": 2036 }, { "epoch": 48.50149253731343, "grad_norm": 24.81306266784668, "learning_rate": 8.615079365079366e-06, "loss": 43.6868, "step": 2037 }, { "epoch": 48.525373134328355, "grad_norm": 30.09635353088379, "learning_rate": 8.611111111111112e-06, "loss": 45.1631, "step": 2038 }, { "epoch": 48.549253731343285, "grad_norm": 26.751686096191406, "learning_rate": 8.607142857142859e-06, "loss": 44.5276, "step": 2039 }, { "epoch": 48.57313432835821, "grad_norm": 22.96086883544922, "learning_rate": 8.603174603174604e-06, "loss": 45.5322, "step": 2040 }, { "epoch": 48.59701492537313, "grad_norm": 30.90753173828125, "learning_rate": 8.59920634920635e-06, "loss": 44.5476, "step": 2041 }, { "epoch": 48.62089552238806, "grad_norm": 22.072256088256836, "learning_rate": 8.595238095238097e-06, "loss": 45.3412, "step": 2042 }, { "epoch": 48.644776119402984, "grad_norm": 37.27132034301758, "learning_rate": 8.591269841269842e-06, "loss": 43.9968, "step": 2043 }, { "epoch": 48.668656716417914, "grad_norm": 31.473464965820312, "learning_rate": 8.587301587301588e-06, "loss": 46.7003, "step": 2044 }, { "epoch": 48.69253731343284, "grad_norm": 41.3200798034668, "learning_rate": 8.583333333333333e-06, "loss": 44.9254, "step": 2045 }, { "epoch": 48.71641791044776, "grad_norm": 28.326889038085938, "learning_rate": 8.57936507936508e-06, "loss": 45.4611, "step": 2046 }, { "epoch": 48.74029850746269, "grad_norm": 42.016624450683594, "learning_rate": 8.575396825396826e-06, "loss": 45.9752, "step": 2047 }, { "epoch": 48.76417910447761, "grad_norm": 39.264827728271484, "learning_rate": 8.571428571428571e-06, "loss": 45.9133, "step": 2048 }, { "epoch": 48.788059701492536, "grad_norm": 36.876461029052734, "learning_rate": 8.567460317460318e-06, "loss": 44.052, "step": 2049 }, { "epoch": 48.811940298507466, "grad_norm": 33.36867141723633, "learning_rate": 8.563492063492063e-06, "loss": 44.8014, "step": 2050 }, { "epoch": 48.83582089552239, "grad_norm": 33.16298294067383, "learning_rate": 8.55952380952381e-06, "loss": 44.005, "step": 2051 }, { "epoch": 48.85970149253731, "grad_norm": 32.4409065246582, "learning_rate": 8.555555555555556e-06, "loss": 44.2993, "step": 2052 }, { "epoch": 48.88358208955224, "grad_norm": 32.56459426879883, "learning_rate": 8.551587301587301e-06, "loss": 45.2025, "step": 2053 }, { "epoch": 48.907462686567165, "grad_norm": 30.31665802001953, "learning_rate": 8.547619047619048e-06, "loss": 43.8506, "step": 2054 }, { "epoch": 48.93134328358209, "grad_norm": 29.07672119140625, "learning_rate": 8.543650793650794e-06, "loss": 44.2567, "step": 2055 }, { "epoch": 48.95522388059702, "grad_norm": 24.603849411010742, "learning_rate": 8.53968253968254e-06, "loss": 44.5072, "step": 2056 }, { "epoch": 48.97910447761194, "grad_norm": 26.305355072021484, "learning_rate": 8.535714285714286e-06, "loss": 45.2023, "step": 2057 }, { "epoch": 49.0, "grad_norm": 20.483905792236328, "learning_rate": 8.531746031746032e-06, "loss": 38.3416, "step": 2058 }, { "epoch": 49.02388059701492, "grad_norm": 18.845535278320312, "learning_rate": 8.527777777777779e-06, "loss": 44.0003, "step": 2059 }, { "epoch": 49.04776119402985, "grad_norm": 20.018390655517578, "learning_rate": 8.523809523809524e-06, "loss": 45.5951, "step": 2060 }, { "epoch": 49.071641791044776, "grad_norm": 18.276540756225586, "learning_rate": 8.51984126984127e-06, "loss": 45.4302, "step": 2061 }, { "epoch": 49.0955223880597, "grad_norm": 18.592966079711914, "learning_rate": 8.515873015873017e-06, "loss": 44.9415, "step": 2062 }, { "epoch": 49.11940298507463, "grad_norm": NaN, "learning_rate": 8.511904761904762e-06, "loss": 77.195, "step": 2063 }, { "epoch": 49.14328358208955, "grad_norm": 23.695045471191406, "learning_rate": 8.511904761904762e-06, "loss": 45.1853, "step": 2064 }, { "epoch": 49.167164179104475, "grad_norm": 16.90850830078125, "learning_rate": 8.507936507936509e-06, "loss": 44.0122, "step": 2065 }, { "epoch": 49.191044776119405, "grad_norm": 30.50786781311035, "learning_rate": 8.503968253968255e-06, "loss": 44.8398, "step": 2066 }, { "epoch": 49.21492537313433, "grad_norm": 24.35599136352539, "learning_rate": 8.5e-06, "loss": 43.4544, "step": 2067 }, { "epoch": 49.23880597014925, "grad_norm": 29.541887283325195, "learning_rate": 8.496031746031747e-06, "loss": 45.1471, "step": 2068 }, { "epoch": 49.26268656716418, "grad_norm": 20.277528762817383, "learning_rate": 8.492063492063492e-06, "loss": 45.1862, "step": 2069 }, { "epoch": 49.286567164179104, "grad_norm": 33.5463752746582, "learning_rate": 8.488095238095238e-06, "loss": 43.5467, "step": 2070 }, { "epoch": 49.31044776119403, "grad_norm": 23.218936920166016, "learning_rate": 8.484126984126985e-06, "loss": 44.6577, "step": 2071 }, { "epoch": 49.33432835820896, "grad_norm": 36.53571701049805, "learning_rate": 8.48015873015873e-06, "loss": 46.4774, "step": 2072 }, { "epoch": 49.35820895522388, "grad_norm": 32.15842819213867, "learning_rate": 8.476190476190477e-06, "loss": 45.3236, "step": 2073 }, { "epoch": 49.3820895522388, "grad_norm": 29.57740020751953, "learning_rate": 8.472222222222223e-06, "loss": 44.7034, "step": 2074 }, { "epoch": 49.40597014925373, "grad_norm": 28.12784194946289, "learning_rate": 8.468253968253968e-06, "loss": 43.741, "step": 2075 }, { "epoch": 49.429850746268656, "grad_norm": 28.08392906188965, "learning_rate": 8.464285714285715e-06, "loss": 45.326, "step": 2076 }, { "epoch": 49.45373134328358, "grad_norm": 24.909330368041992, "learning_rate": 8.46031746031746e-06, "loss": 45.979, "step": 2077 }, { "epoch": 49.47761194029851, "grad_norm": 26.343902587890625, "learning_rate": 8.456349206349208e-06, "loss": 44.1665, "step": 2078 }, { "epoch": 49.50149253731343, "grad_norm": 30.070533752441406, "learning_rate": 8.452380952380953e-06, "loss": 45.1331, "step": 2079 }, { "epoch": 49.525373134328355, "grad_norm": 26.733827590942383, "learning_rate": 8.448412698412699e-06, "loss": 43.9576, "step": 2080 }, { "epoch": 49.549253731343285, "grad_norm": 31.43610191345215, "learning_rate": 8.444444444444446e-06, "loss": 44.3933, "step": 2081 }, { "epoch": 49.57313432835821, "grad_norm": 24.856496810913086, "learning_rate": 8.440476190476191e-06, "loss": 44.561, "step": 2082 }, { "epoch": 49.59701492537313, "grad_norm": 30.097368240356445, "learning_rate": 8.436507936507936e-06, "loss": 44.617, "step": 2083 }, { "epoch": 49.62089552238806, "grad_norm": 26.63928985595703, "learning_rate": 8.432539682539684e-06, "loss": 45.1091, "step": 2084 }, { "epoch": 49.644776119402984, "grad_norm": 33.428932189941406, "learning_rate": 8.428571428571429e-06, "loss": 45.8576, "step": 2085 }, { "epoch": 49.668656716417914, "grad_norm": 26.33061408996582, "learning_rate": 8.424603174603176e-06, "loss": 46.6266, "step": 2086 }, { "epoch": 49.69253731343284, "grad_norm": 35.67467498779297, "learning_rate": 8.420634920634921e-06, "loss": 43.8886, "step": 2087 }, { "epoch": 49.71641791044776, "grad_norm": 33.62556076049805, "learning_rate": 8.416666666666667e-06, "loss": 44.819, "step": 2088 }, { "epoch": 49.74029850746269, "grad_norm": 29.146684646606445, "learning_rate": 8.412698412698414e-06, "loss": 45.1877, "step": 2089 }, { "epoch": 49.76417910447761, "grad_norm": 29.51055335998535, "learning_rate": 8.40873015873016e-06, "loss": 44.9054, "step": 2090 }, { "epoch": 49.788059701492536, "grad_norm": 31.709413528442383, "learning_rate": 8.404761904761905e-06, "loss": 44.8456, "step": 2091 }, { "epoch": 49.811940298507466, "grad_norm": 26.646390914916992, "learning_rate": 8.400793650793652e-06, "loss": 44.1815, "step": 2092 }, { "epoch": 49.83582089552239, "grad_norm": 35.582496643066406, "learning_rate": 8.396825396825397e-06, "loss": 44.9951, "step": 2093 }, { "epoch": 49.85970149253731, "grad_norm": 25.587371826171875, "learning_rate": 8.392857142857144e-06, "loss": 44.3349, "step": 2094 }, { "epoch": 49.88358208955224, "grad_norm": 29.13399887084961, "learning_rate": 8.38888888888889e-06, "loss": 45.28, "step": 2095 }, { "epoch": 49.907462686567165, "grad_norm": 21.462890625, "learning_rate": 8.384920634920635e-06, "loss": 44.4383, "step": 2096 }, { "epoch": 49.93134328358209, "grad_norm": 31.970626831054688, "learning_rate": 8.380952380952382e-06, "loss": 45.989, "step": 2097 }, { "epoch": 49.95522388059702, "grad_norm": 21.948705673217773, "learning_rate": 8.376984126984128e-06, "loss": 44.0871, "step": 2098 }, { "epoch": 49.97910447761194, "grad_norm": 35.07805252075195, "learning_rate": 8.373015873015875e-06, "loss": 44.709, "step": 2099 }, { "epoch": 50.0, "grad_norm": 21.554956436157227, "learning_rate": 8.36904761904762e-06, "loss": 38.6725, "step": 2100 }, { "epoch": 50.02388059701492, "grad_norm": 35.4162712097168, "learning_rate": 8.365079365079365e-06, "loss": 44.2866, "step": 2101 }, { "epoch": 50.04776119402985, "grad_norm": 31.357215881347656, "learning_rate": 8.361111111111113e-06, "loss": 44.9399, "step": 2102 }, { "epoch": 50.071641791044776, "grad_norm": 28.055850982666016, "learning_rate": 8.357142857142858e-06, "loss": 44.2145, "step": 2103 }, { "epoch": 50.0955223880597, "grad_norm": 27.62700080871582, "learning_rate": 8.353174603174603e-06, "loss": 44.715, "step": 2104 }, { "epoch": 50.11940298507463, "grad_norm": 32.586219787597656, "learning_rate": 8.34920634920635e-06, "loss": 45.6174, "step": 2105 }, { "epoch": 50.14328358208955, "grad_norm": 24.922584533691406, "learning_rate": 8.345238095238096e-06, "loss": 46.0653, "step": 2106 }, { "epoch": 50.167164179104475, "grad_norm": 29.282079696655273, "learning_rate": 8.341269841269843e-06, "loss": 44.8826, "step": 2107 }, { "epoch": 50.191044776119405, "grad_norm": 25.85003089904785, "learning_rate": 8.337301587301588e-06, "loss": 43.7337, "step": 2108 }, { "epoch": 50.21492537313433, "grad_norm": 26.331398010253906, "learning_rate": 8.333333333333334e-06, "loss": 44.9624, "step": 2109 }, { "epoch": 50.23880597014925, "grad_norm": 19.595951080322266, "learning_rate": 8.32936507936508e-06, "loss": 45.0561, "step": 2110 }, { "epoch": 50.26268656716418, "grad_norm": 18.431438446044922, "learning_rate": 8.325396825396826e-06, "loss": 44.6963, "step": 2111 }, { "epoch": 50.286567164179104, "grad_norm": 20.670730590820312, "learning_rate": 8.321428571428573e-06, "loss": 44.6057, "step": 2112 }, { "epoch": 50.31044776119403, "grad_norm": 20.497106552124023, "learning_rate": 8.317460317460319e-06, "loss": 45.6219, "step": 2113 }, { "epoch": 50.33432835820896, "grad_norm": 21.33808708190918, "learning_rate": 8.313492063492064e-06, "loss": 43.6802, "step": 2114 }, { "epoch": 50.35820895522388, "grad_norm": 17.015180587768555, "learning_rate": 8.309523809523811e-06, "loss": 45.6156, "step": 2115 }, { "epoch": 50.3820895522388, "grad_norm": 25.82108497619629, "learning_rate": 8.305555555555557e-06, "loss": 45.529, "step": 2116 }, { "epoch": 50.40597014925373, "grad_norm": 20.37699317932129, "learning_rate": 8.301587301587302e-06, "loss": 44.4007, "step": 2117 }, { "epoch": 50.429850746268656, "grad_norm": 24.1844482421875, "learning_rate": 8.297619047619049e-06, "loss": 45.0155, "step": 2118 }, { "epoch": 50.45373134328358, "grad_norm": 21.229581832885742, "learning_rate": 8.293650793650794e-06, "loss": 44.8109, "step": 2119 }, { "epoch": 50.47761194029851, "grad_norm": 23.752500534057617, "learning_rate": 8.289682539682542e-06, "loss": 45.1129, "step": 2120 }, { "epoch": 50.50149253731343, "grad_norm": 19.724092483520508, "learning_rate": 8.285714285714287e-06, "loss": 44.1519, "step": 2121 }, { "epoch": 50.525373134328355, "grad_norm": 21.154827117919922, "learning_rate": 8.281746031746032e-06, "loss": 43.8136, "step": 2122 }, { "epoch": 50.549253731343285, "grad_norm": 21.17751121520996, "learning_rate": 8.277777777777778e-06, "loss": 44.7593, "step": 2123 }, { "epoch": 50.57313432835821, "grad_norm": 24.729738235473633, "learning_rate": 8.273809523809523e-06, "loss": 44.7794, "step": 2124 }, { "epoch": 50.59701492537313, "grad_norm": 18.432241439819336, "learning_rate": 8.26984126984127e-06, "loss": 44.0237, "step": 2125 }, { "epoch": 50.62089552238806, "grad_norm": 26.357515335083008, "learning_rate": 8.265873015873016e-06, "loss": 45.2566, "step": 2126 }, { "epoch": 50.644776119402984, "grad_norm": 24.270259857177734, "learning_rate": 8.261904761904763e-06, "loss": 44.1182, "step": 2127 }, { "epoch": 50.668656716417914, "grad_norm": 20.756067276000977, "learning_rate": 8.257936507936508e-06, "loss": 46.2374, "step": 2128 }, { "epoch": 50.69253731343284, "grad_norm": 23.159393310546875, "learning_rate": 8.253968253968254e-06, "loss": 44.1878, "step": 2129 }, { "epoch": 50.71641791044776, "grad_norm": 22.44221305847168, "learning_rate": 8.25e-06, "loss": 45.3746, "step": 2130 }, { "epoch": 50.74029850746269, "grad_norm": 20.27827262878418, "learning_rate": 8.246031746031746e-06, "loss": 44.1278, "step": 2131 }, { "epoch": 50.76417910447761, "grad_norm": 21.407669067382812, "learning_rate": 8.242063492063493e-06, "loss": 44.8487, "step": 2132 }, { "epoch": 50.788059701492536, "grad_norm": 24.570688247680664, "learning_rate": 8.238095238095239e-06, "loss": 44.2913, "step": 2133 }, { "epoch": 50.811940298507466, "grad_norm": 23.73247528076172, "learning_rate": 8.234126984126984e-06, "loss": 45.4539, "step": 2134 }, { "epoch": 50.83582089552239, "grad_norm": 20.265886306762695, "learning_rate": 8.230158730158731e-06, "loss": 43.1901, "step": 2135 }, { "epoch": 50.85970149253731, "grad_norm": 16.51488494873047, "learning_rate": 8.226190476190476e-06, "loss": 45.0321, "step": 2136 }, { "epoch": 50.88358208955224, "grad_norm": 19.107425689697266, "learning_rate": 8.222222222222222e-06, "loss": 44.3746, "step": 2137 }, { "epoch": 50.907462686567165, "grad_norm": 19.300790786743164, "learning_rate": 8.218253968253969e-06, "loss": 45.1466, "step": 2138 }, { "epoch": 50.93134328358209, "grad_norm": 19.817272186279297, "learning_rate": 8.214285714285714e-06, "loss": 44.9703, "step": 2139 }, { "epoch": 50.95522388059702, "grad_norm": 22.794174194335938, "learning_rate": 8.210317460317461e-06, "loss": 43.917, "step": 2140 }, { "epoch": 50.97910447761194, "grad_norm": 18.948871612548828, "learning_rate": 8.206349206349207e-06, "loss": 44.4099, "step": 2141 }, { "epoch": 51.0, "grad_norm": 13.966577529907227, "learning_rate": 8.202380952380952e-06, "loss": 38.9733, "step": 2142 }, { "epoch": 51.02388059701492, "grad_norm": 29.5616397857666, "learning_rate": 8.1984126984127e-06, "loss": 44.8355, "step": 2143 }, { "epoch": 51.04776119402985, "grad_norm": 22.391014099121094, "learning_rate": 8.194444444444445e-06, "loss": 44.6835, "step": 2144 }, { "epoch": 51.071641791044776, "grad_norm": 28.830854415893555, "learning_rate": 8.190476190476192e-06, "loss": 43.3011, "step": 2145 }, { "epoch": 51.0955223880597, "grad_norm": 21.114011764526367, "learning_rate": 8.186507936507937e-06, "loss": 44.4223, "step": 2146 }, { "epoch": 51.11940298507463, "grad_norm": 28.902416229248047, "learning_rate": 8.182539682539683e-06, "loss": 44.0485, "step": 2147 }, { "epoch": 51.14328358208955, "grad_norm": 21.923168182373047, "learning_rate": 8.17857142857143e-06, "loss": 45.3272, "step": 2148 }, { "epoch": 51.167164179104475, "grad_norm": 28.772884368896484, "learning_rate": 8.174603174603175e-06, "loss": 45.6205, "step": 2149 }, { "epoch": 51.191044776119405, "grad_norm": 23.949098587036133, "learning_rate": 8.17063492063492e-06, "loss": 45.0204, "step": 2150 }, { "epoch": 51.21492537313433, "grad_norm": 26.735624313354492, "learning_rate": 8.166666666666668e-06, "loss": 45.6338, "step": 2151 }, { "epoch": 51.23880597014925, "grad_norm": 28.049888610839844, "learning_rate": 8.162698412698413e-06, "loss": 44.2502, "step": 2152 }, { "epoch": 51.26268656716418, "grad_norm": 23.256439208984375, "learning_rate": 8.15873015873016e-06, "loss": 44.1981, "step": 2153 }, { "epoch": 51.286567164179104, "grad_norm": 32.3640022277832, "learning_rate": 8.154761904761905e-06, "loss": 43.6928, "step": 2154 }, { "epoch": 51.31044776119403, "grad_norm": 23.900907516479492, "learning_rate": 8.15079365079365e-06, "loss": 45.3594, "step": 2155 }, { "epoch": 51.33432835820896, "grad_norm": 39.41314697265625, "learning_rate": 8.146825396825398e-06, "loss": 44.5862, "step": 2156 }, { "epoch": 51.35820895522388, "grad_norm": 31.826566696166992, "learning_rate": 8.142857142857143e-06, "loss": 44.6213, "step": 2157 }, { "epoch": 51.3820895522388, "grad_norm": 35.3351936340332, "learning_rate": 8.138888888888889e-06, "loss": 44.9952, "step": 2158 }, { "epoch": 51.40597014925373, "grad_norm": 33.0169677734375, "learning_rate": 8.134920634920636e-06, "loss": 44.7576, "step": 2159 }, { "epoch": 51.429850746268656, "grad_norm": 32.347251892089844, "learning_rate": 8.130952380952381e-06, "loss": 45.0997, "step": 2160 }, { "epoch": 51.45373134328358, "grad_norm": 25.79857635498047, "learning_rate": 8.126984126984128e-06, "loss": 45.8578, "step": 2161 }, { "epoch": 51.47761194029851, "grad_norm": 33.378108978271484, "learning_rate": 8.123015873015874e-06, "loss": 44.6084, "step": 2162 }, { "epoch": 51.50149253731343, "grad_norm": 27.625028610229492, "learning_rate": 8.119047619047619e-06, "loss": 45.1928, "step": 2163 }, { "epoch": 51.525373134328355, "grad_norm": 32.47718811035156, "learning_rate": 8.115079365079366e-06, "loss": 44.38, "step": 2164 }, { "epoch": 51.549253731343285, "grad_norm": 31.10133934020996, "learning_rate": 8.111111111111112e-06, "loss": 44.1878, "step": 2165 }, { "epoch": 51.57313432835821, "grad_norm": 33.062007904052734, "learning_rate": 8.107142857142859e-06, "loss": 44.6587, "step": 2166 }, { "epoch": 51.59701492537313, "grad_norm": 31.35774803161621, "learning_rate": 8.103174603174604e-06, "loss": 44.0408, "step": 2167 }, { "epoch": 51.62089552238806, "grad_norm": 35.262237548828125, "learning_rate": 8.09920634920635e-06, "loss": 45.3717, "step": 2168 }, { "epoch": 51.644776119402984, "grad_norm": 32.77524948120117, "learning_rate": 8.095238095238097e-06, "loss": 44.8105, "step": 2169 }, { "epoch": 51.668656716417914, "grad_norm": 28.838821411132812, "learning_rate": 8.091269841269842e-06, "loss": 44.3364, "step": 2170 }, { "epoch": 51.69253731343284, "grad_norm": 26.18807029724121, "learning_rate": 8.087301587301587e-06, "loss": 44.5054, "step": 2171 }, { "epoch": 51.71641791044776, "grad_norm": 31.639286041259766, "learning_rate": 8.083333333333334e-06, "loss": 45.4023, "step": 2172 }, { "epoch": 51.74029850746269, "grad_norm": 27.998628616333008, "learning_rate": 8.07936507936508e-06, "loss": 44.8306, "step": 2173 }, { "epoch": 51.76417910447761, "grad_norm": 30.69230079650879, "learning_rate": 8.075396825396827e-06, "loss": 45.1802, "step": 2174 }, { "epoch": 51.788059701492536, "grad_norm": 23.640962600708008, "learning_rate": 8.071428571428572e-06, "loss": 43.7667, "step": 2175 }, { "epoch": 51.811940298507466, "grad_norm": 29.017114639282227, "learning_rate": 8.067460317460318e-06, "loss": 43.9821, "step": 2176 }, { "epoch": 51.83582089552239, "grad_norm": 21.79175567626953, "learning_rate": 8.063492063492065e-06, "loss": 45.0959, "step": 2177 }, { "epoch": 51.85970149253731, "grad_norm": 25.505756378173828, "learning_rate": 8.05952380952381e-06, "loss": 44.1622, "step": 2178 }, { "epoch": 51.88358208955224, "grad_norm": 19.43979263305664, "learning_rate": 8.055555555555557e-06, "loss": 43.4959, "step": 2179 }, { "epoch": 51.907462686567165, "grad_norm": 32.855037689208984, "learning_rate": 8.051587301587303e-06, "loss": 44.3206, "step": 2180 }, { "epoch": 51.93134328358209, "grad_norm": 23.80797576904297, "learning_rate": 8.047619047619048e-06, "loss": 43.6716, "step": 2181 }, { "epoch": 51.95522388059702, "grad_norm": 37.09321594238281, "learning_rate": 8.043650793650795e-06, "loss": 45.3091, "step": 2182 }, { "epoch": 51.97910447761194, "grad_norm": 25.76487922668457, "learning_rate": 8.03968253968254e-06, "loss": 44.5829, "step": 2183 }, { "epoch": 52.0, "grad_norm": 24.34773063659668, "learning_rate": 8.035714285714286e-06, "loss": 39.637, "step": 2184 }, { "epoch": 52.02388059701492, "grad_norm": 24.28459358215332, "learning_rate": 8.031746031746033e-06, "loss": 42.8823, "step": 2185 }, { "epoch": 52.04776119402985, "grad_norm": 31.015172958374023, "learning_rate": 8.027777777777778e-06, "loss": 43.6859, "step": 2186 }, { "epoch": 52.071641791044776, "grad_norm": 27.413232803344727, "learning_rate": 8.023809523809526e-06, "loss": 44.0734, "step": 2187 }, { "epoch": 52.0955223880597, "grad_norm": 34.3042106628418, "learning_rate": 8.019841269841271e-06, "loss": 44.4303, "step": 2188 }, { "epoch": 52.11940298507463, "grad_norm": 25.737226486206055, "learning_rate": 8.015873015873016e-06, "loss": 45.6858, "step": 2189 }, { "epoch": 52.14328358208955, "grad_norm": 33.09044647216797, "learning_rate": 8.011904761904763e-06, "loss": 44.0591, "step": 2190 }, { "epoch": 52.167164179104475, "grad_norm": 26.903594970703125, "learning_rate": 8.007936507936509e-06, "loss": 44.4434, "step": 2191 }, { "epoch": 52.191044776119405, "grad_norm": 32.05507278442383, "learning_rate": 8.003968253968254e-06, "loss": 44.1334, "step": 2192 }, { "epoch": 52.21492537313433, "grad_norm": 23.954050064086914, "learning_rate": 8.000000000000001e-06, "loss": 45.4077, "step": 2193 }, { "epoch": 52.23880597014925, "grad_norm": 25.273069381713867, "learning_rate": 7.996031746031747e-06, "loss": 44.4704, "step": 2194 }, { "epoch": 52.26268656716418, "grad_norm": 24.762975692749023, "learning_rate": 7.992063492063494e-06, "loss": 44.9846, "step": 2195 }, { "epoch": 52.286567164179104, "grad_norm": 31.624853134155273, "learning_rate": 7.98809523809524e-06, "loss": 44.6678, "step": 2196 }, { "epoch": 52.31044776119403, "grad_norm": 20.407798767089844, "learning_rate": 7.984126984126985e-06, "loss": 44.5191, "step": 2197 }, { "epoch": 52.33432835820896, "grad_norm": 35.610721588134766, "learning_rate": 7.980158730158732e-06, "loss": 43.797, "step": 2198 }, { "epoch": 52.35820895522388, "grad_norm": 23.916271209716797, "learning_rate": 7.976190476190477e-06, "loss": 44.5035, "step": 2199 }, { "epoch": 52.3820895522388, "grad_norm": 30.07246971130371, "learning_rate": 7.972222222222224e-06, "loss": 44.8658, "step": 2200 }, { "epoch": 52.40597014925373, "grad_norm": 26.69670295715332, "learning_rate": 7.968253968253968e-06, "loss": 43.1086, "step": 2201 }, { "epoch": 52.429850746268656, "grad_norm": 35.99201583862305, "learning_rate": 7.964285714285715e-06, "loss": 43.8965, "step": 2202 }, { "epoch": 52.45373134328358, "grad_norm": 26.909433364868164, "learning_rate": 7.96031746031746e-06, "loss": 44.3023, "step": 2203 }, { "epoch": 52.47761194029851, "grad_norm": 31.2402286529541, "learning_rate": 7.956349206349206e-06, "loss": 43.8009, "step": 2204 }, { "epoch": 52.50149253731343, "grad_norm": 28.230714797973633, "learning_rate": 7.952380952380953e-06, "loss": 45.6781, "step": 2205 }, { "epoch": 52.525373134328355, "grad_norm": 32.47516632080078, "learning_rate": 7.948412698412698e-06, "loss": 46.0123, "step": 2206 }, { "epoch": 52.549253731343285, "grad_norm": 29.042253494262695, "learning_rate": 7.944444444444445e-06, "loss": 46.4036, "step": 2207 }, { "epoch": 52.57313432835821, "grad_norm": 24.23044776916504, "learning_rate": 7.94047619047619e-06, "loss": 44.0722, "step": 2208 }, { "epoch": 52.59701492537313, "grad_norm": 25.844972610473633, "learning_rate": 7.936507936507936e-06, "loss": 44.1403, "step": 2209 }, { "epoch": 52.62089552238806, "grad_norm": 25.40447235107422, "learning_rate": 7.932539682539683e-06, "loss": 43.699, "step": 2210 }, { "epoch": 52.644776119402984, "grad_norm": 24.027687072753906, "learning_rate": 7.928571428571429e-06, "loss": 45.1803, "step": 2211 }, { "epoch": 52.668656716417914, "grad_norm": 22.707393646240234, "learning_rate": 7.924603174603174e-06, "loss": 43.7808, "step": 2212 }, { "epoch": 52.69253731343284, "grad_norm": 17.410104751586914, "learning_rate": 7.920634920634921e-06, "loss": 44.7556, "step": 2213 }, { "epoch": 52.71641791044776, "grad_norm": 19.376863479614258, "learning_rate": 7.916666666666667e-06, "loss": 45.3176, "step": 2214 }, { "epoch": 52.74029850746269, "grad_norm": 21.29641342163086, "learning_rate": 7.912698412698414e-06, "loss": 44.8597, "step": 2215 }, { "epoch": 52.76417910447761, "grad_norm": 21.937013626098633, "learning_rate": 7.908730158730159e-06, "loss": 44.3548, "step": 2216 }, { "epoch": 52.788059701492536, "grad_norm": 27.38592529296875, "learning_rate": 7.904761904761904e-06, "loss": 45.204, "step": 2217 }, { "epoch": 52.811940298507466, "grad_norm": 21.232566833496094, "learning_rate": 7.900793650793652e-06, "loss": 43.9788, "step": 2218 }, { "epoch": 52.83582089552239, "grad_norm": 22.52651023864746, "learning_rate": 7.896825396825397e-06, "loss": 44.161, "step": 2219 }, { "epoch": 52.85970149253731, "grad_norm": 23.06977081298828, "learning_rate": 7.892857142857144e-06, "loss": 44.5394, "step": 2220 }, { "epoch": 52.88358208955224, "grad_norm": 19.71670150756836, "learning_rate": 7.88888888888889e-06, "loss": 44.4384, "step": 2221 }, { "epoch": 52.907462686567165, "grad_norm": 19.651142120361328, "learning_rate": 7.884920634920635e-06, "loss": 45.3143, "step": 2222 }, { "epoch": 52.93134328358209, "grad_norm": 23.386962890625, "learning_rate": 7.880952380952382e-06, "loss": 44.4246, "step": 2223 }, { "epoch": 52.95522388059702, "grad_norm": 19.763513565063477, "learning_rate": 7.876984126984127e-06, "loss": 45.6001, "step": 2224 }, { "epoch": 52.97910447761194, "grad_norm": 21.81069564819336, "learning_rate": 7.873015873015873e-06, "loss": 45.6176, "step": 2225 }, { "epoch": 53.0, "grad_norm": 18.15079689025879, "learning_rate": 7.86904761904762e-06, "loss": 39.4819, "step": 2226 }, { "epoch": 53.02388059701492, "grad_norm": 17.333694458007812, "learning_rate": 7.865079365079365e-06, "loss": 45.612, "step": 2227 }, { "epoch": 53.04776119402985, "grad_norm": 18.72818946838379, "learning_rate": 7.861111111111112e-06, "loss": 43.5423, "step": 2228 }, { "epoch": 53.071641791044776, "grad_norm": 18.34732437133789, "learning_rate": 7.857142857142858e-06, "loss": 43.972, "step": 2229 }, { "epoch": 53.0955223880597, "grad_norm": 21.568077087402344, "learning_rate": 7.853174603174603e-06, "loss": 44.8122, "step": 2230 }, { "epoch": 53.11940298507463, "grad_norm": 20.801836013793945, "learning_rate": 7.84920634920635e-06, "loss": 43.8729, "step": 2231 }, { "epoch": 53.14328358208955, "grad_norm": 23.20212745666504, "learning_rate": 7.845238095238096e-06, "loss": 45.3738, "step": 2232 }, { "epoch": 53.167164179104475, "grad_norm": 24.016311645507812, "learning_rate": 7.841269841269843e-06, "loss": 45.0862, "step": 2233 }, { "epoch": 53.191044776119405, "grad_norm": 18.800554275512695, "learning_rate": 7.837301587301588e-06, "loss": 43.2166, "step": 2234 }, { "epoch": 53.21492537313433, "grad_norm": 20.73765754699707, "learning_rate": 7.833333333333333e-06, "loss": 43.3917, "step": 2235 }, { "epoch": 53.23880597014925, "grad_norm": 23.1943302154541, "learning_rate": 7.82936507936508e-06, "loss": 44.0957, "step": 2236 }, { "epoch": 53.26268656716418, "grad_norm": 18.450380325317383, "learning_rate": 7.825396825396826e-06, "loss": 44.1782, "step": 2237 }, { "epoch": 53.286567164179104, "grad_norm": 24.16314697265625, "learning_rate": 7.821428571428571e-06, "loss": 45.0735, "step": 2238 }, { "epoch": 53.31044776119403, "grad_norm": 25.004743576049805, "learning_rate": 7.817460317460318e-06, "loss": 44.4628, "step": 2239 }, { "epoch": 53.33432835820896, "grad_norm": 17.29636573791504, "learning_rate": 7.813492063492064e-06, "loss": 45.2476, "step": 2240 }, { "epoch": 53.35820895522388, "grad_norm": 24.759471893310547, "learning_rate": 7.809523809523811e-06, "loss": 44.71, "step": 2241 }, { "epoch": 53.3820895522388, "grad_norm": 21.52720832824707, "learning_rate": 7.805555555555556e-06, "loss": 44.9929, "step": 2242 }, { "epoch": 53.40597014925373, "grad_norm": 22.760278701782227, "learning_rate": 7.801587301587302e-06, "loss": 43.6639, "step": 2243 }, { "epoch": 53.429850746268656, "grad_norm": 19.5325927734375, "learning_rate": 7.797619047619049e-06, "loss": 44.2974, "step": 2244 }, { "epoch": 53.45373134328358, "grad_norm": 25.756797790527344, "learning_rate": 7.793650793650794e-06, "loss": 45.401, "step": 2245 }, { "epoch": 53.47761194029851, "grad_norm": 19.75324058532715, "learning_rate": 7.78968253968254e-06, "loss": 44.6426, "step": 2246 }, { "epoch": 53.50149253731343, "grad_norm": 25.47930145263672, "learning_rate": 7.785714285714287e-06, "loss": 42.2875, "step": 2247 }, { "epoch": 53.525373134328355, "grad_norm": 21.61121368408203, "learning_rate": 7.781746031746032e-06, "loss": 45.7982, "step": 2248 }, { "epoch": 53.549253731343285, "grad_norm": 24.11342430114746, "learning_rate": 7.77777777777778e-06, "loss": 43.6397, "step": 2249 }, { "epoch": 53.57313432835821, "grad_norm": 25.151281356811523, "learning_rate": 7.773809523809525e-06, "loss": 44.0536, "step": 2250 }, { "epoch": 53.59701492537313, "grad_norm": 21.925559997558594, "learning_rate": 7.76984126984127e-06, "loss": 45.2035, "step": 2251 }, { "epoch": 53.62089552238806, "grad_norm": 22.38170623779297, "learning_rate": 7.765873015873017e-06, "loss": 44.3272, "step": 2252 }, { "epoch": 53.644776119402984, "grad_norm": 24.35360336303711, "learning_rate": 7.761904761904762e-06, "loss": 45.687, "step": 2253 }, { "epoch": 53.668656716417914, "grad_norm": 20.127119064331055, "learning_rate": 7.75793650793651e-06, "loss": 44.001, "step": 2254 }, { "epoch": 53.69253731343284, "grad_norm": 20.66204833984375, "learning_rate": 7.753968253968255e-06, "loss": 45.1368, "step": 2255 }, { "epoch": 53.71641791044776, "grad_norm": 22.565038681030273, "learning_rate": 7.75e-06, "loss": 43.7021, "step": 2256 }, { "epoch": 53.74029850746269, "grad_norm": 20.893674850463867, "learning_rate": 7.746031746031747e-06, "loss": 44.381, "step": 2257 }, { "epoch": 53.76417910447761, "grad_norm": 21.53620147705078, "learning_rate": 7.742063492063493e-06, "loss": 45.2511, "step": 2258 }, { "epoch": 53.788059701492536, "grad_norm": 20.66484832763672, "learning_rate": 7.738095238095238e-06, "loss": 45.167, "step": 2259 }, { "epoch": 53.811940298507466, "grad_norm": 24.964414596557617, "learning_rate": 7.734126984126985e-06, "loss": 44.6754, "step": 2260 }, { "epoch": 53.83582089552239, "grad_norm": 22.17997169494629, "learning_rate": 7.73015873015873e-06, "loss": 44.1696, "step": 2261 }, { "epoch": 53.85970149253731, "grad_norm": 19.715208053588867, "learning_rate": 7.726190476190478e-06, "loss": 43.8961, "step": 2262 }, { "epoch": 53.88358208955224, "grad_norm": 27.562166213989258, "learning_rate": 7.722222222222223e-06, "loss": 43.9035, "step": 2263 }, { "epoch": 53.907462686567165, "grad_norm": 21.021081924438477, "learning_rate": 7.718253968253969e-06, "loss": 45.5108, "step": 2264 }, { "epoch": 53.93134328358209, "grad_norm": 25.675813674926758, "learning_rate": 7.714285714285716e-06, "loss": 44.5437, "step": 2265 }, { "epoch": 53.95522388059702, "grad_norm": 25.80996322631836, "learning_rate": 7.710317460317461e-06, "loss": 44.7417, "step": 2266 }, { "epoch": 53.97910447761194, "grad_norm": 23.055217742919922, "learning_rate": 7.706349206349208e-06, "loss": 44.3122, "step": 2267 }, { "epoch": 54.0, "grad_norm": 14.958904266357422, "learning_rate": 7.702380952380954e-06, "loss": 40.1174, "step": 2268 }, { "epoch": 54.02388059701492, "grad_norm": 23.825021743774414, "learning_rate": 7.698412698412699e-06, "loss": 43.6857, "step": 2269 }, { "epoch": 54.04776119402985, "grad_norm": 20.29824447631836, "learning_rate": 7.694444444444446e-06, "loss": 43.9979, "step": 2270 }, { "epoch": 54.071641791044776, "grad_norm": 19.905017852783203, "learning_rate": 7.690476190476191e-06, "loss": 44.1637, "step": 2271 }, { "epoch": 54.0955223880597, "grad_norm": 16.342378616333008, "learning_rate": 7.686507936507937e-06, "loss": 43.9998, "step": 2272 }, { "epoch": 54.11940298507463, "grad_norm": 22.551780700683594, "learning_rate": 7.682539682539684e-06, "loss": 44.6528, "step": 2273 }, { "epoch": 54.14328358208955, "grad_norm": 16.87897491455078, "learning_rate": 7.67857142857143e-06, "loss": 44.4259, "step": 2274 }, { "epoch": 54.167164179104475, "grad_norm": 27.494592666625977, "learning_rate": 7.674603174603176e-06, "loss": 45.7648, "step": 2275 }, { "epoch": 54.191044776119405, "grad_norm": 22.326452255249023, "learning_rate": 7.670634920634922e-06, "loss": 44.1077, "step": 2276 }, { "epoch": 54.21492537313433, "grad_norm": 30.2500057220459, "learning_rate": 7.666666666666667e-06, "loss": 44.5322, "step": 2277 }, { "epoch": 54.23880597014925, "grad_norm": 28.212095260620117, "learning_rate": 7.662698412698414e-06, "loss": 43.6225, "step": 2278 }, { "epoch": 54.26268656716418, "grad_norm": 23.524145126342773, "learning_rate": 7.65873015873016e-06, "loss": 44.9014, "step": 2279 }, { "epoch": 54.286567164179104, "grad_norm": 29.799076080322266, "learning_rate": 7.654761904761905e-06, "loss": 44.6654, "step": 2280 }, { "epoch": 54.31044776119403, "grad_norm": 20.350683212280273, "learning_rate": 7.65079365079365e-06, "loss": 44.254, "step": 2281 }, { "epoch": 54.33432835820896, "grad_norm": 25.748899459838867, "learning_rate": 7.646825396825398e-06, "loss": 44.4278, "step": 2282 }, { "epoch": 54.35820895522388, "grad_norm": 25.086284637451172, "learning_rate": 7.642857142857143e-06, "loss": 43.8974, "step": 2283 }, { "epoch": 54.3820895522388, "grad_norm": 24.49972915649414, "learning_rate": 7.638888888888888e-06, "loss": 44.4423, "step": 2284 }, { "epoch": 54.40597014925373, "grad_norm": 18.78260612487793, "learning_rate": 7.634920634920635e-06, "loss": 44.5666, "step": 2285 }, { "epoch": 54.429850746268656, "grad_norm": 20.125263214111328, "learning_rate": 7.630952380952381e-06, "loss": 44.4853, "step": 2286 }, { "epoch": 54.45373134328358, "grad_norm": 20.763385772705078, "learning_rate": 7.626984126984127e-06, "loss": 43.6951, "step": 2287 }, { "epoch": 54.47761194029851, "grad_norm": 22.267620086669922, "learning_rate": 7.623015873015873e-06, "loss": 45.0328, "step": 2288 }, { "epoch": 54.50149253731343, "grad_norm": 24.786283493041992, "learning_rate": 7.61904761904762e-06, "loss": 45.0047, "step": 2289 }, { "epoch": 54.525373134328355, "grad_norm": 18.581987380981445, "learning_rate": 7.615079365079365e-06, "loss": 45.0172, "step": 2290 }, { "epoch": 54.549253731343285, "grad_norm": 27.262859344482422, "learning_rate": 7.611111111111111e-06, "loss": 44.1982, "step": 2291 }, { "epoch": 54.57313432835821, "grad_norm": 25.255537033081055, "learning_rate": 7.6071428571428575e-06, "loss": 43.8743, "step": 2292 }, { "epoch": 54.59701492537313, "grad_norm": 17.982698440551758, "learning_rate": 7.603174603174604e-06, "loss": 43.4799, "step": 2293 }, { "epoch": 54.62089552238806, "grad_norm": 25.1834716796875, "learning_rate": 7.599206349206349e-06, "loss": 44.8257, "step": 2294 }, { "epoch": 54.644776119402984, "grad_norm": 19.047700881958008, "learning_rate": 7.595238095238095e-06, "loss": 45.2678, "step": 2295 }, { "epoch": 54.668656716417914, "grad_norm": 26.404882431030273, "learning_rate": 7.591269841269842e-06, "loss": 43.3948, "step": 2296 }, { "epoch": 54.69253731343284, "grad_norm": 19.84337615966797, "learning_rate": 7.587301587301588e-06, "loss": 45.7629, "step": 2297 }, { "epoch": 54.71641791044776, "grad_norm": 19.9034481048584, "learning_rate": 7.583333333333333e-06, "loss": 44.4071, "step": 2298 }, { "epoch": 54.74029850746269, "grad_norm": NaN, "learning_rate": 7.5793650793650795e-06, "loss": 62.5737, "step": 2299 }, { "epoch": 54.76417910447761, "grad_norm": 21.210494995117188, "learning_rate": 7.5793650793650795e-06, "loss": 44.7093, "step": 2300 }, { "epoch": 54.788059701492536, "grad_norm": 21.403833389282227, "learning_rate": 7.575396825396826e-06, "loss": 44.6465, "step": 2301 }, { "epoch": 54.811940298507466, "grad_norm": 21.515085220336914, "learning_rate": 7.571428571428572e-06, "loss": 44.3846, "step": 2302 }, { "epoch": 54.83582089552239, "grad_norm": 25.024738311767578, "learning_rate": 7.567460317460317e-06, "loss": 44.7177, "step": 2303 }, { "epoch": 54.85970149253731, "grad_norm": 18.742982864379883, "learning_rate": 7.563492063492064e-06, "loss": 44.0076, "step": 2304 }, { "epoch": 54.88358208955224, "grad_norm": 20.333248138427734, "learning_rate": 7.55952380952381e-06, "loss": 43.6154, "step": 2305 }, { "epoch": 54.907462686567165, "grad_norm": 21.4791316986084, "learning_rate": 7.555555555555556e-06, "loss": 44.2151, "step": 2306 }, { "epoch": 54.93134328358209, "grad_norm": 26.272165298461914, "learning_rate": 7.551587301587302e-06, "loss": 45.5681, "step": 2307 }, { "epoch": 54.95522388059702, "grad_norm": 24.541637420654297, "learning_rate": 7.547619047619048e-06, "loss": 42.8248, "step": 2308 }, { "epoch": 54.97910447761194, "grad_norm": 23.563505172729492, "learning_rate": 7.543650793650794e-06, "loss": 45.1566, "step": 2309 }, { "epoch": 55.0, "grad_norm": 18.819583892822266, "learning_rate": 7.53968253968254e-06, "loss": 39.9674, "step": 2310 }, { "epoch": 55.02388059701492, "grad_norm": 21.50135040283203, "learning_rate": 7.5357142857142865e-06, "loss": 44.7008, "step": 2311 }, { "epoch": 55.04776119402985, "grad_norm": 20.28955078125, "learning_rate": 7.531746031746032e-06, "loss": 45.7705, "step": 2312 }, { "epoch": 55.071641791044776, "grad_norm": 19.94631004333496, "learning_rate": 7.527777777777778e-06, "loss": 44.3223, "step": 2313 }, { "epoch": 55.0955223880597, "grad_norm": 22.5699520111084, "learning_rate": 7.523809523809524e-06, "loss": 45.2447, "step": 2314 }, { "epoch": 55.11940298507463, "grad_norm": 17.17287254333496, "learning_rate": 7.519841269841271e-06, "loss": 44.4756, "step": 2315 }, { "epoch": 55.14328358208955, "grad_norm": 17.727617263793945, "learning_rate": 7.515873015873016e-06, "loss": 44.3907, "step": 2316 }, { "epoch": 55.167164179104475, "grad_norm": 18.320112228393555, "learning_rate": 7.511904761904762e-06, "loss": 44.3684, "step": 2317 }, { "epoch": 55.191044776119405, "grad_norm": 14.576784133911133, "learning_rate": 7.5079365079365085e-06, "loss": 43.8936, "step": 2318 }, { "epoch": 55.21492537313433, "grad_norm": 18.880218505859375, "learning_rate": 7.503968253968255e-06, "loss": 44.5556, "step": 2319 }, { "epoch": 55.23880597014925, "grad_norm": 19.764198303222656, "learning_rate": 7.500000000000001e-06, "loss": 43.8549, "step": 2320 }, { "epoch": 55.26268656716418, "grad_norm": NaN, "learning_rate": 7.4960317460317464e-06, "loss": 44.1908, "step": 2321 }, { "epoch": 55.286567164179104, "grad_norm": 19.977001190185547, "learning_rate": 7.4960317460317464e-06, "loss": 44.3724, "step": 2322 }, { "epoch": 55.31044776119403, "grad_norm": 21.85407066345215, "learning_rate": 7.492063492063493e-06, "loss": 44.2829, "step": 2323 }, { "epoch": 55.33432835820896, "grad_norm": 22.168100357055664, "learning_rate": 7.488095238095239e-06, "loss": 44.4898, "step": 2324 }, { "epoch": 55.35820895522388, "grad_norm": 18.234298706054688, "learning_rate": 7.484126984126985e-06, "loss": 44.3711, "step": 2325 }, { "epoch": 55.3820895522388, "grad_norm": 30.803691864013672, "learning_rate": 7.4801587301587306e-06, "loss": 43.825, "step": 2326 }, { "epoch": 55.40597014925373, "grad_norm": 22.18082046508789, "learning_rate": 7.476190476190477e-06, "loss": 44.8057, "step": 2327 }, { "epoch": 55.429850746268656, "grad_norm": 32.34336853027344, "learning_rate": 7.472222222222223e-06, "loss": 45.0128, "step": 2328 }, { "epoch": 55.45373134328358, "grad_norm": 28.040363311767578, "learning_rate": 7.468253968253969e-06, "loss": 45.2428, "step": 2329 }, { "epoch": 55.47761194029851, "grad_norm": 31.505037307739258, "learning_rate": 7.464285714285715e-06, "loss": 43.6846, "step": 2330 }, { "epoch": 55.50149253731343, "grad_norm": 21.62251853942871, "learning_rate": 7.460317460317461e-06, "loss": 44.467, "step": 2331 }, { "epoch": 55.525373134328355, "grad_norm": 29.035741806030273, "learning_rate": 7.456349206349207e-06, "loss": 43.7965, "step": 2332 }, { "epoch": 55.549253731343285, "grad_norm": 17.466440200805664, "learning_rate": 7.4523809523809534e-06, "loss": 44.8456, "step": 2333 }, { "epoch": 55.57313432835821, "grad_norm": 37.01939392089844, "learning_rate": 7.448412698412699e-06, "loss": 44.4359, "step": 2334 }, { "epoch": 55.59701492537313, "grad_norm": 24.612354278564453, "learning_rate": 7.444444444444445e-06, "loss": 43.7534, "step": 2335 }, { "epoch": 55.62089552238806, "grad_norm": 39.57061004638672, "learning_rate": 7.440476190476191e-06, "loss": 45.5826, "step": 2336 }, { "epoch": 55.644776119402984, "grad_norm": 33.687538146972656, "learning_rate": 7.4365079365079376e-06, "loss": 44.3013, "step": 2337 }, { "epoch": 55.668656716417914, "grad_norm": 38.39813995361328, "learning_rate": 7.432539682539684e-06, "loss": 44.7874, "step": 2338 }, { "epoch": 55.69253731343284, "grad_norm": 38.15765380859375, "learning_rate": 7.428571428571429e-06, "loss": 44.6177, "step": 2339 }, { "epoch": 55.71641791044776, "grad_norm": 27.5556640625, "learning_rate": 7.4246031746031754e-06, "loss": 43.1288, "step": 2340 }, { "epoch": 55.74029850746269, "grad_norm": 30.958349227905273, "learning_rate": 7.420634920634922e-06, "loss": 45.4904, "step": 2341 }, { "epoch": 55.76417910447761, "grad_norm": 21.71723747253418, "learning_rate": 7.416666666666668e-06, "loss": 43.0391, "step": 2342 }, { "epoch": 55.788059701492536, "grad_norm": 24.229736328125, "learning_rate": 7.412698412698413e-06, "loss": 44.0514, "step": 2343 }, { "epoch": 55.811940298507466, "grad_norm": 32.11771011352539, "learning_rate": 7.40873015873016e-06, "loss": 44.5521, "step": 2344 }, { "epoch": 55.83582089552239, "grad_norm": 21.694486618041992, "learning_rate": 7.404761904761906e-06, "loss": 43.9067, "step": 2345 }, { "epoch": 55.85970149253731, "grad_norm": 31.469402313232422, "learning_rate": 7.400793650793652e-06, "loss": 43.3506, "step": 2346 }, { "epoch": 55.88358208955224, "grad_norm": 28.431011199951172, "learning_rate": 7.3968253968253975e-06, "loss": 43.7056, "step": 2347 }, { "epoch": 55.907462686567165, "grad_norm": 34.616065979003906, "learning_rate": 7.392857142857144e-06, "loss": 44.0428, "step": 2348 }, { "epoch": 55.93134328358209, "grad_norm": 27.52882194519043, "learning_rate": 7.38888888888889e-06, "loss": 43.8619, "step": 2349 }, { "epoch": 55.95522388059702, "grad_norm": 36.93627166748047, "learning_rate": 7.384920634920636e-06, "loss": 44.1589, "step": 2350 }, { "epoch": 55.97910447761194, "grad_norm": 37.746578216552734, "learning_rate": 7.380952380952382e-06, "loss": 46.0174, "step": 2351 }, { "epoch": 56.0, "grad_norm": 20.20071029663086, "learning_rate": 7.376984126984128e-06, "loss": 37.7123, "step": 2352 }, { "epoch": 56.02388059701492, "grad_norm": 25.446529388427734, "learning_rate": 7.373015873015874e-06, "loss": 43.4657, "step": 2353 }, { "epoch": 56.04776119402985, "grad_norm": 25.78912353515625, "learning_rate": 7.36904761904762e-06, "loss": 44.7379, "step": 2354 }, { "epoch": 56.071641791044776, "grad_norm": 24.028154373168945, "learning_rate": 7.3650793650793666e-06, "loss": 43.4876, "step": 2355 }, { "epoch": 56.0955223880597, "grad_norm": 30.157793045043945, "learning_rate": 7.361111111111112e-06, "loss": 44.2346, "step": 2356 }, { "epoch": 56.11940298507463, "grad_norm": 29.836891174316406, "learning_rate": 7.357142857142858e-06, "loss": 44.0793, "step": 2357 }, { "epoch": 56.14328358208955, "grad_norm": 30.458818435668945, "learning_rate": 7.3531746031746045e-06, "loss": 44.7587, "step": 2358 }, { "epoch": 56.167164179104475, "grad_norm": 28.30854034423828, "learning_rate": 7.349206349206351e-06, "loss": 44.9153, "step": 2359 }, { "epoch": 56.191044776119405, "grad_norm": 29.497085571289062, "learning_rate": 7.345238095238096e-06, "loss": 44.194, "step": 2360 }, { "epoch": 56.21492537313433, "grad_norm": 21.790063858032227, "learning_rate": 7.3412698412698415e-06, "loss": 44.1507, "step": 2361 }, { "epoch": 56.23880597014925, "grad_norm": 27.489242553710938, "learning_rate": 7.337301587301588e-06, "loss": 44.3505, "step": 2362 }, { "epoch": 56.26268656716418, "grad_norm": 22.906452178955078, "learning_rate": 7.333333333333333e-06, "loss": 44.9329, "step": 2363 }, { "epoch": 56.286567164179104, "grad_norm": 33.46257400512695, "learning_rate": 7.329365079365079e-06, "loss": 43.1323, "step": 2364 }, { "epoch": 56.31044776119403, "grad_norm": 22.410470962524414, "learning_rate": 7.325396825396826e-06, "loss": 43.6305, "step": 2365 }, { "epoch": 56.33432835820896, "grad_norm": 32.74277114868164, "learning_rate": 7.321428571428572e-06, "loss": 43.6165, "step": 2366 }, { "epoch": 56.35820895522388, "grad_norm": 31.368419647216797, "learning_rate": 7.317460317460317e-06, "loss": 44.8837, "step": 2367 }, { "epoch": 56.3820895522388, "grad_norm": 30.941593170166016, "learning_rate": 7.3134920634920635e-06, "loss": 44.2282, "step": 2368 }, { "epoch": 56.40597014925373, "grad_norm": 34.12104415893555, "learning_rate": 7.30952380952381e-06, "loss": 45.2191, "step": 2369 }, { "epoch": 56.429850746268656, "grad_norm": 28.09603500366211, "learning_rate": 7.305555555555556e-06, "loss": 44.9848, "step": 2370 }, { "epoch": 56.45373134328358, "grad_norm": 29.32378578186035, "learning_rate": 7.301587301587301e-06, "loss": 44.2658, "step": 2371 }, { "epoch": 56.47761194029851, "grad_norm": 26.269071578979492, "learning_rate": 7.297619047619048e-06, "loss": 46.2933, "step": 2372 }, { "epoch": 56.50149253731343, "grad_norm": 25.23761558532715, "learning_rate": 7.293650793650794e-06, "loss": 44.4726, "step": 2373 }, { "epoch": 56.525373134328355, "grad_norm": 27.6646728515625, "learning_rate": 7.28968253968254e-06, "loss": 43.9823, "step": 2374 }, { "epoch": 56.549253731343285, "grad_norm": 23.01959228515625, "learning_rate": 7.285714285714286e-06, "loss": 44.5168, "step": 2375 }, { "epoch": 56.57313432835821, "grad_norm": 31.120128631591797, "learning_rate": 7.281746031746032e-06, "loss": 44.6706, "step": 2376 }, { "epoch": 56.59701492537313, "grad_norm": 24.57699203491211, "learning_rate": 7.277777777777778e-06, "loss": 45.1781, "step": 2377 }, { "epoch": 56.62089552238806, "grad_norm": 28.831064224243164, "learning_rate": 7.273809523809524e-06, "loss": 43.7437, "step": 2378 }, { "epoch": 56.644776119402984, "grad_norm": 26.15156364440918, "learning_rate": 7.2698412698412705e-06, "loss": 45.1624, "step": 2379 }, { "epoch": 56.668656716417914, "grad_norm": 28.825542449951172, "learning_rate": 7.265873015873016e-06, "loss": 44.66, "step": 2380 }, { "epoch": 56.69253731343284, "grad_norm": 26.763559341430664, "learning_rate": 7.261904761904762e-06, "loss": 43.6022, "step": 2381 }, { "epoch": 56.71641791044776, "grad_norm": 26.9444522857666, "learning_rate": 7.257936507936508e-06, "loss": 44.025, "step": 2382 }, { "epoch": 56.74029850746269, "grad_norm": 24.93570327758789, "learning_rate": 7.253968253968255e-06, "loss": 45.3596, "step": 2383 }, { "epoch": 56.76417910447761, "grad_norm": 29.15943717956543, "learning_rate": 7.25e-06, "loss": 44.2434, "step": 2384 }, { "epoch": 56.788059701492536, "grad_norm": 25.154356002807617, "learning_rate": 7.246031746031746e-06, "loss": 44.1319, "step": 2385 }, { "epoch": 56.811940298507466, "grad_norm": 24.86849021911621, "learning_rate": 7.2420634920634925e-06, "loss": 44.5886, "step": 2386 }, { "epoch": 56.83582089552239, "grad_norm": 22.996164321899414, "learning_rate": 7.238095238095239e-06, "loss": 43.9457, "step": 2387 }, { "epoch": 56.85970149253731, "grad_norm": 21.19574737548828, "learning_rate": 7.234126984126984e-06, "loss": 42.4004, "step": 2388 }, { "epoch": 56.88358208955224, "grad_norm": 22.6845645904541, "learning_rate": 7.23015873015873e-06, "loss": 43.7031, "step": 2389 }, { "epoch": 56.907462686567165, "grad_norm": 24.80902099609375, "learning_rate": 7.226190476190477e-06, "loss": 45.4586, "step": 2390 }, { "epoch": 56.93134328358209, "grad_norm": 23.157859802246094, "learning_rate": 7.222222222222223e-06, "loss": 43.4031, "step": 2391 }, { "epoch": 56.95522388059702, "grad_norm": 16.62670135498047, "learning_rate": 7.218253968253969e-06, "loss": 43.9552, "step": 2392 }, { "epoch": 56.97910447761194, "grad_norm": 31.313417434692383, "learning_rate": 7.2142857142857145e-06, "loss": 44.5037, "step": 2393 }, { "epoch": 57.0, "grad_norm": 17.58053970336914, "learning_rate": 7.210317460317461e-06, "loss": 38.2111, "step": 2394 }, { "epoch": 57.02388059701492, "grad_norm": 28.399444580078125, "learning_rate": 7.206349206349207e-06, "loss": 43.0462, "step": 2395 }, { "epoch": 57.04776119402985, "grad_norm": 28.965984344482422, "learning_rate": 7.202380952380953e-06, "loss": 45.5986, "step": 2396 }, { "epoch": 57.071641791044776, "grad_norm": 20.43558120727539, "learning_rate": 7.198412698412699e-06, "loss": 43.5525, "step": 2397 }, { "epoch": 57.0955223880597, "grad_norm": 29.135162353515625, "learning_rate": 7.194444444444445e-06, "loss": 44.4881, "step": 2398 }, { "epoch": 57.11940298507463, "grad_norm": 23.580699920654297, "learning_rate": 7.190476190476191e-06, "loss": 43.7271, "step": 2399 }, { "epoch": 57.14328358208955, "grad_norm": 22.67143440246582, "learning_rate": 7.186507936507937e-06, "loss": 42.7561, "step": 2400 }, { "epoch": 57.167164179104475, "grad_norm": 21.757719039916992, "learning_rate": 7.182539682539683e-06, "loss": 44.4426, "step": 2401 }, { "epoch": 57.191044776119405, "grad_norm": 22.878713607788086, "learning_rate": 7.178571428571429e-06, "loss": 43.9361, "step": 2402 }, { "epoch": 57.21492537313433, "grad_norm": 21.213376998901367, "learning_rate": 7.174603174603175e-06, "loss": 44.4992, "step": 2403 }, { "epoch": 57.23880597014925, "grad_norm": 26.92378807067871, "learning_rate": 7.1706349206349215e-06, "loss": 43.3883, "step": 2404 }, { "epoch": 57.26268656716418, "grad_norm": 21.269611358642578, "learning_rate": 7.166666666666667e-06, "loss": 45.1554, "step": 2405 }, { "epoch": 57.286567164179104, "grad_norm": 24.775962829589844, "learning_rate": 7.162698412698413e-06, "loss": 44.6757, "step": 2406 }, { "epoch": 57.31044776119403, "grad_norm": 23.0452880859375, "learning_rate": 7.1587301587301594e-06, "loss": 44.0688, "step": 2407 }, { "epoch": 57.33432835820896, "grad_norm": 25.50167465209961, "learning_rate": 7.154761904761906e-06, "loss": 44.7899, "step": 2408 }, { "epoch": 57.35820895522388, "grad_norm": 21.560483932495117, "learning_rate": 7.150793650793652e-06, "loss": 44.3325, "step": 2409 }, { "epoch": 57.3820895522388, "grad_norm": 22.5815486907959, "learning_rate": 7.146825396825397e-06, "loss": 44.1531, "step": 2410 }, { "epoch": 57.40597014925373, "grad_norm": 22.582660675048828, "learning_rate": 7.1428571428571436e-06, "loss": 45.738, "step": 2411 }, { "epoch": 57.429850746268656, "grad_norm": 21.388498306274414, "learning_rate": 7.13888888888889e-06, "loss": 44.2737, "step": 2412 }, { "epoch": 57.45373134328358, "grad_norm": 25.76162338256836, "learning_rate": 7.134920634920636e-06, "loss": 45.273, "step": 2413 }, { "epoch": 57.47761194029851, "grad_norm": 22.20672035217285, "learning_rate": 7.1309523809523814e-06, "loss": 44.8847, "step": 2414 }, { "epoch": 57.50149253731343, "grad_norm": 22.07602310180664, "learning_rate": 7.126984126984128e-06, "loss": 45.0162, "step": 2415 }, { "epoch": 57.525373134328355, "grad_norm": 23.339630126953125, "learning_rate": 7.123015873015874e-06, "loss": 45.1952, "step": 2416 }, { "epoch": 57.549253731343285, "grad_norm": 21.66541290283203, "learning_rate": 7.11904761904762e-06, "loss": 43.586, "step": 2417 }, { "epoch": 57.57313432835821, "grad_norm": 19.015748977661133, "learning_rate": 7.115079365079366e-06, "loss": 44.391, "step": 2418 }, { "epoch": 57.59701492537313, "grad_norm": 20.501789093017578, "learning_rate": 7.111111111111112e-06, "loss": 43.8462, "step": 2419 }, { "epoch": 57.62089552238806, "grad_norm": 20.447154998779297, "learning_rate": 7.107142857142858e-06, "loss": 44.0195, "step": 2420 }, { "epoch": 57.644776119402984, "grad_norm": 17.749000549316406, "learning_rate": 7.103174603174604e-06, "loss": 43.0026, "step": 2421 }, { "epoch": 57.668656716417914, "grad_norm": 22.757408142089844, "learning_rate": 7.09920634920635e-06, "loss": 44.1692, "step": 2422 }, { "epoch": 57.69253731343284, "grad_norm": 17.98983383178711, "learning_rate": 7.095238095238096e-06, "loss": 43.4101, "step": 2423 }, { "epoch": 57.71641791044776, "grad_norm": 24.20079803466797, "learning_rate": 7.091269841269842e-06, "loss": 44.8966, "step": 2424 }, { "epoch": 57.74029850746269, "grad_norm": 22.89764404296875, "learning_rate": 7.0873015873015884e-06, "loss": 43.2216, "step": 2425 }, { "epoch": 57.76417910447761, "grad_norm": 22.002994537353516, "learning_rate": 7.083333333333335e-06, "loss": 44.6504, "step": 2426 }, { "epoch": 57.788059701492536, "grad_norm": 21.76395606994629, "learning_rate": 7.07936507936508e-06, "loss": 44.2117, "step": 2427 }, { "epoch": 57.811940298507466, "grad_norm": 21.774457931518555, "learning_rate": 7.075396825396826e-06, "loss": 42.8879, "step": 2428 }, { "epoch": 57.83582089552239, "grad_norm": 20.503652572631836, "learning_rate": 7.0714285714285726e-06, "loss": 45.954, "step": 2429 }, { "epoch": 57.85970149253731, "grad_norm": 23.904399871826172, "learning_rate": 7.067460317460319e-06, "loss": 43.356, "step": 2430 }, { "epoch": 57.88358208955224, "grad_norm": 19.87941551208496, "learning_rate": 7.063492063492064e-06, "loss": 43.2895, "step": 2431 }, { "epoch": 57.907462686567165, "grad_norm": 21.451114654541016, "learning_rate": 7.0595238095238105e-06, "loss": 45.5597, "step": 2432 }, { "epoch": 57.93134328358209, "grad_norm": 19.936561584472656, "learning_rate": 7.055555555555557e-06, "loss": 44.9952, "step": 2433 }, { "epoch": 57.95522388059702, "grad_norm": 28.258575439453125, "learning_rate": 7.051587301587303e-06, "loss": 44.6988, "step": 2434 }, { "epoch": 57.97910447761194, "grad_norm": 17.210622787475586, "learning_rate": 7.047619047619048e-06, "loss": 43.6215, "step": 2435 }, { "epoch": 58.0, "grad_norm": 24.241130828857422, "learning_rate": 7.043650793650795e-06, "loss": 38.4149, "step": 2436 }, { "epoch": 58.02388059701492, "grad_norm": 16.801782608032227, "learning_rate": 7.039682539682541e-06, "loss": 43.5018, "step": 2437 }, { "epoch": 58.04776119402985, "grad_norm": 29.925647735595703, "learning_rate": 7.035714285714287e-06, "loss": 43.2125, "step": 2438 }, { "epoch": 58.071641791044776, "grad_norm": 18.95758056640625, "learning_rate": 7.0317460317460325e-06, "loss": 44.4259, "step": 2439 }, { "epoch": 58.0955223880597, "grad_norm": 23.302980422973633, "learning_rate": 7.027777777777778e-06, "loss": 44.0357, "step": 2440 }, { "epoch": 58.11940298507463, "grad_norm": 20.59381866455078, "learning_rate": 7.023809523809524e-06, "loss": 43.307, "step": 2441 }, { "epoch": 58.14328358208955, "grad_norm": 22.13384246826172, "learning_rate": 7.0198412698412695e-06, "loss": 44.4651, "step": 2442 }, { "epoch": 58.167164179104475, "grad_norm": 19.862943649291992, "learning_rate": 7.015873015873016e-06, "loss": 44.2492, "step": 2443 }, { "epoch": 58.191044776119405, "grad_norm": 17.979585647583008, "learning_rate": 7.011904761904762e-06, "loss": 44.8333, "step": 2444 }, { "epoch": 58.21492537313433, "grad_norm": 24.35641860961914, "learning_rate": 7.007936507936508e-06, "loss": 43.9583, "step": 2445 }, { "epoch": 58.23880597014925, "grad_norm": 20.068201065063477, "learning_rate": 7.0039682539682545e-06, "loss": 44.2379, "step": 2446 }, { "epoch": 58.26268656716418, "grad_norm": 24.976778030395508, "learning_rate": 7e-06, "loss": 45.3054, "step": 2447 }, { "epoch": 58.286567164179104, "grad_norm": 19.772478103637695, "learning_rate": 6.996031746031746e-06, "loss": 44.7523, "step": 2448 }, { "epoch": 58.31044776119403, "grad_norm": 24.084999084472656, "learning_rate": 6.992063492063492e-06, "loss": 43.7454, "step": 2449 }, { "epoch": 58.33432835820896, "grad_norm": 23.313718795776367, "learning_rate": 6.988095238095239e-06, "loss": 44.8838, "step": 2450 }, { "epoch": 58.35820895522388, "grad_norm": 22.206872940063477, "learning_rate": 6.984126984126984e-06, "loss": 45.3141, "step": 2451 }, { "epoch": 58.3820895522388, "grad_norm": 17.005720138549805, "learning_rate": 6.98015873015873e-06, "loss": 43.8783, "step": 2452 }, { "epoch": 58.40597014925373, "grad_norm": NaN, "learning_rate": 6.9761904761904765e-06, "loss": 43.2019, "step": 2453 }, { "epoch": 58.429850746268656, "grad_norm": 20.627288818359375, "learning_rate": 6.9761904761904765e-06, "loss": 44.6086, "step": 2454 }, { "epoch": 58.45373134328358, "grad_norm": 19.47835350036621, "learning_rate": 6.972222222222223e-06, "loss": 44.6011, "step": 2455 }, { "epoch": 58.47761194029851, "grad_norm": 23.258609771728516, "learning_rate": 6.968253968253968e-06, "loss": 44.0184, "step": 2456 }, { "epoch": 58.50149253731343, "grad_norm": 22.79289436340332, "learning_rate": 6.964285714285714e-06, "loss": 44.1053, "step": 2457 }, { "epoch": 58.525373134328355, "grad_norm": 20.517322540283203, "learning_rate": 6.960317460317461e-06, "loss": 43.8647, "step": 2458 }, { "epoch": 58.549253731343285, "grad_norm": 15.967523574829102, "learning_rate": 6.956349206349207e-06, "loss": 44.9457, "step": 2459 }, { "epoch": 58.57313432835821, "grad_norm": 26.704971313476562, "learning_rate": 6.952380952380952e-06, "loss": 42.5455, "step": 2460 }, { "epoch": 58.59701492537313, "grad_norm": 17.790040969848633, "learning_rate": 6.9484126984126985e-06, "loss": 45.0774, "step": 2461 }, { "epoch": 58.62089552238806, "grad_norm": 24.100841522216797, "learning_rate": 6.944444444444445e-06, "loss": 44.7616, "step": 2462 }, { "epoch": 58.644776119402984, "grad_norm": 17.18956184387207, "learning_rate": 6.940476190476191e-06, "loss": 44.6506, "step": 2463 }, { "epoch": 58.668656716417914, "grad_norm": 25.185026168823242, "learning_rate": 6.936507936507937e-06, "loss": 44.9502, "step": 2464 }, { "epoch": 58.69253731343284, "grad_norm": 22.515111923217773, "learning_rate": 6.932539682539683e-06, "loss": 43.802, "step": 2465 }, { "epoch": 58.71641791044776, "grad_norm": 26.777843475341797, "learning_rate": 6.928571428571429e-06, "loss": 44.0202, "step": 2466 }, { "epoch": 58.74029850746269, "grad_norm": 22.73821449279785, "learning_rate": 6.924603174603175e-06, "loss": 44.1536, "step": 2467 }, { "epoch": 58.76417910447761, "grad_norm": 20.890169143676758, "learning_rate": 6.920634920634921e-06, "loss": 43.6729, "step": 2468 }, { "epoch": 58.788059701492536, "grad_norm": 21.856304168701172, "learning_rate": 6.916666666666667e-06, "loss": 44.5367, "step": 2469 }, { "epoch": 58.811940298507466, "grad_norm": 23.25510025024414, "learning_rate": 6.912698412698413e-06, "loss": 44.8127, "step": 2470 }, { "epoch": 58.83582089552239, "grad_norm": 26.32556915283203, "learning_rate": 6.908730158730159e-06, "loss": 44.8174, "step": 2471 }, { "epoch": 58.85970149253731, "grad_norm": 17.11307716369629, "learning_rate": 6.9047619047619055e-06, "loss": 42.9014, "step": 2472 }, { "epoch": 58.88358208955224, "grad_norm": 22.380199432373047, "learning_rate": 6.900793650793651e-06, "loss": 43.9276, "step": 2473 }, { "epoch": 58.907462686567165, "grad_norm": 17.855484008789062, "learning_rate": 6.896825396825397e-06, "loss": 43.5554, "step": 2474 }, { "epoch": 58.93134328358209, "grad_norm": 18.112934112548828, "learning_rate": 6.892857142857143e-06, "loss": 43.1567, "step": 2475 }, { "epoch": 58.95522388059702, "grad_norm": 21.682844161987305, "learning_rate": 6.88888888888889e-06, "loss": 44.4182, "step": 2476 }, { "epoch": 58.97910447761194, "grad_norm": 22.78960609436035, "learning_rate": 6.884920634920635e-06, "loss": 44.5085, "step": 2477 }, { "epoch": 59.0, "grad_norm": 26.30175018310547, "learning_rate": 6.880952380952381e-06, "loss": 38.3404, "step": 2478 }, { "epoch": 59.02388059701492, "grad_norm": 23.714672088623047, "learning_rate": 6.8769841269841275e-06, "loss": 44.131, "step": 2479 }, { "epoch": 59.04776119402985, "grad_norm": 21.14238929748535, "learning_rate": 6.873015873015874e-06, "loss": 44.0208, "step": 2480 }, { "epoch": 59.071641791044776, "grad_norm": 25.2884464263916, "learning_rate": 6.86904761904762e-06, "loss": 43.4771, "step": 2481 }, { "epoch": 59.0955223880597, "grad_norm": 21.62645149230957, "learning_rate": 6.8650793650793654e-06, "loss": 43.3618, "step": 2482 }, { "epoch": 59.11940298507463, "grad_norm": 21.61750602722168, "learning_rate": 6.861111111111112e-06, "loss": 45.1006, "step": 2483 }, { "epoch": 59.14328358208955, "grad_norm": 25.124187469482422, "learning_rate": 6.857142857142858e-06, "loss": 44.2154, "step": 2484 }, { "epoch": 59.167164179104475, "grad_norm": 21.88067626953125, "learning_rate": 6.853174603174604e-06, "loss": 44.6725, "step": 2485 }, { "epoch": 59.191044776119405, "grad_norm": 26.95779037475586, "learning_rate": 6.8492063492063496e-06, "loss": 43.2419, "step": 2486 }, { "epoch": 59.21492537313433, "grad_norm": 18.35979461669922, "learning_rate": 6.845238095238096e-06, "loss": 42.7172, "step": 2487 }, { "epoch": 59.23880597014925, "grad_norm": 27.409034729003906, "learning_rate": 6.841269841269842e-06, "loss": 44.7929, "step": 2488 }, { "epoch": 59.26268656716418, "grad_norm": 23.257780075073242, "learning_rate": 6.837301587301588e-06, "loss": 45.0742, "step": 2489 }, { "epoch": 59.286567164179104, "grad_norm": 23.64436912536621, "learning_rate": 6.833333333333334e-06, "loss": 45.3608, "step": 2490 }, { "epoch": 59.31044776119403, "grad_norm": 19.836320877075195, "learning_rate": 6.82936507936508e-06, "loss": 43.3152, "step": 2491 }, { "epoch": 59.33432835820896, "grad_norm": 23.7291259765625, "learning_rate": 6.825396825396826e-06, "loss": 45.0224, "step": 2492 }, { "epoch": 59.35820895522388, "grad_norm": 22.776365280151367, "learning_rate": 6.8214285714285724e-06, "loss": 44.0495, "step": 2493 }, { "epoch": 59.3820895522388, "grad_norm": 19.556560516357422, "learning_rate": 6.817460317460318e-06, "loss": 43.0716, "step": 2494 }, { "epoch": 59.40597014925373, "grad_norm": 19.592493057250977, "learning_rate": 6.813492063492064e-06, "loss": 43.8102, "step": 2495 }, { "epoch": 59.429850746268656, "grad_norm": 18.4060001373291, "learning_rate": 6.80952380952381e-06, "loss": 42.5119, "step": 2496 }, { "epoch": 59.45373134328358, "grad_norm": 18.86701202392578, "learning_rate": 6.8055555555555566e-06, "loss": 42.7576, "step": 2497 }, { "epoch": 59.47761194029851, "grad_norm": 19.602235794067383, "learning_rate": 6.801587301587303e-06, "loss": 44.8432, "step": 2498 }, { "epoch": 59.50149253731343, "grad_norm": NaN, "learning_rate": 6.797619047619048e-06, "loss": 77.3141, "step": 2499 }, { "epoch": 59.525373134328355, "grad_norm": 18.09695816040039, "learning_rate": 6.797619047619048e-06, "loss": 44.5263, "step": 2500 }, { "epoch": 59.549253731343285, "grad_norm": 20.728633880615234, "learning_rate": 6.7936507936507944e-06, "loss": 45.2417, "step": 2501 }, { "epoch": 59.57313432835821, "grad_norm": 19.164405822753906, "learning_rate": 6.789682539682541e-06, "loss": 44.1673, "step": 2502 }, { "epoch": 59.59701492537313, "grad_norm": 17.986604690551758, "learning_rate": 6.785714285714287e-06, "loss": 44.9311, "step": 2503 }, { "epoch": 59.62089552238806, "grad_norm": 21.212223052978516, "learning_rate": 6.781746031746032e-06, "loss": 43.9186, "step": 2504 }, { "epoch": 59.644776119402984, "grad_norm": 19.34587287902832, "learning_rate": 6.777777777777779e-06, "loss": 43.9205, "step": 2505 }, { "epoch": 59.668656716417914, "grad_norm": 21.107257843017578, "learning_rate": 6.773809523809525e-06, "loss": 43.7522, "step": 2506 }, { "epoch": 59.69253731343284, "grad_norm": 19.897724151611328, "learning_rate": 6.769841269841271e-06, "loss": 43.7826, "step": 2507 }, { "epoch": 59.71641791044776, "grad_norm": 22.272457122802734, "learning_rate": 6.7658730158730165e-06, "loss": 44.4984, "step": 2508 }, { "epoch": 59.74029850746269, "grad_norm": 20.087844848632812, "learning_rate": 6.761904761904763e-06, "loss": 43.6682, "step": 2509 }, { "epoch": 59.76417910447761, "grad_norm": 22.083215713500977, "learning_rate": 6.757936507936509e-06, "loss": 43.1799, "step": 2510 }, { "epoch": 59.788059701492536, "grad_norm": 18.583234786987305, "learning_rate": 6.753968253968255e-06, "loss": 44.9017, "step": 2511 }, { "epoch": 59.811940298507466, "grad_norm": 20.20134735107422, "learning_rate": 6.750000000000001e-06, "loss": 44.1051, "step": 2512 }, { "epoch": 59.83582089552239, "grad_norm": 20.973419189453125, "learning_rate": 6.746031746031747e-06, "loss": 44.138, "step": 2513 }, { "epoch": 59.85970149253731, "grad_norm": 15.97662353515625, "learning_rate": 6.742063492063493e-06, "loss": 44.9675, "step": 2514 }, { "epoch": 59.88358208955224, "grad_norm": 23.126541137695312, "learning_rate": 6.738095238095239e-06, "loss": 44.4417, "step": 2515 }, { "epoch": 59.907462686567165, "grad_norm": 19.715782165527344, "learning_rate": 6.7341269841269856e-06, "loss": 44.768, "step": 2516 }, { "epoch": 59.93134328358209, "grad_norm": 27.000070571899414, "learning_rate": 6.730158730158731e-06, "loss": 45.0485, "step": 2517 }, { "epoch": 59.95522388059702, "grad_norm": 21.746152877807617, "learning_rate": 6.726190476190477e-06, "loss": 44.5848, "step": 2518 }, { "epoch": 59.97910447761194, "grad_norm": 17.86555290222168, "learning_rate": 6.7222222222222235e-06, "loss": 44.0213, "step": 2519 }, { "epoch": 60.0, "grad_norm": 20.369977951049805, "learning_rate": 6.718253968253968e-06, "loss": 38.2947, "step": 2520 }, { "epoch": 60.0, "step": 2520, "total_flos": 1.2389502240404026e+17, "train_loss": 14.973776844569615, "train_runtime": 26580.6785, "train_samples_per_second": 12.081, "train_steps_per_second": 0.095 }, { "epoch": 60.02388059701492, "grad_norm": 22.268356323242188, "learning_rate": 1e-05, "loss": 43.4551, "step": 2521 }, { "epoch": 60.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99702380952381e-06, "loss": 49.1255, "step": 2522 }, { "epoch": 60.071641791044776, "grad_norm": 202.42481994628906, "learning_rate": 9.99702380952381e-06, "loss": 48.2461, "step": 2523 }, { "epoch": 60.0955223880597, "grad_norm": 190.5668182373047, "learning_rate": 9.99404761904762e-06, "loss": 49.3017, "step": 2524 }, { "epoch": 60.11940298507463, "grad_norm": 69.69708251953125, "learning_rate": 9.99107142857143e-06, "loss": 47.4179, "step": 2525 }, { "epoch": 60.14328358208955, "grad_norm": 94.14574432373047, "learning_rate": 9.988095238095239e-06, "loss": 46.2633, "step": 2526 }, { "epoch": 60.167164179104475, "grad_norm": 59.666481018066406, "learning_rate": 9.985119047619048e-06, "loss": 45.9356, "step": 2527 }, { "epoch": 60.191044776119405, "grad_norm": 66.49242401123047, "learning_rate": 9.982142857142858e-06, "loss": 46.0376, "step": 2528 }, { "epoch": 60.21492537313433, "grad_norm": 46.52798080444336, "learning_rate": 9.979166666666668e-06, "loss": 42.4879, "step": 2529 }, { "epoch": 60.23880597014925, "grad_norm": 41.28635025024414, "learning_rate": 9.976190476190477e-06, "loss": 43.5567, "step": 2530 }, { "epoch": 60.26268656716418, "grad_norm": 48.749210357666016, "learning_rate": 9.973214285714287e-06, "loss": 44.6395, "step": 2531 }, { "epoch": 60.286567164179104, "grad_norm": 40.452842712402344, "learning_rate": 9.970238095238096e-06, "loss": 44.65, "step": 2532 }, { "epoch": 60.31044776119403, "grad_norm": 41.35492706298828, "learning_rate": 9.967261904761905e-06, "loss": 44.8214, "step": 2533 }, { "epoch": 60.33432835820896, "grad_norm": 32.109527587890625, "learning_rate": 9.964285714285714e-06, "loss": 46.1763, "step": 2534 }, { "epoch": 60.35820895522388, "grad_norm": 28.22223472595215, "learning_rate": 9.961309523809525e-06, "loss": 45.4326, "step": 2535 }, { "epoch": 60.3820895522388, "grad_norm": 42.460872650146484, "learning_rate": 9.958333333333334e-06, "loss": 44.2028, "step": 2536 }, { "epoch": 60.40597014925373, "grad_norm": 24.426565170288086, "learning_rate": 9.955357142857143e-06, "loss": 44.141, "step": 2537 }, { "epoch": 60.429850746268656, "grad_norm": 30.63353729248047, "learning_rate": 9.952380952380954e-06, "loss": 44.1465, "step": 2538 }, { "epoch": 60.45373134328358, "grad_norm": 24.15091323852539, "learning_rate": 9.949404761904763e-06, "loss": 44.4579, "step": 2539 }, { "epoch": 60.47761194029851, "grad_norm": 30.757854461669922, "learning_rate": 9.946428571428572e-06, "loss": 44.251, "step": 2540 }, { "epoch": 60.50149253731343, "grad_norm": 24.651243209838867, "learning_rate": 9.943452380952381e-06, "loss": 45.6042, "step": 2541 }, { "epoch": 60.525373134328355, "grad_norm": 23.414180755615234, "learning_rate": 9.940476190476192e-06, "loss": 43.8686, "step": 2542 }, { "epoch": 60.549253731343285, "grad_norm": 29.41202735900879, "learning_rate": 9.937500000000001e-06, "loss": 45.111, "step": 2543 }, { "epoch": 60.57313432835821, "grad_norm": 25.947559356689453, "learning_rate": 9.93452380952381e-06, "loss": 43.6131, "step": 2544 }, { "epoch": 60.59701492537313, "grad_norm": 30.613439559936523, "learning_rate": 9.93154761904762e-06, "loss": 44.9413, "step": 2545 }, { "epoch": 60.62089552238806, "grad_norm": 28.363725662231445, "learning_rate": 9.92857142857143e-06, "loss": 43.3202, "step": 2546 }, { "epoch": 60.644776119402984, "grad_norm": 24.05493927001953, "learning_rate": 9.925595238095239e-06, "loss": 45.434, "step": 2547 }, { "epoch": 60.668656716417914, "grad_norm": 30.285140991210938, "learning_rate": 9.922619047619048e-06, "loss": 44.2144, "step": 2548 }, { "epoch": 60.69253731343284, "grad_norm": 24.850799560546875, "learning_rate": 9.919642857142859e-06, "loss": 43.4151, "step": 2549 }, { "epoch": 60.71641791044776, "grad_norm": 23.70008087158203, "learning_rate": 9.916666666666668e-06, "loss": 44.0035, "step": 2550 }, { "epoch": 60.74029850746269, "grad_norm": 29.343976974487305, "learning_rate": 9.913690476190477e-06, "loss": 44.9651, "step": 2551 }, { "epoch": 60.76417910447761, "grad_norm": 19.914142608642578, "learning_rate": 9.910714285714288e-06, "loss": 44.3699, "step": 2552 }, { "epoch": 60.788059701492536, "grad_norm": 23.31308937072754, "learning_rate": 9.907738095238097e-06, "loss": 45.7724, "step": 2553 }, { "epoch": 60.811940298507466, "grad_norm": 26.089832305908203, "learning_rate": 9.904761904761906e-06, "loss": 44.9611, "step": 2554 }, { "epoch": 60.83582089552239, "grad_norm": 22.15082550048828, "learning_rate": 9.901785714285715e-06, "loss": 43.1386, "step": 2555 }, { "epoch": 60.85970149253731, "grad_norm": 30.4031925201416, "learning_rate": 9.898809523809525e-06, "loss": 44.324, "step": 2556 }, { "epoch": 60.88358208955224, "grad_norm": 22.16672134399414, "learning_rate": 9.895833333333334e-06, "loss": 43.6286, "step": 2557 }, { "epoch": 60.907462686567165, "grad_norm": 21.490468978881836, "learning_rate": 9.892857142857143e-06, "loss": 42.8494, "step": 2558 }, { "epoch": 60.93134328358209, "grad_norm": 28.823944091796875, "learning_rate": 9.889880952380954e-06, "loss": 42.9944, "step": 2559 }, { "epoch": 60.95522388059702, "grad_norm": 19.950031280517578, "learning_rate": 9.886904761904763e-06, "loss": 44.7105, "step": 2560 }, { "epoch": 60.97910447761194, "grad_norm": 31.069194793701172, "learning_rate": 9.883928571428572e-06, "loss": 43.6064, "step": 2561 }, { "epoch": 61.0, "grad_norm": 19.411388397216797, "learning_rate": 9.880952380952381e-06, "loss": 38.8114, "step": 2562 }, { "epoch": 61.02388059701492, "grad_norm": 26.66782569885254, "learning_rate": 9.877976190476192e-06, "loss": 43.0697, "step": 2563 }, { "epoch": 61.04776119402985, "grad_norm": 25.78309440612793, "learning_rate": 9.875000000000001e-06, "loss": 43.8682, "step": 2564 }, { "epoch": 61.071641791044776, "grad_norm": 19.312572479248047, "learning_rate": 9.87202380952381e-06, "loss": 44.7347, "step": 2565 }, { "epoch": 61.0955223880597, "grad_norm": 33.189598083496094, "learning_rate": 9.869047619047621e-06, "loss": 43.6253, "step": 2566 }, { "epoch": 61.11940298507463, "grad_norm": 27.351518630981445, "learning_rate": 9.86607142857143e-06, "loss": 43.6716, "step": 2567 }, { "epoch": 61.14328358208955, "grad_norm": 21.69624900817871, "learning_rate": 9.863095238095239e-06, "loss": 44.2277, "step": 2568 }, { "epoch": 61.167164179104475, "grad_norm": 27.58108901977539, "learning_rate": 9.860119047619048e-06, "loss": 44.3117, "step": 2569 }, { "epoch": 61.191044776119405, "grad_norm": 18.802303314208984, "learning_rate": 9.857142857142859e-06, "loss": 44.4119, "step": 2570 }, { "epoch": 61.21492537313433, "grad_norm": 21.36333656311035, "learning_rate": 9.854166666666668e-06, "loss": 45.0113, "step": 2571 }, { "epoch": 61.23880597014925, "grad_norm": 23.86919593811035, "learning_rate": 9.851190476190477e-06, "loss": 43.8577, "step": 2572 }, { "epoch": 61.26268656716418, "grad_norm": 20.259685516357422, "learning_rate": 9.848214285714288e-06, "loss": 43.921, "step": 2573 }, { "epoch": 61.286567164179104, "grad_norm": 29.262182235717773, "learning_rate": 9.845238095238097e-06, "loss": 44.1589, "step": 2574 }, { "epoch": 61.31044776119403, "grad_norm": 21.13313102722168, "learning_rate": 9.842261904761906e-06, "loss": 44.3854, "step": 2575 }, { "epoch": 61.33432835820896, "grad_norm": 23.83458137512207, "learning_rate": 9.839285714285715e-06, "loss": 43.5571, "step": 2576 }, { "epoch": 61.35820895522388, "grad_norm": 18.778934478759766, "learning_rate": 9.836309523809524e-06, "loss": 43.827, "step": 2577 }, { "epoch": 61.3820895522388, "grad_norm": 22.37734031677246, "learning_rate": 9.833333333333333e-06, "loss": 45.207, "step": 2578 }, { "epoch": 61.40597014925373, "grad_norm": 25.046817779541016, "learning_rate": 9.830357142857144e-06, "loss": 43.7649, "step": 2579 }, { "epoch": 61.429850746268656, "grad_norm": 21.867618560791016, "learning_rate": 9.827380952380953e-06, "loss": 43.7025, "step": 2580 }, { "epoch": 61.45373134328358, "grad_norm": 22.729969024658203, "learning_rate": 9.824404761904762e-06, "loss": 44.3454, "step": 2581 }, { "epoch": 61.47761194029851, "grad_norm": 23.03755760192871, "learning_rate": 9.821428571428573e-06, "loss": 43.6942, "step": 2582 }, { "epoch": 61.50149253731343, "grad_norm": 20.736244201660156, "learning_rate": 9.818452380952382e-06, "loss": 44.6758, "step": 2583 }, { "epoch": 61.525373134328355, "grad_norm": 17.63121795654297, "learning_rate": 9.81547619047619e-06, "loss": 43.3541, "step": 2584 }, { "epoch": 61.549253731343285, "grad_norm": 18.32825469970703, "learning_rate": 9.8125e-06, "loss": 43.8749, "step": 2585 }, { "epoch": 61.57313432835821, "grad_norm": 19.190811157226562, "learning_rate": 9.80952380952381e-06, "loss": 44.1191, "step": 2586 }, { "epoch": 61.59701492537313, "grad_norm": 17.29913330078125, "learning_rate": 9.80654761904762e-06, "loss": 44.5885, "step": 2587 }, { "epoch": 61.62089552238806, "grad_norm": 21.12825584411621, "learning_rate": 9.803571428571428e-06, "loss": 43.9206, "step": 2588 }, { "epoch": 61.644776119402984, "grad_norm": 26.83173942565918, "learning_rate": 9.80059523809524e-06, "loss": 44.7224, "step": 2589 }, { "epoch": 61.668656716417914, "grad_norm": 18.34907341003418, "learning_rate": 9.797619047619048e-06, "loss": 44.3625, "step": 2590 }, { "epoch": 61.69253731343284, "grad_norm": 18.709016799926758, "learning_rate": 9.794642857142857e-06, "loss": 43.2573, "step": 2591 }, { "epoch": 61.71641791044776, "grad_norm": 24.946210861206055, "learning_rate": 9.791666666666666e-06, "loss": 43.4754, "step": 2592 }, { "epoch": 61.74029850746269, "grad_norm": 22.253814697265625, "learning_rate": 9.788690476190477e-06, "loss": 43.7911, "step": 2593 }, { "epoch": 61.76417910447761, "grad_norm": 21.44365692138672, "learning_rate": 9.785714285714286e-06, "loss": 44.9724, "step": 2594 }, { "epoch": 61.788059701492536, "grad_norm": 19.002973556518555, "learning_rate": 9.782738095238095e-06, "loss": 44.5918, "step": 2595 }, { "epoch": 61.811940298507466, "grad_norm": 23.515329360961914, "learning_rate": 9.779761904761906e-06, "loss": 43.7888, "step": 2596 }, { "epoch": 61.83582089552239, "grad_norm": 22.531938552856445, "learning_rate": 9.776785714285715e-06, "loss": 43.3796, "step": 2597 }, { "epoch": 61.85970149253731, "grad_norm": 21.187646865844727, "learning_rate": 9.773809523809524e-06, "loss": 44.5443, "step": 2598 }, { "epoch": 61.88358208955224, "grad_norm": 16.221466064453125, "learning_rate": 9.770833333333333e-06, "loss": 43.3915, "step": 2599 }, { "epoch": 61.907462686567165, "grad_norm": 26.778657913208008, "learning_rate": 9.767857142857144e-06, "loss": 43.5025, "step": 2600 }, { "epoch": 61.93134328358209, "grad_norm": 22.768152236938477, "learning_rate": 9.764880952380953e-06, "loss": 44.5111, "step": 2601 }, { "epoch": 61.95522388059702, "grad_norm": 27.74826431274414, "learning_rate": 9.761904761904762e-06, "loss": 44.5974, "step": 2602 }, { "epoch": 61.97910447761194, "grad_norm": 22.05244255065918, "learning_rate": 9.758928571428573e-06, "loss": 43.5759, "step": 2603 }, { "epoch": 62.0, "grad_norm": 19.424118041992188, "learning_rate": 9.755952380952382e-06, "loss": 39.0078, "step": 2604 }, { "epoch": 62.02388059701492, "grad_norm": 26.683176040649414, "learning_rate": 9.75297619047619e-06, "loss": 43.6656, "step": 2605 }, { "epoch": 62.04776119402985, "grad_norm": 20.35538673400879, "learning_rate": 9.75e-06, "loss": 45.2448, "step": 2606 }, { "epoch": 62.071641791044776, "grad_norm": 20.601633071899414, "learning_rate": 9.74702380952381e-06, "loss": 45.3538, "step": 2607 }, { "epoch": 62.0955223880597, "grad_norm": 17.472143173217773, "learning_rate": 9.74404761904762e-06, "loss": 44.3077, "step": 2608 }, { "epoch": 62.11940298507463, "grad_norm": 21.365177154541016, "learning_rate": 9.741071428571429e-06, "loss": 43.7227, "step": 2609 }, { "epoch": 62.14328358208955, "grad_norm": 25.560060501098633, "learning_rate": 9.73809523809524e-06, "loss": 44.6434, "step": 2610 }, { "epoch": 62.167164179104475, "grad_norm": 20.327442169189453, "learning_rate": 9.735119047619048e-06, "loss": 43.5369, "step": 2611 }, { "epoch": 62.191044776119405, "grad_norm": 19.99593734741211, "learning_rate": 9.732142857142858e-06, "loss": 44.1446, "step": 2612 }, { "epoch": 62.21492537313433, "grad_norm": 19.648574829101562, "learning_rate": 9.729166666666667e-06, "loss": 43.4092, "step": 2613 }, { "epoch": 62.23880597014925, "grad_norm": 26.606019973754883, "learning_rate": 9.726190476190477e-06, "loss": 44.3531, "step": 2614 }, { "epoch": 62.26268656716418, "grad_norm": 27.989334106445312, "learning_rate": 9.723214285714286e-06, "loss": 44.3962, "step": 2615 }, { "epoch": 62.286567164179104, "grad_norm": 22.545223236083984, "learning_rate": 9.720238095238095e-06, "loss": 42.8718, "step": 2616 }, { "epoch": 62.31044776119403, "grad_norm": 26.62592315673828, "learning_rate": 9.717261904761906e-06, "loss": 43.004, "step": 2617 }, { "epoch": 62.33432835820896, "grad_norm": 26.52608299255371, "learning_rate": 9.714285714285715e-06, "loss": 43.2152, "step": 2618 }, { "epoch": 62.35820895522388, "grad_norm": 20.17901611328125, "learning_rate": 9.711309523809524e-06, "loss": 43.2228, "step": 2619 }, { "epoch": 62.3820895522388, "grad_norm": 30.335596084594727, "learning_rate": 9.708333333333333e-06, "loss": 45.0079, "step": 2620 }, { "epoch": 62.40597014925373, "grad_norm": 26.02729606628418, "learning_rate": 9.705357142857144e-06, "loss": 43.3155, "step": 2621 }, { "epoch": 62.429850746268656, "grad_norm": 25.07903480529785, "learning_rate": 9.702380952380953e-06, "loss": 45.2378, "step": 2622 }, { "epoch": 62.45373134328358, "grad_norm": 28.89750099182129, "learning_rate": 9.699404761904762e-06, "loss": 42.2074, "step": 2623 }, { "epoch": 62.47761194029851, "grad_norm": 20.475128173828125, "learning_rate": 9.696428571428573e-06, "loss": 43.8792, "step": 2624 }, { "epoch": 62.50149253731343, "grad_norm": 26.103612899780273, "learning_rate": 9.693452380952382e-06, "loss": 44.961, "step": 2625 }, { "epoch": 62.525373134328355, "grad_norm": 21.666906356811523, "learning_rate": 9.690476190476191e-06, "loss": 43.4776, "step": 2626 }, { "epoch": 62.549253731343285, "grad_norm": 22.16206932067871, "learning_rate": 9.6875e-06, "loss": 43.1076, "step": 2627 }, { "epoch": 62.57313432835821, "grad_norm": 18.17336082458496, "learning_rate": 9.68452380952381e-06, "loss": 43.7046, "step": 2628 }, { "epoch": 62.59701492537313, "grad_norm": 25.36472511291504, "learning_rate": 9.68154761904762e-06, "loss": 44.8569, "step": 2629 }, { "epoch": 62.62089552238806, "grad_norm": 18.750511169433594, "learning_rate": 9.678571428571429e-06, "loss": 43.73, "step": 2630 }, { "epoch": 62.644776119402984, "grad_norm": 18.174638748168945, "learning_rate": 9.67559523809524e-06, "loss": 44.4229, "step": 2631 }, { "epoch": 62.668656716417914, "grad_norm": 17.8627872467041, "learning_rate": 9.672619047619049e-06, "loss": 44.9343, "step": 2632 }, { "epoch": 62.69253731343284, "grad_norm": 28.161239624023438, "learning_rate": 9.669642857142858e-06, "loss": 43.7649, "step": 2633 }, { "epoch": 62.71641791044776, "grad_norm": 27.701793670654297, "learning_rate": 9.666666666666667e-06, "loss": 43.7122, "step": 2634 }, { "epoch": 62.74029850746269, "grad_norm": 18.659244537353516, "learning_rate": 9.663690476190477e-06, "loss": 43.5179, "step": 2635 }, { "epoch": 62.76417910447761, "grad_norm": 35.163169860839844, "learning_rate": 9.660714285714287e-06, "loss": 44.4568, "step": 2636 }, { "epoch": 62.788059701492536, "grad_norm": NaN, "learning_rate": 9.657738095238096e-06, "loss": 42.4459, "step": 2637 }, { "epoch": 62.811940298507466, "grad_norm": 26.87259292602539, "learning_rate": 9.657738095238096e-06, "loss": 42.7683, "step": 2638 }, { "epoch": 62.83582089552239, "grad_norm": 31.837942123413086, "learning_rate": 9.654761904761906e-06, "loss": 44.2405, "step": 2639 }, { "epoch": 62.85970149253731, "grad_norm": 24.40672492980957, "learning_rate": 9.651785714285715e-06, "loss": 44.4058, "step": 2640 }, { "epoch": 62.88358208955224, "grad_norm": 29.0338134765625, "learning_rate": 9.648809523809524e-06, "loss": 43.405, "step": 2641 }, { "epoch": 62.907462686567165, "grad_norm": 28.022174835205078, "learning_rate": 9.645833333333333e-06, "loss": 43.6733, "step": 2642 }, { "epoch": 62.93134328358209, "grad_norm": 19.517061233520508, "learning_rate": 9.642857142857144e-06, "loss": 44.2386, "step": 2643 }, { "epoch": 62.95522388059702, "grad_norm": 22.2393741607666, "learning_rate": 9.639880952380953e-06, "loss": 45.1874, "step": 2644 }, { "epoch": 62.97910447761194, "grad_norm": 27.02622413635254, "learning_rate": 9.636904761904762e-06, "loss": 43.8541, "step": 2645 }, { "epoch": 63.0, "grad_norm": 20.51211929321289, "learning_rate": 9.633928571428573e-06, "loss": 39.6487, "step": 2646 }, { "epoch": 63.02388059701492, "grad_norm": 24.02116584777832, "learning_rate": 9.630952380952382e-06, "loss": 44.5685, "step": 2647 }, { "epoch": 63.04776119402985, "grad_norm": 30.00434112548828, "learning_rate": 9.627976190476191e-06, "loss": 43.2549, "step": 2648 }, { "epoch": 63.071641791044776, "grad_norm": 23.16147804260254, "learning_rate": 9.625e-06, "loss": 44.1254, "step": 2649 }, { "epoch": 63.0955223880597, "grad_norm": 30.86275291442871, "learning_rate": 9.622023809523811e-06, "loss": 43.4804, "step": 2650 }, { "epoch": 63.11940298507463, "grad_norm": 27.942575454711914, "learning_rate": 9.61904761904762e-06, "loss": 44.4437, "step": 2651 }, { "epoch": 63.14328358208955, "grad_norm": 22.330169677734375, "learning_rate": 9.616071428571429e-06, "loss": 44.1067, "step": 2652 }, { "epoch": 63.167164179104475, "grad_norm": 27.878795623779297, "learning_rate": 9.61309523809524e-06, "loss": 42.5768, "step": 2653 }, { "epoch": 63.191044776119405, "grad_norm": 23.200098037719727, "learning_rate": 9.610119047619049e-06, "loss": 43.5906, "step": 2654 }, { "epoch": 63.21492537313433, "grad_norm": 23.872238159179688, "learning_rate": 9.607142857142858e-06, "loss": 43.8177, "step": 2655 }, { "epoch": 63.23880597014925, "grad_norm": 31.89397430419922, "learning_rate": 9.604166666666669e-06, "loss": 43.5719, "step": 2656 }, { "epoch": 63.26268656716418, "grad_norm": 24.745256423950195, "learning_rate": 9.601190476190478e-06, "loss": 43.1085, "step": 2657 }, { "epoch": 63.286567164179104, "grad_norm": 31.859682083129883, "learning_rate": 9.598214285714287e-06, "loss": 42.8871, "step": 2658 }, { "epoch": 63.31044776119403, "grad_norm": 25.792551040649414, "learning_rate": 9.595238095238096e-06, "loss": 42.7027, "step": 2659 }, { "epoch": 63.33432835820896, "grad_norm": 25.225967407226562, "learning_rate": 9.592261904761906e-06, "loss": 43.0075, "step": 2660 }, { "epoch": 63.35820895522388, "grad_norm": 27.146207809448242, "learning_rate": 9.589285714285716e-06, "loss": 44.3992, "step": 2661 }, { "epoch": 63.3820895522388, "grad_norm": 16.27069664001465, "learning_rate": 9.586309523809525e-06, "loss": 44.1708, "step": 2662 }, { "epoch": 63.40597014925373, "grad_norm": 34.79555892944336, "learning_rate": 9.583333333333335e-06, "loss": 44.4863, "step": 2663 }, { "epoch": 63.429850746268656, "grad_norm": 23.31925392150879, "learning_rate": 9.580357142857144e-06, "loss": 44.3615, "step": 2664 }, { "epoch": 63.45373134328358, "grad_norm": 28.239566802978516, "learning_rate": 9.577380952380953e-06, "loss": 45.042, "step": 2665 }, { "epoch": 63.47761194029851, "grad_norm": 25.107566833496094, "learning_rate": 9.574404761904762e-06, "loss": 44.4372, "step": 2666 }, { "epoch": 63.50149253731343, "grad_norm": 20.601322174072266, "learning_rate": 9.571428571428573e-06, "loss": 43.5807, "step": 2667 }, { "epoch": 63.525373134328355, "grad_norm": 34.94065475463867, "learning_rate": 9.568452380952382e-06, "loss": 42.8904, "step": 2668 }, { "epoch": 63.549253731343285, "grad_norm": 26.336591720581055, "learning_rate": 9.565476190476191e-06, "loss": 44.1117, "step": 2669 }, { "epoch": 63.57313432835821, "grad_norm": 35.81476974487305, "learning_rate": 9.562500000000002e-06, "loss": 44.847, "step": 2670 }, { "epoch": 63.59701492537313, "grad_norm": 21.011463165283203, "learning_rate": 9.559523809523811e-06, "loss": 43.9457, "step": 2671 }, { "epoch": 63.62089552238806, "grad_norm": 36.32665252685547, "learning_rate": 9.55654761904762e-06, "loss": 43.4279, "step": 2672 }, { "epoch": 63.644776119402984, "grad_norm": 21.384214401245117, "learning_rate": 9.55357142857143e-06, "loss": 45.3947, "step": 2673 }, { "epoch": 63.668656716417914, "grad_norm": 38.893680572509766, "learning_rate": 9.55059523809524e-06, "loss": 43.9397, "step": 2674 }, { "epoch": 63.69253731343284, "grad_norm": 26.085948944091797, "learning_rate": 9.547619047619049e-06, "loss": 43.118, "step": 2675 }, { "epoch": 63.71641791044776, "grad_norm": 35.67828369140625, "learning_rate": 9.544642857142858e-06, "loss": 44.8236, "step": 2676 }, { "epoch": 63.74029850746269, "grad_norm": 25.065685272216797, "learning_rate": 9.541666666666669e-06, "loss": 43.8344, "step": 2677 }, { "epoch": 63.76417910447761, "grad_norm": 26.32991600036621, "learning_rate": 9.538690476190478e-06, "loss": 45.5309, "step": 2678 }, { "epoch": 63.788059701492536, "grad_norm": 26.833250045776367, "learning_rate": 9.535714285714287e-06, "loss": 43.7626, "step": 2679 }, { "epoch": 63.811940298507466, "grad_norm": 23.64604949951172, "learning_rate": 9.532738095238096e-06, "loss": 43.7545, "step": 2680 }, { "epoch": 63.83582089552239, "grad_norm": 24.57122230529785, "learning_rate": 9.529761904761905e-06, "loss": 43.5666, "step": 2681 }, { "epoch": 63.85970149253731, "grad_norm": 21.429603576660156, "learning_rate": 9.526785714285714e-06, "loss": 44.8421, "step": 2682 }, { "epoch": 63.88358208955224, "grad_norm": NaN, "learning_rate": 9.523809523809525e-06, "loss": 77.6969, "step": 2683 }, { "epoch": 63.907462686567165, "grad_norm": 25.843442916870117, "learning_rate": 9.523809523809525e-06, "loss": 42.4353, "step": 2684 }, { "epoch": 63.93134328358209, "grad_norm": 26.74856185913086, "learning_rate": 9.520833333333334e-06, "loss": 45.0478, "step": 2685 }, { "epoch": 63.95522388059702, "grad_norm": 22.9956111907959, "learning_rate": 9.517857142857143e-06, "loss": 44.3383, "step": 2686 }, { "epoch": 63.97910447761194, "grad_norm": 17.62372589111328, "learning_rate": 9.514880952380952e-06, "loss": 43.2765, "step": 2687 }, { "epoch": 64.0, "grad_norm": 21.372318267822266, "learning_rate": 9.511904761904763e-06, "loss": 37.8049, "step": 2688 }, { "epoch": 64.02388059701492, "grad_norm": 23.412595748901367, "learning_rate": 9.508928571428572e-06, "loss": 45.446, "step": 2689 }, { "epoch": 64.04776119402985, "grad_norm": 21.825000762939453, "learning_rate": 9.50595238095238e-06, "loss": 42.871, "step": 2690 }, { "epoch": 64.07164179104478, "grad_norm": 18.50835418701172, "learning_rate": 9.502976190476191e-06, "loss": 43.1485, "step": 2691 }, { "epoch": 64.0955223880597, "grad_norm": 22.428272247314453, "learning_rate": 9.5e-06, "loss": 42.7172, "step": 2692 }, { "epoch": 64.11940298507463, "grad_norm": 19.58050537109375, "learning_rate": 9.49702380952381e-06, "loss": 43.4599, "step": 2693 }, { "epoch": 64.14328358208955, "grad_norm": 21.66231346130371, "learning_rate": 9.494047619047619e-06, "loss": 43.601, "step": 2694 }, { "epoch": 64.16716417910447, "grad_norm": 29.5888729095459, "learning_rate": 9.49107142857143e-06, "loss": 44.8395, "step": 2695 }, { "epoch": 64.1910447761194, "grad_norm": 16.3875675201416, "learning_rate": 9.488095238095238e-06, "loss": 43.8201, "step": 2696 }, { "epoch": 64.21492537313434, "grad_norm": 28.326553344726562, "learning_rate": 9.485119047619047e-06, "loss": 45.1189, "step": 2697 }, { "epoch": 64.23880597014926, "grad_norm": 20.549386978149414, "learning_rate": 9.482142857142858e-06, "loss": 44.0127, "step": 2698 }, { "epoch": 64.26268656716418, "grad_norm": 25.79012680053711, "learning_rate": 9.479166666666667e-06, "loss": 43.0571, "step": 2699 }, { "epoch": 64.2865671641791, "grad_norm": 31.000024795532227, "learning_rate": 9.476190476190476e-06, "loss": 42.4615, "step": 2700 }, { "epoch": 64.31044776119403, "grad_norm": 19.49623680114746, "learning_rate": 9.473214285714285e-06, "loss": 45.6714, "step": 2701 }, { "epoch": 64.33432835820895, "grad_norm": 23.13125991821289, "learning_rate": 9.470238095238096e-06, "loss": 44.1373, "step": 2702 }, { "epoch": 64.35820895522389, "grad_norm": 32.59320068359375, "learning_rate": 9.467261904761905e-06, "loss": 42.5976, "step": 2703 }, { "epoch": 64.38208955223881, "grad_norm": 19.396995544433594, "learning_rate": 9.464285714285714e-06, "loss": 43.9782, "step": 2704 }, { "epoch": 64.40597014925373, "grad_norm": 28.275136947631836, "learning_rate": 9.461309523809525e-06, "loss": 44.3116, "step": 2705 }, { "epoch": 64.42985074626866, "grad_norm": 25.157663345336914, "learning_rate": 9.458333333333334e-06, "loss": 45.07, "step": 2706 }, { "epoch": 64.45373134328358, "grad_norm": 22.684513092041016, "learning_rate": 9.455357142857143e-06, "loss": 44.1489, "step": 2707 }, { "epoch": 64.4776119402985, "grad_norm": 19.41883659362793, "learning_rate": 9.452380952380952e-06, "loss": 43.5031, "step": 2708 }, { "epoch": 64.50149253731344, "grad_norm": 27.202308654785156, "learning_rate": 9.449404761904763e-06, "loss": 44.4811, "step": 2709 }, { "epoch": 64.52537313432836, "grad_norm": 17.535995483398438, "learning_rate": 9.446428571428572e-06, "loss": 43.7648, "step": 2710 }, { "epoch": 64.54925373134328, "grad_norm": 21.47702980041504, "learning_rate": 9.443452380952381e-06, "loss": 42.7421, "step": 2711 }, { "epoch": 64.57313432835821, "grad_norm": 20.23499870300293, "learning_rate": 9.440476190476192e-06, "loss": 43.8339, "step": 2712 }, { "epoch": 64.59701492537313, "grad_norm": 19.41843032836914, "learning_rate": 9.4375e-06, "loss": 44.182, "step": 2713 }, { "epoch": 64.62089552238805, "grad_norm": 22.892518997192383, "learning_rate": 9.43452380952381e-06, "loss": 42.7459, "step": 2714 }, { "epoch": 64.64477611940299, "grad_norm": 25.601083755493164, "learning_rate": 9.431547619047619e-06, "loss": 44.316, "step": 2715 }, { "epoch": 64.66865671641791, "grad_norm": 20.23451042175293, "learning_rate": 9.42857142857143e-06, "loss": 44.6613, "step": 2716 }, { "epoch": 64.69253731343284, "grad_norm": 16.326499938964844, "learning_rate": 9.425595238095239e-06, "loss": 43.9092, "step": 2717 }, { "epoch": 64.71641791044776, "grad_norm": 29.170324325561523, "learning_rate": 9.422619047619048e-06, "loss": 42.957, "step": 2718 }, { "epoch": 64.74029850746268, "grad_norm": 24.257295608520508, "learning_rate": 9.419642857142858e-06, "loss": 44.119, "step": 2719 }, { "epoch": 64.7641791044776, "grad_norm": 21.303083419799805, "learning_rate": 9.416666666666667e-06, "loss": 43.4882, "step": 2720 }, { "epoch": 64.78805970149254, "grad_norm": 20.77082633972168, "learning_rate": 9.413690476190476e-06, "loss": 43.9079, "step": 2721 }, { "epoch": 64.81194029850747, "grad_norm": 24.470279693603516, "learning_rate": 9.410714285714286e-06, "loss": 45.0313, "step": 2722 }, { "epoch": 64.83582089552239, "grad_norm": 22.445308685302734, "learning_rate": 9.407738095238096e-06, "loss": 43.0798, "step": 2723 }, { "epoch": 64.85970149253731, "grad_norm": 27.02490234375, "learning_rate": 9.404761904761905e-06, "loss": 43.1318, "step": 2724 }, { "epoch": 64.88358208955223, "grad_norm": 22.678592681884766, "learning_rate": 9.401785714285714e-06, "loss": 44.1473, "step": 2725 }, { "epoch": 64.90746268656716, "grad_norm": 26.62460708618164, "learning_rate": 9.398809523809525e-06, "loss": 43.2439, "step": 2726 }, { "epoch": 64.9313432835821, "grad_norm": 24.331209182739258, "learning_rate": 9.395833333333334e-06, "loss": 43.517, "step": 2727 }, { "epoch": 64.95522388059702, "grad_norm": 20.00579071044922, "learning_rate": 9.392857142857143e-06, "loss": 44.7679, "step": 2728 }, { "epoch": 64.97910447761194, "grad_norm": 28.423246383666992, "learning_rate": 9.389880952380954e-06, "loss": 43.5171, "step": 2729 }, { "epoch": 65.0, "grad_norm": 22.855792999267578, "learning_rate": 9.386904761904763e-06, "loss": 38.3847, "step": 2730 }, { "epoch": 65.02388059701492, "grad_norm": 22.053749084472656, "learning_rate": 9.383928571428572e-06, "loss": 44.0032, "step": 2731 }, { "epoch": 65.04776119402985, "grad_norm": 24.45530891418457, "learning_rate": 9.380952380952381e-06, "loss": 43.4024, "step": 2732 }, { "epoch": 65.07164179104478, "grad_norm": 24.508438110351562, "learning_rate": 9.377976190476192e-06, "loss": 43.4435, "step": 2733 }, { "epoch": 65.0955223880597, "grad_norm": 22.03391456604004, "learning_rate": 9.375000000000001e-06, "loss": 43.413, "step": 2734 }, { "epoch": 65.11940298507463, "grad_norm": 23.95793342590332, "learning_rate": 9.37202380952381e-06, "loss": 44.1908, "step": 2735 }, { "epoch": 65.14328358208955, "grad_norm": 23.00299072265625, "learning_rate": 9.36904761904762e-06, "loss": 43.9526, "step": 2736 }, { "epoch": 65.16716417910447, "grad_norm": 21.019451141357422, "learning_rate": 9.36607142857143e-06, "loss": 44.114, "step": 2737 }, { "epoch": 65.1910447761194, "grad_norm": 21.974138259887695, "learning_rate": 9.363095238095239e-06, "loss": 43.8516, "step": 2738 }, { "epoch": 65.21492537313434, "grad_norm": 17.860519409179688, "learning_rate": 9.360119047619048e-06, "loss": 44.2829, "step": 2739 }, { "epoch": 65.23880597014926, "grad_norm": 18.76349639892578, "learning_rate": 9.357142857142859e-06, "loss": 42.7555, "step": 2740 }, { "epoch": 65.26268656716418, "grad_norm": 22.4278621673584, "learning_rate": 9.354166666666668e-06, "loss": 43.807, "step": 2741 }, { "epoch": 65.2865671641791, "grad_norm": 17.867431640625, "learning_rate": 9.351190476190477e-06, "loss": 43.474, "step": 2742 }, { "epoch": 65.31044776119403, "grad_norm": 16.060117721557617, "learning_rate": 9.348214285714287e-06, "loss": 44.2362, "step": 2743 }, { "epoch": 65.33432835820895, "grad_norm": 27.756179809570312, "learning_rate": 9.345238095238096e-06, "loss": 43.9566, "step": 2744 }, { "epoch": 65.35820895522389, "grad_norm": 18.937381744384766, "learning_rate": 9.342261904761905e-06, "loss": 42.2936, "step": 2745 }, { "epoch": 65.38208955223881, "grad_norm": 23.91965675354004, "learning_rate": 9.339285714285715e-06, "loss": 43.1194, "step": 2746 }, { "epoch": 65.40597014925373, "grad_norm": 28.585317611694336, "learning_rate": 9.336309523809525e-06, "loss": 43.7419, "step": 2747 }, { "epoch": 65.42985074626866, "grad_norm": 18.788578033447266, "learning_rate": 9.333333333333334e-06, "loss": 43.7811, "step": 2748 }, { "epoch": 65.45373134328358, "grad_norm": 24.84532928466797, "learning_rate": 9.330357142857143e-06, "loss": 44.4898, "step": 2749 }, { "epoch": 65.4776119402985, "grad_norm": 20.880659103393555, "learning_rate": 9.327380952380954e-06, "loss": 44.5627, "step": 2750 }, { "epoch": 65.50149253731344, "grad_norm": 18.502254486083984, "learning_rate": 9.324404761904763e-06, "loss": 43.621, "step": 2751 }, { "epoch": 65.52537313432836, "grad_norm": 23.150991439819336, "learning_rate": 9.321428571428572e-06, "loss": 43.9683, "step": 2752 }, { "epoch": 65.54925373134328, "grad_norm": 20.03653907775879, "learning_rate": 9.318452380952381e-06, "loss": 42.4545, "step": 2753 }, { "epoch": 65.57313432835821, "grad_norm": 24.8642635345459, "learning_rate": 9.315476190476192e-06, "loss": 43.1368, "step": 2754 }, { "epoch": 65.59701492537313, "grad_norm": 19.812273025512695, "learning_rate": 9.312500000000001e-06, "loss": 44.5991, "step": 2755 }, { "epoch": 65.62089552238805, "grad_norm": 20.746320724487305, "learning_rate": 9.30952380952381e-06, "loss": 42.3573, "step": 2756 }, { "epoch": 65.64477611940299, "grad_norm": 28.684810638427734, "learning_rate": 9.30654761904762e-06, "loss": 43.798, "step": 2757 }, { "epoch": 65.66865671641791, "grad_norm": 17.441326141357422, "learning_rate": 9.30357142857143e-06, "loss": 44.0526, "step": 2758 }, { "epoch": 65.69253731343284, "grad_norm": 27.091472625732422, "learning_rate": 9.300595238095239e-06, "loss": 43.5748, "step": 2759 }, { "epoch": 65.71641791044776, "grad_norm": 23.270544052124023, "learning_rate": 9.297619047619048e-06, "loss": 44.0027, "step": 2760 }, { "epoch": 65.74029850746268, "grad_norm": 28.322011947631836, "learning_rate": 9.294642857142859e-06, "loss": 44.4845, "step": 2761 }, { "epoch": 65.7641791044776, "grad_norm": 22.097503662109375, "learning_rate": 9.291666666666668e-06, "loss": 42.6655, "step": 2762 }, { "epoch": 65.78805970149254, "grad_norm": 28.492340087890625, "learning_rate": 9.288690476190477e-06, "loss": 44.669, "step": 2763 }, { "epoch": 65.81194029850747, "grad_norm": 18.208921432495117, "learning_rate": 9.285714285714288e-06, "loss": 44.1797, "step": 2764 }, { "epoch": 65.83582089552239, "grad_norm": 32.15492248535156, "learning_rate": 9.282738095238097e-06, "loss": 43.1981, "step": 2765 }, { "epoch": 65.85970149253731, "grad_norm": 22.335176467895508, "learning_rate": 9.279761904761906e-06, "loss": 43.0925, "step": 2766 }, { "epoch": 65.88358208955223, "grad_norm": 26.412460327148438, "learning_rate": 9.276785714285715e-06, "loss": 43.3175, "step": 2767 }, { "epoch": 65.90746268656716, "grad_norm": 21.380569458007812, "learning_rate": 9.273809523809525e-06, "loss": 45.7111, "step": 2768 }, { "epoch": 65.9313432835821, "grad_norm": 26.568763732910156, "learning_rate": 9.270833333333334e-06, "loss": 44.4841, "step": 2769 }, { "epoch": 65.95522388059702, "grad_norm": 22.947973251342773, "learning_rate": 9.267857142857144e-06, "loss": 44.0597, "step": 2770 }, { "epoch": 65.97910447761194, "grad_norm": 28.732847213745117, "learning_rate": 9.264880952380954e-06, "loss": 43.9232, "step": 2771 }, { "epoch": 66.0, "grad_norm": 19.51029396057129, "learning_rate": 9.261904761904763e-06, "loss": 38.3696, "step": 2772 }, { "epoch": 66.02388059701492, "grad_norm": 26.772396087646484, "learning_rate": 9.258928571428572e-06, "loss": 44.29, "step": 2773 }, { "epoch": 66.04776119402985, "grad_norm": 28.08762550354004, "learning_rate": 9.255952380952381e-06, "loss": 43.5123, "step": 2774 }, { "epoch": 66.07164179104478, "grad_norm": 23.839458465576172, "learning_rate": 9.252976190476192e-06, "loss": 42.9248, "step": 2775 }, { "epoch": 66.0955223880597, "grad_norm": 34.98361587524414, "learning_rate": 9.250000000000001e-06, "loss": 44.5183, "step": 2776 }, { "epoch": 66.11940298507463, "grad_norm": 18.406028747558594, "learning_rate": 9.24702380952381e-06, "loss": 43.6267, "step": 2777 }, { "epoch": 66.14328358208955, "grad_norm": 24.17736053466797, "learning_rate": 9.244047619047621e-06, "loss": 43.9814, "step": 2778 }, { "epoch": 66.16716417910447, "grad_norm": 25.904033660888672, "learning_rate": 9.24107142857143e-06, "loss": 44.2089, "step": 2779 }, { "epoch": 66.1910447761194, "grad_norm": 18.518312454223633, "learning_rate": 9.238095238095239e-06, "loss": 43.8829, "step": 2780 }, { "epoch": 66.21492537313434, "grad_norm": 13.93060302734375, "learning_rate": 9.235119047619048e-06, "loss": 43.0088, "step": 2781 }, { "epoch": 66.23880597014926, "grad_norm": 18.91266632080078, "learning_rate": 9.232142857142859e-06, "loss": 43.9835, "step": 2782 }, { "epoch": 66.26268656716418, "grad_norm": 22.63692283630371, "learning_rate": 9.229166666666668e-06, "loss": 43.8378, "step": 2783 }, { "epoch": 66.2865671641791, "grad_norm": 19.935054779052734, "learning_rate": 9.226190476190477e-06, "loss": 43.5139, "step": 2784 }, { "epoch": 66.31044776119403, "grad_norm": 20.09627342224121, "learning_rate": 9.223214285714288e-06, "loss": 42.9882, "step": 2785 }, { "epoch": 66.33432835820895, "grad_norm": 16.47371482849121, "learning_rate": 9.220238095238097e-06, "loss": 44.0665, "step": 2786 }, { "epoch": 66.35820895522389, "grad_norm": 25.363866806030273, "learning_rate": 9.217261904761904e-06, "loss": 44.696, "step": 2787 }, { "epoch": 66.38208955223881, "grad_norm": 19.95291519165039, "learning_rate": 9.214285714285715e-06, "loss": 44.1116, "step": 2788 }, { "epoch": 66.40597014925373, "grad_norm": NaN, "learning_rate": 9.211309523809524e-06, "loss": 76.4785, "step": 2789 }, { "epoch": 66.42985074626866, "grad_norm": 19.490074157714844, "learning_rate": 9.211309523809524e-06, "loss": 44.0432, "step": 2790 }, { "epoch": 66.45373134328358, "grad_norm": 17.4990234375, "learning_rate": 9.208333333333333e-06, "loss": 43.2972, "step": 2791 }, { "epoch": 66.4776119402985, "grad_norm": 18.9461727142334, "learning_rate": 9.205357142857144e-06, "loss": 43.6698, "step": 2792 }, { "epoch": 66.50149253731344, "grad_norm": 27.035369873046875, "learning_rate": 9.202380952380953e-06, "loss": 43.0748, "step": 2793 }, { "epoch": 66.52537313432836, "grad_norm": 18.747451782226562, "learning_rate": 9.199404761904762e-06, "loss": 43.4684, "step": 2794 }, { "epoch": 66.54925373134328, "grad_norm": 22.31947135925293, "learning_rate": 9.196428571428571e-06, "loss": 43.224, "step": 2795 }, { "epoch": 66.57313432835821, "grad_norm": 20.444355010986328, "learning_rate": 9.193452380952382e-06, "loss": 44.7154, "step": 2796 }, { "epoch": 66.59701492537313, "grad_norm": 20.574586868286133, "learning_rate": 9.19047619047619e-06, "loss": 42.8251, "step": 2797 }, { "epoch": 66.62089552238805, "grad_norm": 21.91870880126953, "learning_rate": 9.1875e-06, "loss": 42.1616, "step": 2798 }, { "epoch": 66.64477611940299, "grad_norm": 20.03777503967285, "learning_rate": 9.18452380952381e-06, "loss": 43.9713, "step": 2799 }, { "epoch": 66.66865671641791, "grad_norm": 25.128442764282227, "learning_rate": 9.18154761904762e-06, "loss": 43.1631, "step": 2800 }, { "epoch": 66.69253731343284, "grad_norm": 21.742931365966797, "learning_rate": 9.178571428571429e-06, "loss": 43.8442, "step": 2801 }, { "epoch": 66.71641791044776, "grad_norm": 25.366992950439453, "learning_rate": 9.17559523809524e-06, "loss": 42.6068, "step": 2802 }, { "epoch": 66.74029850746268, "grad_norm": 22.109886169433594, "learning_rate": 9.172619047619048e-06, "loss": 43.0879, "step": 2803 }, { "epoch": 66.7641791044776, "grad_norm": 26.36429786682129, "learning_rate": 9.169642857142857e-06, "loss": 43.9465, "step": 2804 }, { "epoch": 66.78805970149254, "grad_norm": 20.30027198791504, "learning_rate": 9.166666666666666e-06, "loss": 44.1096, "step": 2805 }, { "epoch": 66.81194029850747, "grad_norm": 25.123811721801758, "learning_rate": 9.163690476190477e-06, "loss": 44.2981, "step": 2806 }, { "epoch": 66.83582089552239, "grad_norm": 23.127016067504883, "learning_rate": 9.160714285714286e-06, "loss": 42.5751, "step": 2807 }, { "epoch": 66.85970149253731, "grad_norm": NaN, "learning_rate": 9.157738095238095e-06, "loss": 66.1901, "step": 2808 }, { "epoch": 66.88358208955223, "grad_norm": 26.10099220275879, "learning_rate": 9.157738095238095e-06, "loss": 44.763, "step": 2809 }, { "epoch": 66.90746268656716, "grad_norm": 23.104337692260742, "learning_rate": 9.154761904761906e-06, "loss": 43.0964, "step": 2810 }, { "epoch": 66.9313432835821, "grad_norm": 25.94508934020996, "learning_rate": 9.151785714285715e-06, "loss": 44.2004, "step": 2811 }, { "epoch": 66.95522388059702, "grad_norm": 19.599328994750977, "learning_rate": 9.148809523809524e-06, "loss": 43.9027, "step": 2812 }, { "epoch": 66.97910447761194, "grad_norm": NaN, "learning_rate": 9.145833333333333e-06, "loss": 53.695, "step": 2813 }, { "epoch": 67.0, "grad_norm": 26.630434036254883, "learning_rate": 9.145833333333333e-06, "loss": 39.2172, "step": 2814 }, { "epoch": 67.02388059701492, "grad_norm": 20.954557418823242, "learning_rate": 9.142857142857144e-06, "loss": 45.022, "step": 2815 }, { "epoch": 67.04776119402985, "grad_norm": 34.23554611206055, "learning_rate": 9.139880952380953e-06, "loss": 44.5962, "step": 2816 }, { "epoch": 67.07164179104478, "grad_norm": 23.212488174438477, "learning_rate": 9.136904761904762e-06, "loss": 43.3898, "step": 2817 }, { "epoch": 67.0955223880597, "grad_norm": 28.811594009399414, "learning_rate": 9.133928571428573e-06, "loss": 43.0525, "step": 2818 }, { "epoch": 67.11940298507463, "grad_norm": 25.612319946289062, "learning_rate": 9.130952380952382e-06, "loss": 45.0229, "step": 2819 }, { "epoch": 67.14328358208955, "grad_norm": 19.928409576416016, "learning_rate": 9.12797619047619e-06, "loss": 42.2313, "step": 2820 }, { "epoch": 67.16716417910447, "grad_norm": 21.425382614135742, "learning_rate": 9.125e-06, "loss": 43.8085, "step": 2821 }, { "epoch": 67.1910447761194, "grad_norm": 24.726892471313477, "learning_rate": 9.12202380952381e-06, "loss": 42.5952, "step": 2822 }, { "epoch": 67.21492537313434, "grad_norm": 21.010210037231445, "learning_rate": 9.11904761904762e-06, "loss": 44.5508, "step": 2823 }, { "epoch": 67.23880597014926, "grad_norm": 17.506437301635742, "learning_rate": 9.116071428571429e-06, "loss": 42.7668, "step": 2824 }, { "epoch": 67.26268656716418, "grad_norm": 20.494918823242188, "learning_rate": 9.11309523809524e-06, "loss": 42.8061, "step": 2825 }, { "epoch": 67.2865671641791, "grad_norm": 20.985994338989258, "learning_rate": 9.110119047619049e-06, "loss": 44.8666, "step": 2826 }, { "epoch": 67.31044776119403, "grad_norm": 22.91364097595215, "learning_rate": 9.107142857142858e-06, "loss": 44.1208, "step": 2827 }, { "epoch": 67.33432835820895, "grad_norm": 19.81299591064453, "learning_rate": 9.104166666666667e-06, "loss": 43.939, "step": 2828 }, { "epoch": 67.35820895522389, "grad_norm": 23.234989166259766, "learning_rate": 9.101190476190477e-06, "loss": 42.0411, "step": 2829 }, { "epoch": 67.38208955223881, "grad_norm": 22.17540168762207, "learning_rate": 9.098214285714286e-06, "loss": 43.5693, "step": 2830 }, { "epoch": 67.40597014925373, "grad_norm": 21.292728424072266, "learning_rate": 9.095238095238095e-06, "loss": 44.0742, "step": 2831 }, { "epoch": 67.42985074626866, "grad_norm": 28.952625274658203, "learning_rate": 9.092261904761906e-06, "loss": 42.8393, "step": 2832 }, { "epoch": 67.45373134328358, "grad_norm": 19.387126922607422, "learning_rate": 9.089285714285715e-06, "loss": 41.7117, "step": 2833 }, { "epoch": 67.4776119402985, "grad_norm": 23.430946350097656, "learning_rate": 9.086309523809524e-06, "loss": 42.7006, "step": 2834 }, { "epoch": 67.50149253731344, "grad_norm": 28.108659744262695, "learning_rate": 9.083333333333333e-06, "loss": 45.417, "step": 2835 }, { "epoch": 67.52537313432836, "grad_norm": 22.115737915039062, "learning_rate": 9.080357142857144e-06, "loss": 44.4444, "step": 2836 }, { "epoch": 67.54925373134328, "grad_norm": 29.82137107849121, "learning_rate": 9.077380952380953e-06, "loss": 43.4888, "step": 2837 }, { "epoch": 67.57313432835821, "grad_norm": 25.010299682617188, "learning_rate": 9.074404761904762e-06, "loss": 43.9609, "step": 2838 }, { "epoch": 67.59701492537313, "grad_norm": 21.027952194213867, "learning_rate": 9.071428571428573e-06, "loss": 44.2489, "step": 2839 }, { "epoch": 67.62089552238805, "grad_norm": 27.009239196777344, "learning_rate": 9.068452380952382e-06, "loss": 43.6564, "step": 2840 }, { "epoch": 67.64477611940299, "grad_norm": 19.743545532226562, "learning_rate": 9.065476190476191e-06, "loss": 43.9997, "step": 2841 }, { "epoch": 67.66865671641791, "grad_norm": 28.90030288696289, "learning_rate": 9.0625e-06, "loss": 42.6926, "step": 2842 }, { "epoch": 67.69253731343284, "grad_norm": 25.418079376220703, "learning_rate": 9.05952380952381e-06, "loss": 43.2036, "step": 2843 }, { "epoch": 67.71641791044776, "grad_norm": 18.400348663330078, "learning_rate": 9.05654761904762e-06, "loss": 44.4565, "step": 2844 }, { "epoch": 67.74029850746268, "grad_norm": 26.924072265625, "learning_rate": 9.053571428571429e-06, "loss": 44.4893, "step": 2845 }, { "epoch": 67.7641791044776, "grad_norm": 25.352108001708984, "learning_rate": 9.05059523809524e-06, "loss": 43.0946, "step": 2846 }, { "epoch": 67.78805970149254, "grad_norm": 19.23507308959961, "learning_rate": 9.047619047619049e-06, "loss": 43.0507, "step": 2847 }, { "epoch": 67.81194029850747, "grad_norm": 28.143768310546875, "learning_rate": 9.044642857142858e-06, "loss": 43.2408, "step": 2848 }, { "epoch": 67.83582089552239, "grad_norm": 20.298084259033203, "learning_rate": 9.041666666666667e-06, "loss": 44.288, "step": 2849 }, { "epoch": 67.85970149253731, "grad_norm": 21.65508460998535, "learning_rate": 9.038690476190478e-06, "loss": 43.4648, "step": 2850 }, { "epoch": 67.88358208955223, "grad_norm": 23.92845916748047, "learning_rate": 9.035714285714287e-06, "loss": 42.1811, "step": 2851 }, { "epoch": 67.90746268656716, "grad_norm": 20.545076370239258, "learning_rate": 9.032738095238096e-06, "loss": 42.9022, "step": 2852 }, { "epoch": 67.9313432835821, "grad_norm": 32.36678695678711, "learning_rate": 9.029761904761906e-06, "loss": 43.4491, "step": 2853 }, { "epoch": 67.95522388059702, "grad_norm": 23.14188003540039, "learning_rate": 9.026785714285715e-06, "loss": 44.4911, "step": 2854 }, { "epoch": 67.97910447761194, "grad_norm": 31.488239288330078, "learning_rate": 9.023809523809524e-06, "loss": 44.5784, "step": 2855 }, { "epoch": 68.0, "grad_norm": 24.96849250793457, "learning_rate": 9.020833333333334e-06, "loss": 38.8154, "step": 2856 }, { "epoch": 68.02388059701492, "grad_norm": 26.00245475769043, "learning_rate": 9.017857142857144e-06, "loss": 43.6339, "step": 2857 }, { "epoch": 68.04776119402985, "grad_norm": 33.977596282958984, "learning_rate": 9.014880952380953e-06, "loss": 43.7634, "step": 2858 }, { "epoch": 68.07164179104478, "grad_norm": 21.762340545654297, "learning_rate": 9.011904761904762e-06, "loss": 43.8865, "step": 2859 }, { "epoch": 68.0955223880597, "grad_norm": 38.268455505371094, "learning_rate": 9.008928571428573e-06, "loss": 43.8947, "step": 2860 }, { "epoch": 68.11940298507463, "grad_norm": 26.789215087890625, "learning_rate": 9.005952380952382e-06, "loss": 42.0072, "step": 2861 }, { "epoch": 68.14328358208955, "grad_norm": 44.15632629394531, "learning_rate": 9.002976190476191e-06, "loss": 43.1045, "step": 2862 }, { "epoch": 68.16716417910447, "grad_norm": 36.71260070800781, "learning_rate": 9e-06, "loss": 43.7232, "step": 2863 }, { "epoch": 68.1910447761194, "grad_norm": 38.94734191894531, "learning_rate": 8.997023809523811e-06, "loss": 43.4934, "step": 2864 }, { "epoch": 68.21492537313434, "grad_norm": 38.295501708984375, "learning_rate": 8.99404761904762e-06, "loss": 43.3372, "step": 2865 }, { "epoch": 68.23880597014926, "grad_norm": 30.496740341186523, "learning_rate": 8.991071428571429e-06, "loss": 43.4905, "step": 2866 }, { "epoch": 68.26268656716418, "grad_norm": 30.001113891601562, "learning_rate": 8.98809523809524e-06, "loss": 44.0486, "step": 2867 }, { "epoch": 68.2865671641791, "grad_norm": 39.6522216796875, "learning_rate": 8.985119047619049e-06, "loss": 43.4828, "step": 2868 }, { "epoch": 68.31044776119403, "grad_norm": 31.28143310546875, "learning_rate": 8.982142857142858e-06, "loss": 43.7613, "step": 2869 }, { "epoch": 68.33432835820895, "grad_norm": 31.950016021728516, "learning_rate": 8.979166666666667e-06, "loss": 43.4209, "step": 2870 }, { "epoch": 68.35820895522389, "grad_norm": 30.61543083190918, "learning_rate": 8.976190476190478e-06, "loss": 43.2437, "step": 2871 }, { "epoch": 68.38208955223881, "grad_norm": 39.3588752746582, "learning_rate": 8.973214285714287e-06, "loss": 43.1893, "step": 2872 }, { "epoch": 68.40597014925373, "grad_norm": 29.70042610168457, "learning_rate": 8.970238095238096e-06, "loss": 42.1193, "step": 2873 }, { "epoch": 68.42985074626866, "grad_norm": 40.6136474609375, "learning_rate": 8.967261904761907e-06, "loss": 41.7532, "step": 2874 }, { "epoch": 68.45373134328358, "grad_norm": 36.44509506225586, "learning_rate": 8.964285714285716e-06, "loss": 44.5191, "step": 2875 }, { "epoch": 68.4776119402985, "grad_norm": NaN, "learning_rate": 8.961309523809525e-06, "loss": 70.4286, "step": 2876 }, { "epoch": 68.50149253731344, "grad_norm": 27.74057960510254, "learning_rate": 8.961309523809525e-06, "loss": 43.254, "step": 2877 }, { "epoch": 68.52537313432836, "grad_norm": 29.346860885620117, "learning_rate": 8.958333333333334e-06, "loss": 43.5863, "step": 2878 }, { "epoch": 68.54925373134328, "grad_norm": 36.642398834228516, "learning_rate": 8.955357142857144e-06, "loss": 43.3733, "step": 2879 }, { "epoch": 68.57313432835821, "grad_norm": 33.670162200927734, "learning_rate": 8.952380952380953e-06, "loss": 43.7232, "step": 2880 }, { "epoch": 68.59701492537313, "grad_norm": 33.738712310791016, "learning_rate": 8.949404761904763e-06, "loss": 42.704, "step": 2881 }, { "epoch": 68.62089552238805, "grad_norm": 31.452713012695312, "learning_rate": 8.946428571428573e-06, "loss": 43.8197, "step": 2882 }, { "epoch": 68.64477611940299, "grad_norm": 35.86618423461914, "learning_rate": 8.943452380952382e-06, "loss": 44.9871, "step": 2883 }, { "epoch": 68.66865671641791, "grad_norm": 30.94584083557129, "learning_rate": 8.940476190476191e-06, "loss": 43.8781, "step": 2884 }, { "epoch": 68.69253731343284, "grad_norm": 30.81380271911621, "learning_rate": 8.9375e-06, "loss": 44.2209, "step": 2885 }, { "epoch": 68.71641791044776, "grad_norm": 28.51966094970703, "learning_rate": 8.934523809523811e-06, "loss": 44.16, "step": 2886 }, { "epoch": 68.74029850746268, "grad_norm": NaN, "learning_rate": 8.93154761904762e-06, "loss": 77.0854, "step": 2887 }, { "epoch": 68.7641791044776, "grad_norm": 34.56461715698242, "learning_rate": 8.93154761904762e-06, "loss": 42.7519, "step": 2888 }, { "epoch": 68.78805970149254, "grad_norm": 27.781518936157227, "learning_rate": 8.92857142857143e-06, "loss": 44.0645, "step": 2889 }, { "epoch": 68.81194029850747, "grad_norm": 33.2479133605957, "learning_rate": 8.92559523809524e-06, "loss": 44.109, "step": 2890 }, { "epoch": 68.83582089552239, "grad_norm": 30.329626083374023, "learning_rate": 8.922619047619049e-06, "loss": 42.8678, "step": 2891 }, { "epoch": 68.85970149253731, "grad_norm": 32.120269775390625, "learning_rate": 8.919642857142858e-06, "loss": 44.2325, "step": 2892 }, { "epoch": 68.88358208955223, "grad_norm": 27.283164978027344, "learning_rate": 8.916666666666667e-06, "loss": 43.7788, "step": 2893 }, { "epoch": 68.90746268656716, "grad_norm": 31.86570930480957, "learning_rate": 8.913690476190478e-06, "loss": 44.3469, "step": 2894 }, { "epoch": 68.9313432835821, "grad_norm": 22.55097007751465, "learning_rate": 8.910714285714287e-06, "loss": 43.85, "step": 2895 }, { "epoch": 68.95522388059702, "grad_norm": 34.648773193359375, "learning_rate": 8.907738095238096e-06, "loss": 44.526, "step": 2896 }, { "epoch": 68.97910447761194, "grad_norm": 26.3565731048584, "learning_rate": 8.904761904761905e-06, "loss": 44.2, "step": 2897 }, { "epoch": 69.0, "grad_norm": 34.48598098754883, "learning_rate": 8.901785714285714e-06, "loss": 37.5511, "step": 2898 }, { "epoch": 69.02388059701492, "grad_norm": 36.6775016784668, "learning_rate": 8.898809523809525e-06, "loss": 42.6231, "step": 2899 }, { "epoch": 69.04776119402985, "grad_norm": 33.529296875, "learning_rate": 8.895833333333334e-06, "loss": 43.2504, "step": 2900 }, { "epoch": 69.07164179104478, "grad_norm": 31.762542724609375, "learning_rate": 8.892857142857143e-06, "loss": 43.752, "step": 2901 }, { "epoch": 69.0955223880597, "grad_norm": 28.147245407104492, "learning_rate": 8.889880952380952e-06, "loss": 43.8319, "step": 2902 }, { "epoch": 69.11940298507463, "grad_norm": 27.896669387817383, "learning_rate": 8.886904761904763e-06, "loss": 42.1947, "step": 2903 }, { "epoch": 69.14328358208955, "grad_norm": 28.625850677490234, "learning_rate": 8.883928571428572e-06, "loss": 43.3085, "step": 2904 }, { "epoch": 69.16716417910447, "grad_norm": 28.545974731445312, "learning_rate": 8.88095238095238e-06, "loss": 42.9532, "step": 2905 }, { "epoch": 69.1910447761194, "grad_norm": 34.869781494140625, "learning_rate": 8.877976190476192e-06, "loss": 44.2308, "step": 2906 }, { "epoch": 69.21492537313434, "grad_norm": 30.4566650390625, "learning_rate": 8.875e-06, "loss": 43.3219, "step": 2907 }, { "epoch": 69.23880597014926, "grad_norm": 29.15296745300293, "learning_rate": 8.87202380952381e-06, "loss": 44.0736, "step": 2908 }, { "epoch": 69.26268656716418, "grad_norm": 29.97230339050293, "learning_rate": 8.869047619047619e-06, "loss": 43.3511, "step": 2909 }, { "epoch": 69.2865671641791, "grad_norm": 30.087413787841797, "learning_rate": 8.86607142857143e-06, "loss": 43.8738, "step": 2910 }, { "epoch": 69.31044776119403, "grad_norm": 23.660356521606445, "learning_rate": 8.863095238095238e-06, "loss": 43.5448, "step": 2911 }, { "epoch": 69.33432835820895, "grad_norm": 28.7690372467041, "learning_rate": 8.860119047619048e-06, "loss": 42.9617, "step": 2912 }, { "epoch": 69.35820895522389, "grad_norm": 25.60896110534668, "learning_rate": 8.857142857142858e-06, "loss": 45.3785, "step": 2913 }, { "epoch": 69.38208955223881, "grad_norm": 31.27063751220703, "learning_rate": 8.854166666666667e-06, "loss": 43.376, "step": 2914 }, { "epoch": 69.40597014925373, "grad_norm": 18.817829132080078, "learning_rate": 8.851190476190476e-06, "loss": 44.0999, "step": 2915 }, { "epoch": 69.42985074626866, "grad_norm": 36.125919342041016, "learning_rate": 8.848214285714285e-06, "loss": 43.8083, "step": 2916 }, { "epoch": 69.45373134328358, "grad_norm": 25.558866500854492, "learning_rate": 8.845238095238096e-06, "loss": 45.1369, "step": 2917 }, { "epoch": 69.4776119402985, "grad_norm": 38.35983657836914, "learning_rate": 8.842261904761905e-06, "loss": 43.587, "step": 2918 }, { "epoch": 69.50149253731344, "grad_norm": 30.0064754486084, "learning_rate": 8.839285714285714e-06, "loss": 43.1645, "step": 2919 }, { "epoch": 69.52537313432836, "grad_norm": 31.177242279052734, "learning_rate": 8.836309523809525e-06, "loss": 42.2305, "step": 2920 }, { "epoch": 69.54925373134328, "grad_norm": 29.04176139831543, "learning_rate": 8.833333333333334e-06, "loss": 45.1403, "step": 2921 }, { "epoch": 69.57313432835821, "grad_norm": 35.95783233642578, "learning_rate": 8.830357142857143e-06, "loss": 43.4081, "step": 2922 }, { "epoch": 69.59701492537313, "grad_norm": 27.838382720947266, "learning_rate": 8.827380952380952e-06, "loss": 44.7195, "step": 2923 }, { "epoch": 69.62089552238805, "grad_norm": 30.860624313354492, "learning_rate": 8.824404761904763e-06, "loss": 42.7175, "step": 2924 }, { "epoch": 69.64477611940299, "grad_norm": 21.701316833496094, "learning_rate": 8.821428571428572e-06, "loss": 43.0401, "step": 2925 }, { "epoch": 69.66865671641791, "grad_norm": 27.270732879638672, "learning_rate": 8.818452380952381e-06, "loss": 43.686, "step": 2926 }, { "epoch": 69.69253731343284, "grad_norm": 25.814538955688477, "learning_rate": 8.815476190476192e-06, "loss": 44.3424, "step": 2927 }, { "epoch": 69.71641791044776, "grad_norm": 26.155197143554688, "learning_rate": 8.8125e-06, "loss": 43.6455, "step": 2928 }, { "epoch": 69.74029850746268, "grad_norm": 20.438846588134766, "learning_rate": 8.80952380952381e-06, "loss": 44.3784, "step": 2929 }, { "epoch": 69.7641791044776, "grad_norm": 26.45317268371582, "learning_rate": 8.806547619047619e-06, "loss": 42.6501, "step": 2930 }, { "epoch": 69.78805970149254, "grad_norm": 22.06026840209961, "learning_rate": 8.80357142857143e-06, "loss": 42.4144, "step": 2931 }, { "epoch": 69.81194029850747, "grad_norm": 24.45191764831543, "learning_rate": 8.800595238095239e-06, "loss": 43.7415, "step": 2932 }, { "epoch": 69.83582089552239, "grad_norm": 26.77782440185547, "learning_rate": 8.797619047619048e-06, "loss": 43.2565, "step": 2933 }, { "epoch": 69.85970149253731, "grad_norm": 22.350242614746094, "learning_rate": 8.794642857142858e-06, "loss": 43.5869, "step": 2934 }, { "epoch": 69.88358208955223, "grad_norm": 23.063016891479492, "learning_rate": 8.791666666666667e-06, "loss": 43.3821, "step": 2935 }, { "epoch": 69.90746268656716, "grad_norm": 18.864139556884766, "learning_rate": 8.788690476190477e-06, "loss": 42.8421, "step": 2936 }, { "epoch": 69.9313432835821, "grad_norm": 19.763843536376953, "learning_rate": 8.785714285714286e-06, "loss": 43.3783, "step": 2937 }, { "epoch": 69.95522388059702, "grad_norm": 19.347801208496094, "learning_rate": 8.782738095238096e-06, "loss": 42.8249, "step": 2938 }, { "epoch": 69.97910447761194, "grad_norm": 16.3013858795166, "learning_rate": 8.779761904761905e-06, "loss": 42.9306, "step": 2939 }, { "epoch": 70.0, "grad_norm": 15.056166648864746, "learning_rate": 8.776785714285714e-06, "loss": 38.2541, "step": 2940 }, { "epoch": 70.02388059701492, "grad_norm": 14.691337585449219, "learning_rate": 8.773809523809525e-06, "loss": 42.8378, "step": 2941 }, { "epoch": 70.04776119402985, "grad_norm": 24.51978874206543, "learning_rate": 8.770833333333334e-06, "loss": 43.2568, "step": 2942 }, { "epoch": 70.07164179104478, "grad_norm": 16.552833557128906, "learning_rate": 8.767857142857143e-06, "loss": 44.9251, "step": 2943 }, { "epoch": 70.0955223880597, "grad_norm": 26.28189468383789, "learning_rate": 8.764880952380952e-06, "loss": 43.8141, "step": 2944 }, { "epoch": 70.11940298507463, "grad_norm": 21.29142951965332, "learning_rate": 8.761904761904763e-06, "loss": 43.8724, "step": 2945 }, { "epoch": 70.14328358208955, "grad_norm": 23.159542083740234, "learning_rate": 8.758928571428572e-06, "loss": 44.4016, "step": 2946 }, { "epoch": 70.16716417910447, "grad_norm": 21.08184051513672, "learning_rate": 8.755952380952381e-06, "loss": 44.1115, "step": 2947 }, { "epoch": 70.1910447761194, "grad_norm": 18.838504791259766, "learning_rate": 8.752976190476192e-06, "loss": 43.2276, "step": 2948 }, { "epoch": 70.21492537313434, "grad_norm": 21.613079071044922, "learning_rate": 8.750000000000001e-06, "loss": 42.679, "step": 2949 }, { "epoch": 70.23880597014926, "grad_norm": 21.29805564880371, "learning_rate": 8.74702380952381e-06, "loss": 43.1822, "step": 2950 }, { "epoch": 70.26268656716418, "grad_norm": NaN, "learning_rate": 8.744047619047619e-06, "loss": 43.3802, "step": 2951 }, { "epoch": 70.2865671641791, "grad_norm": 22.393659591674805, "learning_rate": 8.744047619047619e-06, "loss": 43.3569, "step": 2952 }, { "epoch": 70.31044776119403, "grad_norm": 17.94029998779297, "learning_rate": 8.74107142857143e-06, "loss": 42.4398, "step": 2953 }, { "epoch": 70.33432835820895, "grad_norm": 17.575550079345703, "learning_rate": 8.738095238095239e-06, "loss": 42.3488, "step": 2954 }, { "epoch": 70.35820895522389, "grad_norm": 18.268203735351562, "learning_rate": 8.735119047619048e-06, "loss": 42.6199, "step": 2955 }, { "epoch": 70.38208955223881, "grad_norm": 25.415603637695312, "learning_rate": 8.732142857142859e-06, "loss": 43.803, "step": 2956 }, { "epoch": 70.40597014925373, "grad_norm": 23.37176513671875, "learning_rate": 8.729166666666668e-06, "loss": 44.5072, "step": 2957 }, { "epoch": 70.42985074626866, "grad_norm": 24.91670036315918, "learning_rate": 8.726190476190477e-06, "loss": 44.1411, "step": 2958 }, { "epoch": 70.45373134328358, "grad_norm": 20.50780487060547, "learning_rate": 8.723214285714286e-06, "loss": 45.4114, "step": 2959 }, { "epoch": 70.4776119402985, "grad_norm": 21.885364532470703, "learning_rate": 8.720238095238096e-06, "loss": 43.1786, "step": 2960 }, { "epoch": 70.50149253731344, "grad_norm": 18.620540618896484, "learning_rate": 8.717261904761906e-06, "loss": 42.5272, "step": 2961 }, { "epoch": 70.52537313432836, "grad_norm": 27.28016471862793, "learning_rate": 8.714285714285715e-06, "loss": 44.0531, "step": 2962 }, { "epoch": 70.54925373134328, "grad_norm": 22.124799728393555, "learning_rate": 8.711309523809525e-06, "loss": 43.445, "step": 2963 }, { "epoch": 70.57313432835821, "grad_norm": 25.905492782592773, "learning_rate": 8.708333333333334e-06, "loss": 43.619, "step": 2964 }, { "epoch": 70.59701492537313, "grad_norm": 23.890172958374023, "learning_rate": 8.705357142857143e-06, "loss": 43.1365, "step": 2965 }, { "epoch": 70.62089552238805, "grad_norm": 20.158838272094727, "learning_rate": 8.702380952380952e-06, "loss": 41.9394, "step": 2966 }, { "epoch": 70.64477611940299, "grad_norm": 24.878849029541016, "learning_rate": 8.699404761904763e-06, "loss": 43.7568, "step": 2967 }, { "epoch": 70.66865671641791, "grad_norm": 20.08368492126465, "learning_rate": 8.696428571428572e-06, "loss": 43.7444, "step": 2968 }, { "epoch": 70.69253731343284, "grad_norm": 24.59374237060547, "learning_rate": 8.693452380952381e-06, "loss": 43.6659, "step": 2969 }, { "epoch": 70.71641791044776, "grad_norm": 21.96346664428711, "learning_rate": 8.690476190476192e-06, "loss": 42.8718, "step": 2970 }, { "epoch": 70.74029850746268, "grad_norm": 20.61510467529297, "learning_rate": 8.687500000000001e-06, "loss": 43.8264, "step": 2971 }, { "epoch": 70.7641791044776, "grad_norm": 25.367786407470703, "learning_rate": 8.68452380952381e-06, "loss": 42.2802, "step": 2972 }, { "epoch": 70.78805970149254, "grad_norm": 21.911298751831055, "learning_rate": 8.68154761904762e-06, "loss": 44.4695, "step": 2973 }, { "epoch": 70.81194029850747, "grad_norm": 26.7462100982666, "learning_rate": 8.67857142857143e-06, "loss": 43.4564, "step": 2974 }, { "epoch": 70.83582089552239, "grad_norm": 23.370485305786133, "learning_rate": 8.675595238095239e-06, "loss": 45.0502, "step": 2975 }, { "epoch": 70.85970149253731, "grad_norm": 26.052675247192383, "learning_rate": 8.672619047619048e-06, "loss": 42.6782, "step": 2976 }, { "epoch": 70.88358208955223, "grad_norm": 21.637617111206055, "learning_rate": 8.669642857142859e-06, "loss": 44.426, "step": 2977 }, { "epoch": 70.90746268656716, "grad_norm": 26.575313568115234, "learning_rate": 8.666666666666668e-06, "loss": 43.6968, "step": 2978 }, { "epoch": 70.9313432835821, "grad_norm": 23.814599990844727, "learning_rate": 8.663690476190477e-06, "loss": 43.3269, "step": 2979 }, { "epoch": 70.95522388059702, "grad_norm": 21.367717742919922, "learning_rate": 8.660714285714286e-06, "loss": 43.1399, "step": 2980 }, { "epoch": 70.97910447761194, "grad_norm": 19.98285484313965, "learning_rate": 8.657738095238097e-06, "loss": 42.9342, "step": 2981 }, { "epoch": 71.0, "grad_norm": 22.52842140197754, "learning_rate": 8.654761904761906e-06, "loss": 36.6415, "step": 2982 }, { "epoch": 71.02388059701492, "grad_norm": 22.04327392578125, "learning_rate": 8.651785714285715e-06, "loss": 43.0825, "step": 2983 }, { "epoch": 71.04776119402985, "grad_norm": 21.24346351623535, "learning_rate": 8.648809523809526e-06, "loss": 43.511, "step": 2984 }, { "epoch": 71.07164179104478, "grad_norm": 23.4123592376709, "learning_rate": 8.645833333333335e-06, "loss": 43.1464, "step": 2985 }, { "epoch": 71.0955223880597, "grad_norm": 23.918460845947266, "learning_rate": 8.642857142857144e-06, "loss": 44.1223, "step": 2986 }, { "epoch": 71.11940298507463, "grad_norm": 16.164955139160156, "learning_rate": 8.639880952380953e-06, "loss": 43.2759, "step": 2987 }, { "epoch": 71.14328358208955, "grad_norm": 22.15060043334961, "learning_rate": 8.636904761904763e-06, "loss": 43.1227, "step": 2988 }, { "epoch": 71.16716417910447, "grad_norm": 19.598203659057617, "learning_rate": 8.633928571428572e-06, "loss": 41.9802, "step": 2989 }, { "epoch": 71.1910447761194, "grad_norm": 16.25682830810547, "learning_rate": 8.630952380952381e-06, "loss": 42.1285, "step": 2990 }, { "epoch": 71.21492537313434, "grad_norm": 20.54530143737793, "learning_rate": 8.627976190476192e-06, "loss": 43.4601, "step": 2991 }, { "epoch": 71.23880597014926, "grad_norm": 25.911041259765625, "learning_rate": 8.625000000000001e-06, "loss": 42.6006, "step": 2992 }, { "epoch": 71.26268656716418, "grad_norm": 16.15741539001465, "learning_rate": 8.62202380952381e-06, "loss": 42.6621, "step": 2993 }, { "epoch": 71.2865671641791, "grad_norm": 30.334243774414062, "learning_rate": 8.61904761904762e-06, "loss": 43.4206, "step": 2994 }, { "epoch": 71.31044776119403, "grad_norm": 26.023889541625977, "learning_rate": 8.61607142857143e-06, "loss": 43.5777, "step": 2995 }, { "epoch": 71.33432835820895, "grad_norm": 21.3012638092041, "learning_rate": 8.61309523809524e-06, "loss": 42.4823, "step": 2996 }, { "epoch": 71.35820895522389, "grad_norm": 25.109596252441406, "learning_rate": 8.610119047619048e-06, "loss": 44.2666, "step": 2997 }, { "epoch": 71.38208955223881, "grad_norm": 22.26563835144043, "learning_rate": 8.607142857142859e-06, "loss": 40.9261, "step": 2998 }, { "epoch": 71.40597014925373, "grad_norm": 30.94297218322754, "learning_rate": 8.604166666666668e-06, "loss": 43.5651, "step": 2999 }, { "epoch": 71.42985074626866, "grad_norm": 24.670034408569336, "learning_rate": 8.601190476190477e-06, "loss": 43.6695, "step": 3000 }, { "epoch": 71.45373134328358, "grad_norm": 29.290430068969727, "learning_rate": 8.598214285714288e-06, "loss": 43.6725, "step": 3001 }, { "epoch": 71.4776119402985, "grad_norm": 23.058176040649414, "learning_rate": 8.595238095238097e-06, "loss": 43.695, "step": 3002 }, { "epoch": 71.50149253731344, "grad_norm": 21.41179084777832, "learning_rate": 8.592261904761904e-06, "loss": 43.1715, "step": 3003 }, { "epoch": 71.52537313432836, "grad_norm": 22.226594924926758, "learning_rate": 8.589285714285715e-06, "loss": 43.1411, "step": 3004 }, { "epoch": 71.54925373134328, "grad_norm": 19.892719268798828, "learning_rate": 8.586309523809524e-06, "loss": 44.4913, "step": 3005 }, { "epoch": 71.57313432835821, "grad_norm": 18.263708114624023, "learning_rate": 8.583333333333333e-06, "loss": 43.2348, "step": 3006 }, { "epoch": 71.59701492537313, "grad_norm": 22.065439224243164, "learning_rate": 8.580357142857144e-06, "loss": 44.3296, "step": 3007 }, { "epoch": 71.62089552238805, "grad_norm": 19.95087432861328, "learning_rate": 8.577380952380953e-06, "loss": 44.827, "step": 3008 }, { "epoch": 71.64477611940299, "grad_norm": 19.371231079101562, "learning_rate": 8.574404761904762e-06, "loss": 43.9034, "step": 3009 }, { "epoch": 71.66865671641791, "grad_norm": 22.265600204467773, "learning_rate": 8.571428571428571e-06, "loss": 42.659, "step": 3010 }, { "epoch": 71.69253731343284, "grad_norm": 18.449695587158203, "learning_rate": 8.568452380952382e-06, "loss": 43.674, "step": 3011 }, { "epoch": 71.71641791044776, "grad_norm": 25.14525604248047, "learning_rate": 8.56547619047619e-06, "loss": 43.5625, "step": 3012 }, { "epoch": 71.74029850746268, "grad_norm": 23.78099822998047, "learning_rate": 8.5625e-06, "loss": 44.3134, "step": 3013 }, { "epoch": 71.7641791044776, "grad_norm": 18.84084129333496, "learning_rate": 8.55952380952381e-06, "loss": 44.8436, "step": 3014 }, { "epoch": 71.78805970149254, "grad_norm": 28.59735107421875, "learning_rate": 8.55654761904762e-06, "loss": 43.3521, "step": 3015 }, { "epoch": 71.81194029850747, "grad_norm": 22.86484718322754, "learning_rate": 8.553571428571429e-06, "loss": 44.0742, "step": 3016 }, { "epoch": 71.83582089552239, "grad_norm": 17.34327507019043, "learning_rate": 8.550595238095238e-06, "loss": 43.5721, "step": 3017 }, { "epoch": 71.85970149253731, "grad_norm": 32.2520637512207, "learning_rate": 8.547619047619048e-06, "loss": 42.3465, "step": 3018 }, { "epoch": 71.88358208955223, "grad_norm": 23.380569458007812, "learning_rate": 8.544642857142857e-06, "loss": 43.2287, "step": 3019 }, { "epoch": 71.90746268656716, "grad_norm": 31.07112693786621, "learning_rate": 8.541666666666666e-06, "loss": 44.3177, "step": 3020 }, { "epoch": 71.9313432835821, "grad_norm": 24.860567092895508, "learning_rate": 8.538690476190477e-06, "loss": 43.6361, "step": 3021 }, { "epoch": 71.95522388059702, "grad_norm": 22.43517303466797, "learning_rate": 8.535714285714286e-06, "loss": 43.5824, "step": 3022 }, { "epoch": 71.97910447761194, "grad_norm": 27.975297927856445, "learning_rate": 8.532738095238095e-06, "loss": 43.4829, "step": 3023 }, { "epoch": 72.0, "grad_norm": 17.978660583496094, "learning_rate": 8.529761904761904e-06, "loss": 38.115, "step": 3024 }, { "epoch": 72.02388059701492, "grad_norm": 31.69437599182129, "learning_rate": 8.526785714285715e-06, "loss": 43.5237, "step": 3025 }, { "epoch": 72.04776119402985, "grad_norm": 27.577686309814453, "learning_rate": 8.523809523809524e-06, "loss": 43.1406, "step": 3026 }, { "epoch": 72.07164179104478, "grad_norm": 28.320255279541016, "learning_rate": 8.520833333333333e-06, "loss": 44.5784, "step": 3027 }, { "epoch": 72.0955223880597, "grad_norm": 26.59323501586914, "learning_rate": 8.517857142857144e-06, "loss": 42.2067, "step": 3028 }, { "epoch": 72.11940298507463, "grad_norm": 21.94460105895996, "learning_rate": 8.514880952380953e-06, "loss": 43.4262, "step": 3029 }, { "epoch": 72.14328358208955, "grad_norm": 23.56421661376953, "learning_rate": 8.511904761904762e-06, "loss": 41.1196, "step": 3030 }, { "epoch": 72.16716417910447, "grad_norm": 19.21329689025879, "learning_rate": 8.508928571428571e-06, "loss": 42.5441, "step": 3031 }, { "epoch": 72.1910447761194, "grad_norm": 23.377782821655273, "learning_rate": 8.505952380952382e-06, "loss": 43.0296, "step": 3032 }, { "epoch": 72.21492537313434, "grad_norm": 24.402435302734375, "learning_rate": 8.502976190476191e-06, "loss": 44.2474, "step": 3033 }, { "epoch": 72.23880597014926, "grad_norm": 18.61969566345215, "learning_rate": 8.5e-06, "loss": 43.6984, "step": 3034 }, { "epoch": 72.26268656716418, "grad_norm": 30.627338409423828, "learning_rate": 8.49702380952381e-06, "loss": 42.5441, "step": 3035 }, { "epoch": 72.2865671641791, "grad_norm": 26.115427017211914, "learning_rate": 8.49404761904762e-06, "loss": 41.8235, "step": 3036 }, { "epoch": 72.31044776119403, "grad_norm": 24.971904754638672, "learning_rate": 8.491071428571429e-06, "loss": 43.9344, "step": 3037 }, { "epoch": 72.33432835820895, "grad_norm": 26.42667007446289, "learning_rate": 8.488095238095238e-06, "loss": 43.2757, "step": 3038 }, { "epoch": 72.35820895522389, "grad_norm": 23.19200897216797, "learning_rate": 8.485119047619049e-06, "loss": 42.9536, "step": 3039 }, { "epoch": 72.38208955223881, "grad_norm": 31.263626098632812, "learning_rate": 8.482142857142858e-06, "loss": 42.8037, "step": 3040 }, { "epoch": 72.40597014925373, "grad_norm": 25.049270629882812, "learning_rate": 8.479166666666667e-06, "loss": 42.8005, "step": 3041 }, { "epoch": 72.42985074626866, "grad_norm": 20.71118927001953, "learning_rate": 8.476190476190477e-06, "loss": 43.106, "step": 3042 }, { "epoch": 72.45373134328358, "grad_norm": 22.156679153442383, "learning_rate": 8.473214285714286e-06, "loss": 42.6742, "step": 3043 }, { "epoch": 72.4776119402985, "grad_norm": 22.091957092285156, "learning_rate": 8.470238095238095e-06, "loss": 43.6855, "step": 3044 }, { "epoch": 72.50149253731344, "grad_norm": 21.12959861755371, "learning_rate": 8.467261904761905e-06, "loss": 42.9416, "step": 3045 }, { "epoch": 72.52537313432836, "grad_norm": 20.53251075744629, "learning_rate": 8.464285714285715e-06, "loss": 44.3919, "step": 3046 }, { "epoch": 72.54925373134328, "grad_norm": 19.188758850097656, "learning_rate": 8.461309523809524e-06, "loss": 43.3152, "step": 3047 }, { "epoch": 72.57313432835821, "grad_norm": 26.149826049804688, "learning_rate": 8.458333333333333e-06, "loss": 44.7382, "step": 3048 }, { "epoch": 72.59701492537313, "grad_norm": 18.40545082092285, "learning_rate": 8.455357142857144e-06, "loss": 44.0886, "step": 3049 }, { "epoch": 72.62089552238805, "grad_norm": 21.535911560058594, "learning_rate": 8.452380952380953e-06, "loss": 43.2625, "step": 3050 }, { "epoch": 72.64477611940299, "grad_norm": 17.798324584960938, "learning_rate": 8.449404761904762e-06, "loss": 43.5087, "step": 3051 }, { "epoch": 72.66865671641791, "grad_norm": 22.086271286010742, "learning_rate": 8.446428571428571e-06, "loss": 43.7427, "step": 3052 }, { "epoch": 72.69253731343284, "grad_norm": 20.795154571533203, "learning_rate": 8.443452380952382e-06, "loss": 43.6492, "step": 3053 }, { "epoch": 72.71641791044776, "grad_norm": 23.004671096801758, "learning_rate": 8.440476190476191e-06, "loss": 43.2841, "step": 3054 }, { "epoch": 72.74029850746268, "grad_norm": 19.808507919311523, "learning_rate": 8.4375e-06, "loss": 43.2447, "step": 3055 }, { "epoch": 72.7641791044776, "grad_norm": 25.06849479675293, "learning_rate": 8.434523809523811e-06, "loss": 42.7637, "step": 3056 }, { "epoch": 72.78805970149254, "grad_norm": 25.014245986938477, "learning_rate": 8.43154761904762e-06, "loss": 43.4822, "step": 3057 }, { "epoch": 72.81194029850747, "grad_norm": 22.324596405029297, "learning_rate": 8.428571428571429e-06, "loss": 43.3555, "step": 3058 }, { "epoch": 72.83582089552239, "grad_norm": 28.37264060974121, "learning_rate": 8.425595238095238e-06, "loss": 45.1914, "step": 3059 }, { "epoch": 72.85970149253731, "grad_norm": 20.218700408935547, "learning_rate": 8.422619047619049e-06, "loss": 43.7534, "step": 3060 }, { "epoch": 72.88358208955223, "grad_norm": 23.96106719970703, "learning_rate": 8.419642857142858e-06, "loss": 43.3726, "step": 3061 }, { "epoch": 72.90746268656716, "grad_norm": 24.620227813720703, "learning_rate": 8.416666666666667e-06, "loss": 43.2246, "step": 3062 }, { "epoch": 72.9313432835821, "grad_norm": 17.006282806396484, "learning_rate": 8.413690476190478e-06, "loss": 43.0239, "step": 3063 }, { "epoch": 72.95522388059702, "grad_norm": 32.321250915527344, "learning_rate": 8.410714285714287e-06, "loss": 43.8265, "step": 3064 }, { "epoch": 72.97910447761194, "grad_norm": 26.541305541992188, "learning_rate": 8.407738095238096e-06, "loss": 43.715, "step": 3065 }, { "epoch": 73.0, "grad_norm": 20.71360969543457, "learning_rate": 8.404761904761905e-06, "loss": 38.4916, "step": 3066 }, { "epoch": 73.02388059701492, "grad_norm": 25.500295639038086, "learning_rate": 8.401785714285715e-06, "loss": 43.3955, "step": 3067 }, { "epoch": 73.04776119402985, "grad_norm": 26.59987449645996, "learning_rate": 8.398809523809525e-06, "loss": 43.3811, "step": 3068 }, { "epoch": 73.07164179104478, "grad_norm": 22.731945037841797, "learning_rate": 8.395833333333334e-06, "loss": 43.2902, "step": 3069 }, { "epoch": 73.0955223880597, "grad_norm": 20.676626205444336, "learning_rate": 8.392857142857144e-06, "loss": 44.4288, "step": 3070 }, { "epoch": 73.11940298507463, "grad_norm": 24.257009506225586, "learning_rate": 8.389880952380953e-06, "loss": 42.6346, "step": 3071 }, { "epoch": 73.14328358208955, "grad_norm": 20.27753448486328, "learning_rate": 8.386904761904762e-06, "loss": 43.195, "step": 3072 }, { "epoch": 73.16716417910447, "grad_norm": 22.37655258178711, "learning_rate": 8.383928571428573e-06, "loss": 43.7297, "step": 3073 }, { "epoch": 73.1910447761194, "grad_norm": 22.078298568725586, "learning_rate": 8.380952380952382e-06, "loss": 45.0908, "step": 3074 }, { "epoch": 73.21492537313434, "grad_norm": 22.645662307739258, "learning_rate": 8.377976190476191e-06, "loss": 43.4577, "step": 3075 }, { "epoch": 73.23880597014926, "grad_norm": 18.159029006958008, "learning_rate": 8.375e-06, "loss": 42.8618, "step": 3076 }, { "epoch": 73.26268656716418, "grad_norm": 22.44676399230957, "learning_rate": 8.372023809523811e-06, "loss": 41.7892, "step": 3077 }, { "epoch": 73.2865671641791, "grad_norm": 21.480403900146484, "learning_rate": 8.36904761904762e-06, "loss": 44.0939, "step": 3078 }, { "epoch": 73.31044776119403, "grad_norm": 19.49287986755371, "learning_rate": 8.366071428571429e-06, "loss": 44.0851, "step": 3079 }, { "epoch": 73.33432835820895, "grad_norm": 18.453174591064453, "learning_rate": 8.36309523809524e-06, "loss": 42.5673, "step": 3080 }, { "epoch": 73.35820895522389, "grad_norm": NaN, "learning_rate": 8.360119047619049e-06, "loss": 71.053, "step": 3081 }, { "epoch": 73.38208955223881, "grad_norm": 20.119003295898438, "learning_rate": 8.360119047619049e-06, "loss": 42.981, "step": 3082 }, { "epoch": 73.40597014925373, "grad_norm": 18.897857666015625, "learning_rate": 8.357142857142858e-06, "loss": 42.5696, "step": 3083 }, { "epoch": 73.42985074626866, "grad_norm": 26.755035400390625, "learning_rate": 8.354166666666667e-06, "loss": 43.2951, "step": 3084 }, { "epoch": 73.45373134328358, "grad_norm": 19.104629516601562, "learning_rate": 8.351190476190478e-06, "loss": 42.5016, "step": 3085 }, { "epoch": 73.4776119402985, "grad_norm": 25.36631965637207, "learning_rate": 8.348214285714287e-06, "loss": 42.6552, "step": 3086 }, { "epoch": 73.50149253731344, "grad_norm": 27.23288345336914, "learning_rate": 8.345238095238096e-06, "loss": 42.6917, "step": 3087 }, { "epoch": 73.52537313432836, "grad_norm": 16.930316925048828, "learning_rate": 8.342261904761907e-06, "loss": 43.1315, "step": 3088 }, { "epoch": 73.54925373134328, "grad_norm": 26.30918312072754, "learning_rate": 8.339285714285716e-06, "loss": 42.7197, "step": 3089 }, { "epoch": 73.57313432835821, "grad_norm": 24.781511306762695, "learning_rate": 8.336309523809525e-06, "loss": 42.5099, "step": 3090 }, { "epoch": 73.59701492537313, "grad_norm": 19.516469955444336, "learning_rate": 8.333333333333334e-06, "loss": 43.0713, "step": 3091 }, { "epoch": 73.62089552238805, "grad_norm": 22.657184600830078, "learning_rate": 8.330357142857144e-06, "loss": 43.3808, "step": 3092 }, { "epoch": 73.64477611940299, "grad_norm": 18.468502044677734, "learning_rate": 8.327380952380954e-06, "loss": 43.7249, "step": 3093 }, { "epoch": 73.66865671641791, "grad_norm": 17.16704750061035, "learning_rate": 8.324404761904763e-06, "loss": 43.8457, "step": 3094 }, { "epoch": 73.69253731343284, "grad_norm": 21.254226684570312, "learning_rate": 8.321428571428573e-06, "loss": 43.5131, "step": 3095 }, { "epoch": 73.71641791044776, "grad_norm": 24.988006591796875, "learning_rate": 8.318452380952382e-06, "loss": 43.419, "step": 3096 }, { "epoch": 73.74029850746268, "grad_norm": 18.345117568969727, "learning_rate": 8.315476190476191e-06, "loss": 43.89, "step": 3097 }, { "epoch": 73.7641791044776, "grad_norm": 19.947589874267578, "learning_rate": 8.3125e-06, "loss": 41.9095, "step": 3098 }, { "epoch": 73.78805970149254, "grad_norm": 21.689882278442383, "learning_rate": 8.309523809523811e-06, "loss": 43.5629, "step": 3099 }, { "epoch": 73.81194029850747, "grad_norm": 18.021583557128906, "learning_rate": 8.30654761904762e-06, "loss": 45.2045, "step": 3100 }, { "epoch": 73.83582089552239, "grad_norm": 21.016939163208008, "learning_rate": 8.30357142857143e-06, "loss": 42.9508, "step": 3101 }, { "epoch": 73.85970149253731, "grad_norm": 19.921489715576172, "learning_rate": 8.30059523809524e-06, "loss": 45.0384, "step": 3102 }, { "epoch": 73.88358208955223, "grad_norm": 17.989734649658203, "learning_rate": 8.297619047619049e-06, "loss": 43.6752, "step": 3103 }, { "epoch": 73.90746268656716, "grad_norm": 19.126956939697266, "learning_rate": 8.294642857142858e-06, "loss": 42.4258, "step": 3104 }, { "epoch": 73.9313432835821, "grad_norm": 18.107421875, "learning_rate": 8.291666666666667e-06, "loss": 42.0089, "step": 3105 }, { "epoch": 73.95522388059702, "grad_norm": 22.599328994750977, "learning_rate": 8.288690476190478e-06, "loss": 43.1967, "step": 3106 }, { "epoch": 73.97910447761194, "grad_norm": 17.103744506835938, "learning_rate": 8.285714285714287e-06, "loss": 42.932, "step": 3107 }, { "epoch": 74.0, "grad_norm": 16.514545440673828, "learning_rate": 8.282738095238096e-06, "loss": 38.5601, "step": 3108 }, { "epoch": 74.02388059701492, "grad_norm": 19.938108444213867, "learning_rate": 8.279761904761905e-06, "loss": 43.1656, "step": 3109 }, { "epoch": 74.04776119402985, "grad_norm": 23.691556930541992, "learning_rate": 8.276785714285714e-06, "loss": 43.248, "step": 3110 }, { "epoch": 74.07164179104478, "grad_norm": 24.84130859375, "learning_rate": 8.273809523809523e-06, "loss": 43.0973, "step": 3111 }, { "epoch": 74.0955223880597, "grad_norm": 16.541378021240234, "learning_rate": 8.270833333333334e-06, "loss": 43.8453, "step": 3112 }, { "epoch": 74.11940298507463, "grad_norm": 34.161293029785156, "learning_rate": 8.267857142857143e-06, "loss": 41.7, "step": 3113 }, { "epoch": 74.14328358208955, "grad_norm": 26.104328155517578, "learning_rate": 8.264880952380952e-06, "loss": 43.6119, "step": 3114 }, { "epoch": 74.16716417910447, "grad_norm": 26.31689453125, "learning_rate": 8.261904761904763e-06, "loss": 41.5545, "step": 3115 }, { "epoch": 74.1910447761194, "grad_norm": 23.808761596679688, "learning_rate": 8.258928571428572e-06, "loss": 44.5862, "step": 3116 }, { "epoch": 74.21492537313434, "grad_norm": 24.158493041992188, "learning_rate": 8.25595238095238e-06, "loss": 42.9814, "step": 3117 }, { "epoch": 74.23880597014926, "grad_norm": 25.35089874267578, "learning_rate": 8.25297619047619e-06, "loss": 42.4484, "step": 3118 }, { "epoch": 74.26268656716418, "grad_norm": 24.48615264892578, "learning_rate": 8.25e-06, "loss": 42.7431, "step": 3119 }, { "epoch": 74.2865671641791, "grad_norm": 24.813716888427734, "learning_rate": 8.24702380952381e-06, "loss": 43.1515, "step": 3120 }, { "epoch": 74.31044776119403, "grad_norm": 18.43018341064453, "learning_rate": 8.244047619047619e-06, "loss": 43.5142, "step": 3121 }, { "epoch": 74.33432835820895, "grad_norm": 25.593732833862305, "learning_rate": 8.24107142857143e-06, "loss": 44.4342, "step": 3122 }, { "epoch": 74.35820895522389, "grad_norm": 21.224576950073242, "learning_rate": 8.238095238095239e-06, "loss": 44.1973, "step": 3123 }, { "epoch": 74.38208955223881, "grad_norm": 17.604145050048828, "learning_rate": 8.235119047619048e-06, "loss": 43.4662, "step": 3124 }, { "epoch": 74.40597014925373, "grad_norm": 30.535215377807617, "learning_rate": 8.232142857142857e-06, "loss": 42.8872, "step": 3125 }, { "epoch": 74.42985074626866, "grad_norm": 22.767736434936523, "learning_rate": 8.229166666666667e-06, "loss": 43.4187, "step": 3126 }, { "epoch": 74.45373134328358, "grad_norm": 33.97389221191406, "learning_rate": 8.226190476190476e-06, "loss": 43.8719, "step": 3127 }, { "epoch": 74.4776119402985, "grad_norm": 26.33451271057129, "learning_rate": 8.223214285714285e-06, "loss": 43.6458, "step": 3128 }, { "epoch": 74.50149253731344, "grad_norm": 35.393733978271484, "learning_rate": 8.220238095238096e-06, "loss": 44.2996, "step": 3129 }, { "epoch": 74.52537313432836, "grad_norm": 27.903955459594727, "learning_rate": 8.217261904761905e-06, "loss": 43.5218, "step": 3130 }, { "epoch": 74.54925373134328, "grad_norm": 27.946807861328125, "learning_rate": 8.214285714285714e-06, "loss": 43.8993, "step": 3131 }, { "epoch": 74.57313432835821, "grad_norm": 21.519737243652344, "learning_rate": 8.211309523809523e-06, "loss": 42.9369, "step": 3132 }, { "epoch": 74.59701492537313, "grad_norm": 27.311965942382812, "learning_rate": 8.208333333333334e-06, "loss": 43.4193, "step": 3133 }, { "epoch": 74.62089552238805, "grad_norm": 23.38337516784668, "learning_rate": 8.205357142857143e-06, "loss": 43.5582, "step": 3134 }, { "epoch": 74.64477611940299, "grad_norm": 19.415571212768555, "learning_rate": 8.202380952380952e-06, "loss": 42.8066, "step": 3135 }, { "epoch": 74.66865671641791, "grad_norm": 25.44513511657715, "learning_rate": 8.199404761904763e-06, "loss": 42.8859, "step": 3136 }, { "epoch": 74.69253731343284, "grad_norm": 23.1788330078125, "learning_rate": 8.196428571428572e-06, "loss": 42.1339, "step": 3137 }, { "epoch": 74.71641791044776, "grad_norm": 14.436179161071777, "learning_rate": 8.193452380952381e-06, "loss": 42.6687, "step": 3138 }, { "epoch": 74.74029850746268, "grad_norm": 30.928714752197266, "learning_rate": 8.190476190476192e-06, "loss": 44.5744, "step": 3139 }, { "epoch": 74.7641791044776, "grad_norm": 23.915878295898438, "learning_rate": 8.1875e-06, "loss": 44.3435, "step": 3140 }, { "epoch": 74.78805970149254, "grad_norm": 27.95979881286621, "learning_rate": 8.18452380952381e-06, "loss": 42.4667, "step": 3141 }, { "epoch": 74.81194029850747, "grad_norm": 22.4390811920166, "learning_rate": 8.181547619047619e-06, "loss": 42.6036, "step": 3142 }, { "epoch": 74.83582089552239, "grad_norm": 22.94829750061035, "learning_rate": 8.17857142857143e-06, "loss": 42.4304, "step": 3143 }, { "epoch": 74.85970149253731, "grad_norm": 20.711339950561523, "learning_rate": 8.175595238095239e-06, "loss": 43.1806, "step": 3144 }, { "epoch": 74.88358208955223, "grad_norm": 21.30629539489746, "learning_rate": 8.172619047619048e-06, "loss": 42.7325, "step": 3145 }, { "epoch": 74.90746268656716, "grad_norm": 20.381263732910156, "learning_rate": 8.169642857142858e-06, "loss": 43.0491, "step": 3146 }, { "epoch": 74.9313432835821, "grad_norm": 21.54926300048828, "learning_rate": 8.166666666666668e-06, "loss": 44.8298, "step": 3147 }, { "epoch": 74.95522388059702, "grad_norm": 15.518889427185059, "learning_rate": 8.163690476190477e-06, "loss": 42.6821, "step": 3148 }, { "epoch": 74.97910447761194, "grad_norm": 24.487192153930664, "learning_rate": 8.160714285714286e-06, "loss": 43.0891, "step": 3149 }, { "epoch": 75.0, "grad_norm": 15.607013702392578, "learning_rate": 8.157738095238096e-06, "loss": 37.212, "step": 3150 }, { "epoch": 75.02388059701492, "grad_norm": 31.642353057861328, "learning_rate": 8.154761904761905e-06, "loss": 43.9061, "step": 3151 }, { "epoch": 75.04776119402985, "grad_norm": 23.92624855041504, "learning_rate": 8.151785714285714e-06, "loss": 44.0244, "step": 3152 }, { "epoch": 75.07164179104478, "grad_norm": 23.756420135498047, "learning_rate": 8.148809523809525e-06, "loss": 44.7597, "step": 3153 }, { "epoch": 75.0955223880597, "grad_norm": 26.027414321899414, "learning_rate": 8.145833333333334e-06, "loss": 42.3933, "step": 3154 }, { "epoch": 75.11940298507463, "grad_norm": 18.252239227294922, "learning_rate": 8.142857142857143e-06, "loss": 43.1075, "step": 3155 }, { "epoch": 75.14328358208955, "grad_norm": 25.58303451538086, "learning_rate": 8.139880952380952e-06, "loss": 43.3715, "step": 3156 }, { "epoch": 75.16716417910447, "grad_norm": 24.198566436767578, "learning_rate": 8.136904761904763e-06, "loss": 42.042, "step": 3157 }, { "epoch": 75.1910447761194, "grad_norm": 21.632183074951172, "learning_rate": 8.133928571428572e-06, "loss": 42.4693, "step": 3158 }, { "epoch": 75.21492537313434, "grad_norm": 27.104801177978516, "learning_rate": 8.130952380952381e-06, "loss": 42.597, "step": 3159 }, { "epoch": 75.23880597014926, "grad_norm": 21.614917755126953, "learning_rate": 8.127976190476192e-06, "loss": 42.9729, "step": 3160 }, { "epoch": 75.26268656716418, "grad_norm": 27.62027359008789, "learning_rate": 8.125000000000001e-06, "loss": 43.3302, "step": 3161 }, { "epoch": 75.2865671641791, "grad_norm": 24.087974548339844, "learning_rate": 8.12202380952381e-06, "loss": 44.1364, "step": 3162 }, { "epoch": 75.31044776119403, "grad_norm": 21.590192794799805, "learning_rate": 8.119047619047619e-06, "loss": 42.7373, "step": 3163 }, { "epoch": 75.33432835820895, "grad_norm": 27.612075805664062, "learning_rate": 8.11607142857143e-06, "loss": 43.5758, "step": 3164 }, { "epoch": 75.35820895522389, "grad_norm": 18.209209442138672, "learning_rate": 8.113095238095239e-06, "loss": 43.309, "step": 3165 }, { "epoch": 75.38208955223881, "grad_norm": 28.845134735107422, "learning_rate": 8.110119047619048e-06, "loss": 43.3125, "step": 3166 }, { "epoch": 75.40597014925373, "grad_norm": 20.03913116455078, "learning_rate": 8.107142857142859e-06, "loss": 44.666, "step": 3167 }, { "epoch": 75.42985074626866, "grad_norm": 29.69953155517578, "learning_rate": 8.104166666666668e-06, "loss": 43.3558, "step": 3168 }, { "epoch": 75.45373134328358, "grad_norm": 22.189376831054688, "learning_rate": 8.101190476190477e-06, "loss": 43.6229, "step": 3169 }, { "epoch": 75.4776119402985, "grad_norm": 23.93678092956543, "learning_rate": 8.098214285714286e-06, "loss": 42.9279, "step": 3170 }, { "epoch": 75.50149253731344, "grad_norm": 21.489761352539062, "learning_rate": 8.095238095238097e-06, "loss": 43.4537, "step": 3171 }, { "epoch": 75.52537313432836, "grad_norm": 18.95380210876465, "learning_rate": 8.092261904761906e-06, "loss": 42.9752, "step": 3172 }, { "epoch": 75.54925373134328, "grad_norm": 26.20965576171875, "learning_rate": 8.089285714285715e-06, "loss": 42.7511, "step": 3173 }, { "epoch": 75.57313432835821, "grad_norm": 19.629926681518555, "learning_rate": 8.086309523809525e-06, "loss": 43.7784, "step": 3174 }, { "epoch": 75.59701492537313, "grad_norm": 25.866622924804688, "learning_rate": 8.083333333333334e-06, "loss": 42.7349, "step": 3175 }, { "epoch": 75.62089552238805, "grad_norm": 24.383323669433594, "learning_rate": 8.080357142857143e-06, "loss": 42.5395, "step": 3176 }, { "epoch": 75.64477611940299, "grad_norm": 19.74950408935547, "learning_rate": 8.077380952380953e-06, "loss": 43.1058, "step": 3177 }, { "epoch": 75.66865671641791, "grad_norm": 28.67831039428711, "learning_rate": 8.074404761904763e-06, "loss": 43.5871, "step": 3178 }, { "epoch": 75.69253731343284, "grad_norm": 23.102951049804688, "learning_rate": 8.071428571428572e-06, "loss": 42.76, "step": 3179 }, { "epoch": 75.71641791044776, "grad_norm": 35.02995681762695, "learning_rate": 8.068452380952381e-06, "loss": 43.8252, "step": 3180 }, { "epoch": 75.74029850746268, "grad_norm": 24.358551025390625, "learning_rate": 8.065476190476192e-06, "loss": 43.1074, "step": 3181 }, { "epoch": 75.7641791044776, "grad_norm": 30.14754295349121, "learning_rate": 8.062500000000001e-06, "loss": 43.8415, "step": 3182 }, { "epoch": 75.78805970149254, "grad_norm": 24.45053482055664, "learning_rate": 8.05952380952381e-06, "loss": 43.0215, "step": 3183 }, { "epoch": 75.81194029850747, "grad_norm": 37.40525436401367, "learning_rate": 8.05654761904762e-06, "loss": 42.961, "step": 3184 }, { "epoch": 75.83582089552239, "grad_norm": 24.555240631103516, "learning_rate": 8.05357142857143e-06, "loss": 44.2708, "step": 3185 }, { "epoch": 75.85970149253731, "grad_norm": 37.460670471191406, "learning_rate": 8.050595238095239e-06, "loss": 43.5956, "step": 3186 }, { "epoch": 75.88358208955223, "grad_norm": 32.54770278930664, "learning_rate": 8.047619047619048e-06, "loss": 42.3289, "step": 3187 }, { "epoch": 75.90746268656716, "grad_norm": 38.01876449584961, "learning_rate": 8.044642857142859e-06, "loss": 42.9972, "step": 3188 }, { "epoch": 75.9313432835821, "grad_norm": 30.63246726989746, "learning_rate": 8.041666666666668e-06, "loss": 42.5397, "step": 3189 }, { "epoch": 75.95522388059702, "grad_norm": 27.40627670288086, "learning_rate": 8.038690476190477e-06, "loss": 41.377, "step": 3190 }, { "epoch": 75.97910447761194, "grad_norm": 26.620893478393555, "learning_rate": 8.035714285714286e-06, "loss": 42.9367, "step": 3191 }, { "epoch": 76.0, "grad_norm": 31.36514663696289, "learning_rate": 8.032738095238097e-06, "loss": 37.8523, "step": 3192 }, { "epoch": 76.02388059701492, "grad_norm": 27.55282974243164, "learning_rate": 8.029761904761906e-06, "loss": 43.2614, "step": 3193 }, { "epoch": 76.04776119402985, "grad_norm": 36.373634338378906, "learning_rate": 8.026785714285715e-06, "loss": 42.1966, "step": 3194 }, { "epoch": 76.07164179104478, "grad_norm": 29.89250373840332, "learning_rate": 8.023809523809526e-06, "loss": 43.0278, "step": 3195 }, { "epoch": 76.0955223880597, "grad_norm": 28.84893226623535, "learning_rate": 8.020833333333335e-06, "loss": 44.8331, "step": 3196 }, { "epoch": 76.11940298507463, "grad_norm": 27.258445739746094, "learning_rate": 8.017857142857144e-06, "loss": 42.5176, "step": 3197 }, { "epoch": 76.14328358208955, "grad_norm": 30.8077449798584, "learning_rate": 8.014880952380953e-06, "loss": 43.3045, "step": 3198 }, { "epoch": 76.16716417910447, "grad_norm": 28.528837203979492, "learning_rate": 8.011904761904763e-06, "loss": 43.1302, "step": 3199 }, { "epoch": 76.1910447761194, "grad_norm": 30.751039505004883, "learning_rate": 8.008928571428572e-06, "loss": 43.7287, "step": 3200 }, { "epoch": 76.21492537313434, "grad_norm": 27.781261444091797, "learning_rate": 8.005952380952382e-06, "loss": 43.3939, "step": 3201 }, { "epoch": 76.23880597014926, "grad_norm": 27.45984649658203, "learning_rate": 8.002976190476192e-06, "loss": 43.6411, "step": 3202 }, { "epoch": 76.26268656716418, "grad_norm": 26.628419876098633, "learning_rate": 8.000000000000001e-06, "loss": 42.6454, "step": 3203 }, { "epoch": 76.2865671641791, "grad_norm": 36.02729034423828, "learning_rate": 7.99702380952381e-06, "loss": 43.2459, "step": 3204 }, { "epoch": 76.31044776119403, "grad_norm": 28.480478286743164, "learning_rate": 7.99404761904762e-06, "loss": 42.7675, "step": 3205 }, { "epoch": 76.33432835820895, "grad_norm": 31.36353874206543, "learning_rate": 7.99107142857143e-06, "loss": 42.8571, "step": 3206 }, { "epoch": 76.35820895522389, "grad_norm": 29.178728103637695, "learning_rate": 7.98809523809524e-06, "loss": 42.7477, "step": 3207 }, { "epoch": 76.38208955223881, "grad_norm": 28.539457321166992, "learning_rate": 7.985119047619048e-06, "loss": 44.1444, "step": 3208 }, { "epoch": 76.40597014925373, "grad_norm": 26.178895950317383, "learning_rate": 7.982142857142859e-06, "loss": 42.7187, "step": 3209 }, { "epoch": 76.42985074626866, "grad_norm": 30.825010299682617, "learning_rate": 7.979166666666668e-06, "loss": 43.478, "step": 3210 }, { "epoch": 76.45373134328358, "grad_norm": 27.317245483398438, "learning_rate": 7.976190476190477e-06, "loss": 43.479, "step": 3211 }, { "epoch": 76.4776119402985, "grad_norm": 31.42888641357422, "learning_rate": 7.973214285714286e-06, "loss": 43.7278, "step": 3212 }, { "epoch": 76.50149253731344, "grad_norm": 28.949392318725586, "learning_rate": 7.970238095238097e-06, "loss": 43.6134, "step": 3213 }, { "epoch": 76.52537313432836, "grad_norm": 36.61643981933594, "learning_rate": 7.967261904761904e-06, "loss": 44.1841, "step": 3214 }, { "epoch": 76.54925373134328, "grad_norm": 31.78457260131836, "learning_rate": 7.964285714285715e-06, "loss": 43.9995, "step": 3215 }, { "epoch": 76.57313432835821, "grad_norm": 29.883163452148438, "learning_rate": 7.961309523809524e-06, "loss": 42.596, "step": 3216 }, { "epoch": 76.59701492537313, "grad_norm": 27.458534240722656, "learning_rate": 7.958333333333333e-06, "loss": 43.7156, "step": 3217 }, { "epoch": 76.62089552238805, "grad_norm": 26.423311233520508, "learning_rate": 7.955357142857144e-06, "loss": 42.2925, "step": 3218 }, { "epoch": 76.64477611940299, "grad_norm": 22.850927352905273, "learning_rate": 7.952380952380953e-06, "loss": 43.3146, "step": 3219 }, { "epoch": 76.66865671641791, "grad_norm": 32.23415756225586, "learning_rate": 7.949404761904762e-06, "loss": 43.4622, "step": 3220 }, { "epoch": 76.69253731343284, "grad_norm": 25.596759796142578, "learning_rate": 7.946428571428571e-06, "loss": 42.6238, "step": 3221 }, { "epoch": 76.71641791044776, "grad_norm": 28.371593475341797, "learning_rate": 7.943452380952382e-06, "loss": 41.2267, "step": 3222 }, { "epoch": 76.74029850746268, "grad_norm": 24.369253158569336, "learning_rate": 7.94047619047619e-06, "loss": 43.24, "step": 3223 }, { "epoch": 76.7641791044776, "grad_norm": 34.42658996582031, "learning_rate": 7.9375e-06, "loss": 42.8095, "step": 3224 }, { "epoch": 76.78805970149254, "grad_norm": 26.35492515563965, "learning_rate": 7.93452380952381e-06, "loss": 42.0312, "step": 3225 }, { "epoch": 76.81194029850747, "grad_norm": 33.34773254394531, "learning_rate": 7.93154761904762e-06, "loss": 43.4483, "step": 3226 }, { "epoch": 76.83582089552239, "grad_norm": 31.470170974731445, "learning_rate": 7.928571428571429e-06, "loss": 43.9896, "step": 3227 }, { "epoch": 76.85970149253731, "grad_norm": 28.38050651550293, "learning_rate": 7.925595238095238e-06, "loss": 43.9711, "step": 3228 }, { "epoch": 76.88358208955223, "grad_norm": NaN, "learning_rate": 7.922619047619048e-06, "loss": 75.7577, "step": 3229 }, { "epoch": 76.90746268656716, "grad_norm": 21.927776336669922, "learning_rate": 7.922619047619048e-06, "loss": 42.1852, "step": 3230 }, { "epoch": 76.9313432835821, "grad_norm": 28.636518478393555, "learning_rate": 7.919642857142857e-06, "loss": 43.103, "step": 3231 }, { "epoch": 76.95522388059702, "grad_norm": 25.48936653137207, "learning_rate": 7.916666666666667e-06, "loss": 43.1688, "step": 3232 }, { "epoch": 76.97910447761194, "grad_norm": 29.641143798828125, "learning_rate": 7.913690476190477e-06, "loss": 41.7518, "step": 3233 }, { "epoch": 77.0, "grad_norm": 22.023099899291992, "learning_rate": 7.910714285714286e-06, "loss": 38.1447, "step": 3234 }, { "epoch": 77.02388059701492, "grad_norm": 35.88689041137695, "learning_rate": 7.907738095238095e-06, "loss": 43.1578, "step": 3235 }, { "epoch": 77.04776119402985, "grad_norm": 34.37343978881836, "learning_rate": 7.904761904761904e-06, "loss": 43.0582, "step": 3236 }, { "epoch": 77.07164179104478, "grad_norm": 18.577016830444336, "learning_rate": 7.901785714285715e-06, "loss": 42.1815, "step": 3237 }, { "epoch": 77.0955223880597, "grad_norm": 23.373125076293945, "learning_rate": 7.898809523809524e-06, "loss": 44.491, "step": 3238 }, { "epoch": 77.11940298507463, "grad_norm": 28.848159790039062, "learning_rate": 7.895833333333333e-06, "loss": 43.0114, "step": 3239 }, { "epoch": 77.14328358208955, "grad_norm": 19.10719108581543, "learning_rate": 7.892857142857144e-06, "loss": 42.862, "step": 3240 }, { "epoch": 77.16716417910447, "grad_norm": 34.79095458984375, "learning_rate": 7.889880952380953e-06, "loss": 43.7736, "step": 3241 }, { "epoch": 77.1910447761194, "grad_norm": 28.950021743774414, "learning_rate": 7.886904761904762e-06, "loss": 44.5221, "step": 3242 }, { "epoch": 77.21492537313434, "grad_norm": 29.437536239624023, "learning_rate": 7.883928571428571e-06, "loss": 43.0498, "step": 3243 }, { "epoch": 77.23880597014926, "grad_norm": 26.087984085083008, "learning_rate": 7.880952380952382e-06, "loss": 42.1991, "step": 3244 }, { "epoch": 77.26268656716418, "grad_norm": 30.868637084960938, "learning_rate": 7.877976190476191e-06, "loss": 43.1896, "step": 3245 }, { "epoch": 77.2865671641791, "grad_norm": 26.28648567199707, "learning_rate": 7.875e-06, "loss": 41.9695, "step": 3246 }, { "epoch": 77.31044776119403, "grad_norm": 27.738021850585938, "learning_rate": 7.87202380952381e-06, "loss": 42.9537, "step": 3247 }, { "epoch": 77.33432835820895, "grad_norm": 23.0654296875, "learning_rate": 7.86904761904762e-06, "loss": 43.1053, "step": 3248 }, { "epoch": 77.35820895522389, "grad_norm": 31.976926803588867, "learning_rate": 7.866071428571429e-06, "loss": 42.0648, "step": 3249 }, { "epoch": 77.38208955223881, "grad_norm": 28.690933227539062, "learning_rate": 7.863095238095238e-06, "loss": 43.0786, "step": 3250 }, { "epoch": 77.40597014925373, "grad_norm": 29.870180130004883, "learning_rate": 7.860119047619049e-06, "loss": 44.1362, "step": 3251 }, { "epoch": 77.42985074626866, "grad_norm": 29.524002075195312, "learning_rate": 7.857142857142858e-06, "loss": 42.635, "step": 3252 }, { "epoch": 77.45373134328358, "grad_norm": 24.833131790161133, "learning_rate": 7.854166666666667e-06, "loss": 43.1208, "step": 3253 }, { "epoch": 77.4776119402985, "grad_norm": 24.424755096435547, "learning_rate": 7.851190476190477e-06, "loss": 44.4682, "step": 3254 }, { "epoch": 77.50149253731344, "grad_norm": 30.417823791503906, "learning_rate": 7.848214285714287e-06, "loss": 45.1353, "step": 3255 }, { "epoch": 77.52537313432836, "grad_norm": 23.12209701538086, "learning_rate": 7.845238095238096e-06, "loss": 41.7736, "step": 3256 }, { "epoch": 77.54925373134328, "grad_norm": 30.454221725463867, "learning_rate": 7.842261904761905e-06, "loss": 42.6765, "step": 3257 }, { "epoch": 77.57313432835821, "grad_norm": 30.55715560913086, "learning_rate": 7.839285714285715e-06, "loss": 43.4168, "step": 3258 }, { "epoch": 77.59701492537313, "grad_norm": 26.72547149658203, "learning_rate": 7.836309523809524e-06, "loss": 42.5388, "step": 3259 }, { "epoch": 77.62089552238805, "grad_norm": 25.03418731689453, "learning_rate": 7.833333333333333e-06, "loss": 43.3748, "step": 3260 }, { "epoch": 77.64477611940299, "grad_norm": 28.706029891967773, "learning_rate": 7.830357142857144e-06, "loss": 43.7717, "step": 3261 }, { "epoch": 77.66865671641791, "grad_norm": 30.39940643310547, "learning_rate": 7.827380952380953e-06, "loss": 42.952, "step": 3262 }, { "epoch": 77.69253731343284, "grad_norm": 25.622882843017578, "learning_rate": 7.824404761904762e-06, "loss": 42.7133, "step": 3263 }, { "epoch": 77.71641791044776, "grad_norm": 25.120025634765625, "learning_rate": 7.821428571428571e-06, "loss": 42.2453, "step": 3264 }, { "epoch": 77.74029850746268, "grad_norm": 27.227832794189453, "learning_rate": 7.818452380952382e-06, "loss": 42.4094, "step": 3265 }, { "epoch": 77.7641791044776, "grad_norm": 23.663406372070312, "learning_rate": 7.815476190476191e-06, "loss": 43.7332, "step": 3266 }, { "epoch": 77.78805970149254, "grad_norm": 28.738086700439453, "learning_rate": 7.8125e-06, "loss": 43.7881, "step": 3267 }, { "epoch": 77.81194029850747, "grad_norm": 27.955598831176758, "learning_rate": 7.809523809523811e-06, "loss": 43.4782, "step": 3268 }, { "epoch": 77.83582089552239, "grad_norm": 24.79859161376953, "learning_rate": 7.80654761904762e-06, "loss": 41.0554, "step": 3269 }, { "epoch": 77.85970149253731, "grad_norm": 25.531471252441406, "learning_rate": 7.803571428571429e-06, "loss": 43.0072, "step": 3270 }, { "epoch": 77.88358208955223, "grad_norm": 27.746000289916992, "learning_rate": 7.800595238095238e-06, "loss": 43.8641, "step": 3271 }, { "epoch": 77.90746268656716, "grad_norm": 25.056262969970703, "learning_rate": 7.797619047619049e-06, "loss": 43.1316, "step": 3272 }, { "epoch": 77.9313432835821, "grad_norm": 30.888355255126953, "learning_rate": 7.794642857142858e-06, "loss": 43.482, "step": 3273 }, { "epoch": 77.95522388059702, "grad_norm": 22.501649856567383, "learning_rate": 7.791666666666667e-06, "loss": 43.4869, "step": 3274 }, { "epoch": 77.97910447761194, "grad_norm": 31.175397872924805, "learning_rate": 7.788690476190478e-06, "loss": 43.5349, "step": 3275 }, { "epoch": 78.0, "grad_norm": 20.901432037353516, "learning_rate": 7.785714285714287e-06, "loss": 36.2874, "step": 3276 }, { "epoch": 78.02388059701492, "grad_norm": 30.319852828979492, "learning_rate": 7.782738095238096e-06, "loss": 41.102, "step": 3277 }, { "epoch": 78.04776119402985, "grad_norm": 28.31625747680664, "learning_rate": 7.779761904761905e-06, "loss": 42.4304, "step": 3278 }, { "epoch": 78.07164179104478, "grad_norm": 26.445859909057617, "learning_rate": 7.776785714285716e-06, "loss": 42.8755, "step": 3279 }, { "epoch": 78.0955223880597, "grad_norm": 20.42568588256836, "learning_rate": 7.773809523809525e-06, "loss": 44.5072, "step": 3280 }, { "epoch": 78.11940298507463, "grad_norm": 28.535858154296875, "learning_rate": 7.770833333333334e-06, "loss": 42.325, "step": 3281 }, { "epoch": 78.14328358208955, "grad_norm": 21.800678253173828, "learning_rate": 7.767857142857144e-06, "loss": 44.3283, "step": 3282 }, { "epoch": 78.16716417910447, "grad_norm": 26.74295997619629, "learning_rate": 7.764880952380953e-06, "loss": 44.3208, "step": 3283 }, { "epoch": 78.1910447761194, "grad_norm": 28.9124755859375, "learning_rate": 7.761904761904762e-06, "loss": 42.8112, "step": 3284 }, { "epoch": 78.21492537313434, "grad_norm": 24.948265075683594, "learning_rate": 7.758928571428571e-06, "loss": 42.6617, "step": 3285 }, { "epoch": 78.23880597014926, "grad_norm": 25.038854598999023, "learning_rate": 7.755952380952382e-06, "loss": 42.6089, "step": 3286 }, { "epoch": 78.26268656716418, "grad_norm": 24.622905731201172, "learning_rate": 7.752976190476191e-06, "loss": 42.9201, "step": 3287 }, { "epoch": 78.2865671641791, "grad_norm": 22.999900817871094, "learning_rate": 7.75e-06, "loss": 43.2141, "step": 3288 }, { "epoch": 78.31044776119403, "grad_norm": 22.848161697387695, "learning_rate": 7.747023809523811e-06, "loss": 44.1053, "step": 3289 }, { "epoch": 78.33432835820895, "grad_norm": 16.15705108642578, "learning_rate": 7.74404761904762e-06, "loss": 43.7009, "step": 3290 }, { "epoch": 78.35820895522389, "grad_norm": 29.3355655670166, "learning_rate": 7.74107142857143e-06, "loss": 42.3037, "step": 3291 }, { "epoch": 78.38208955223881, "grad_norm": 19.516281127929688, "learning_rate": 7.738095238095238e-06, "loss": 42.6299, "step": 3292 }, { "epoch": 78.40597014925373, "grad_norm": 34.26980209350586, "learning_rate": 7.735119047619049e-06, "loss": 43.052, "step": 3293 }, { "epoch": 78.42985074626866, "grad_norm": 32.0604133605957, "learning_rate": 7.732142857142858e-06, "loss": 42.4497, "step": 3294 }, { "epoch": 78.45373134328358, "grad_norm": 23.038795471191406, "learning_rate": 7.729166666666667e-06, "loss": 42.5542, "step": 3295 }, { "epoch": 78.4776119402985, "grad_norm": 27.498064041137695, "learning_rate": 7.726190476190478e-06, "loss": 41.834, "step": 3296 }, { "epoch": 78.50149253731344, "grad_norm": 25.38565444946289, "learning_rate": 7.723214285714287e-06, "loss": 44.7325, "step": 3297 }, { "epoch": 78.52537313432836, "grad_norm": 21.209095001220703, "learning_rate": 7.720238095238096e-06, "loss": 44.6015, "step": 3298 }, { "epoch": 78.54925373134328, "grad_norm": 27.321908950805664, "learning_rate": 7.717261904761905e-06, "loss": 43.3014, "step": 3299 }, { "epoch": 78.57313432835821, "grad_norm": 20.742706298828125, "learning_rate": 7.714285714285716e-06, "loss": 44.1572, "step": 3300 }, { "epoch": 78.59701492537313, "grad_norm": 28.640583038330078, "learning_rate": 7.711309523809525e-06, "loss": 42.1555, "step": 3301 }, { "epoch": 78.62089552238805, "grad_norm": 30.252870559692383, "learning_rate": 7.708333333333334e-06, "loss": 43.4469, "step": 3302 }, { "epoch": 78.64477611940299, "grad_norm": 23.6368350982666, "learning_rate": 7.705357142857145e-06, "loss": 44.3375, "step": 3303 }, { "epoch": 78.66865671641791, "grad_norm": 22.434412002563477, "learning_rate": 7.702380952380954e-06, "loss": 42.8106, "step": 3304 }, { "epoch": 78.69253731343284, "grad_norm": 28.329635620117188, "learning_rate": 7.699404761904763e-06, "loss": 42.8968, "step": 3305 }, { "epoch": 78.71641791044776, "grad_norm": 21.02295684814453, "learning_rate": 7.696428571428572e-06, "loss": 42.1169, "step": 3306 }, { "epoch": 78.74029850746268, "grad_norm": 30.06182861328125, "learning_rate": 7.693452380952382e-06, "loss": 43.0741, "step": 3307 }, { "epoch": 78.7641791044776, "grad_norm": 22.40550994873047, "learning_rate": 7.690476190476191e-06, "loss": 42.8449, "step": 3308 }, { "epoch": 78.78805970149254, "grad_norm": 28.855802536010742, "learning_rate": 7.6875e-06, "loss": 43.0846, "step": 3309 }, { "epoch": 78.81194029850747, "grad_norm": 25.507308959960938, "learning_rate": 7.684523809523811e-06, "loss": 41.954, "step": 3310 }, { "epoch": 78.83582089552239, "grad_norm": 26.092424392700195, "learning_rate": 7.68154761904762e-06, "loss": 42.1684, "step": 3311 }, { "epoch": 78.85970149253731, "grad_norm": 24.099889755249023, "learning_rate": 7.67857142857143e-06, "loss": 43.9231, "step": 3312 }, { "epoch": 78.88358208955223, "grad_norm": 28.72806739807129, "learning_rate": 7.675595238095238e-06, "loss": 42.546, "step": 3313 }, { "epoch": 78.90746268656716, "grad_norm": 26.489227294921875, "learning_rate": 7.672619047619049e-06, "loss": 44.1023, "step": 3314 }, { "epoch": 78.9313432835821, "grad_norm": 29.59152603149414, "learning_rate": 7.669642857142858e-06, "loss": 43.7005, "step": 3315 }, { "epoch": 78.95522388059702, "grad_norm": 23.0878963470459, "learning_rate": 7.666666666666667e-06, "loss": 41.9249, "step": 3316 }, { "epoch": 78.97910447761194, "grad_norm": 29.851896286010742, "learning_rate": 7.663690476190478e-06, "loss": 42.2078, "step": 3317 }, { "epoch": 79.0, "grad_norm": 23.739883422851562, "learning_rate": 7.660714285714287e-06, "loss": 39.1357, "step": 3318 }, { "epoch": 79.02388059701492, "grad_norm": 23.394466400146484, "learning_rate": 7.657738095238096e-06, "loss": 43.7385, "step": 3319 }, { "epoch": 79.04776119402985, "grad_norm": 22.10674285888672, "learning_rate": 7.654761904761905e-06, "loss": 43.253, "step": 3320 }, { "epoch": 79.07164179104478, "grad_norm": 25.71041488647461, "learning_rate": 7.651785714285714e-06, "loss": 43.6012, "step": 3321 }, { "epoch": 79.0955223880597, "grad_norm": 18.054738998413086, "learning_rate": 7.648809523809523e-06, "loss": 42.5356, "step": 3322 }, { "epoch": 79.11940298507463, "grad_norm": 25.66161346435547, "learning_rate": 7.645833333333334e-06, "loss": 43.796, "step": 3323 }, { "epoch": 79.14328358208955, "grad_norm": 15.92872142791748, "learning_rate": 7.642857142857143e-06, "loss": 43.4924, "step": 3324 }, { "epoch": 79.16716417910447, "grad_norm": 26.33378791809082, "learning_rate": 7.639880952380952e-06, "loss": 41.9388, "step": 3325 }, { "epoch": 79.1910447761194, "grad_norm": 18.938690185546875, "learning_rate": 7.636904761904763e-06, "loss": 42.6458, "step": 3326 }, { "epoch": 79.21492537313434, "grad_norm": 21.968505859375, "learning_rate": 7.633928571428572e-06, "loss": 43.2856, "step": 3327 }, { "epoch": 79.23880597014926, "grad_norm": 21.652313232421875, "learning_rate": 7.630952380952381e-06, "loss": 43.1669, "step": 3328 }, { "epoch": 79.26268656716418, "grad_norm": 16.064531326293945, "learning_rate": 7.627976190476191e-06, "loss": 43.09, "step": 3329 }, { "epoch": 79.2865671641791, "grad_norm": 21.19333839416504, "learning_rate": 7.625e-06, "loss": 42.7371, "step": 3330 }, { "epoch": 79.31044776119403, "grad_norm": 19.381980895996094, "learning_rate": 7.62202380952381e-06, "loss": 43.129, "step": 3331 }, { "epoch": 79.33432835820895, "grad_norm": 17.10456085205078, "learning_rate": 7.61904761904762e-06, "loss": 42.795, "step": 3332 }, { "epoch": 79.35820895522389, "grad_norm": 18.57830810546875, "learning_rate": 7.616071428571429e-06, "loss": 43.1172, "step": 3333 }, { "epoch": 79.38208955223881, "grad_norm": 16.343597412109375, "learning_rate": 7.6130952380952386e-06, "loss": 44.1413, "step": 3334 }, { "epoch": 79.40597014925373, "grad_norm": 18.999656677246094, "learning_rate": 7.610119047619048e-06, "loss": 43.154, "step": 3335 }, { "epoch": 79.42985074626866, "grad_norm": 18.70110321044922, "learning_rate": 7.6071428571428575e-06, "loss": 43.2832, "step": 3336 }, { "epoch": 79.45373134328358, "grad_norm": 17.107995986938477, "learning_rate": 7.6041666666666666e-06, "loss": 42.6499, "step": 3337 }, { "epoch": 79.4776119402985, "grad_norm": 20.98540496826172, "learning_rate": 7.6011904761904765e-06, "loss": 42.6728, "step": 3338 }, { "epoch": 79.50149253731344, "grad_norm": 18.264223098754883, "learning_rate": 7.598214285714286e-06, "loss": 42.1924, "step": 3339 }, { "epoch": 79.52537313432836, "grad_norm": 22.478178024291992, "learning_rate": 7.595238095238095e-06, "loss": 43.1835, "step": 3340 }, { "epoch": 79.54925373134328, "grad_norm": 21.464313507080078, "learning_rate": 7.592261904761905e-06, "loss": 42.8992, "step": 3341 }, { "epoch": 79.57313432835821, "grad_norm": 23.627376556396484, "learning_rate": 7.589285714285714e-06, "loss": 43.3444, "step": 3342 }, { "epoch": 79.59701492537313, "grad_norm": 20.699804306030273, "learning_rate": 7.586309523809524e-06, "loss": 43.5294, "step": 3343 }, { "epoch": 79.62089552238805, "grad_norm": 27.1911678314209, "learning_rate": 7.583333333333333e-06, "loss": 42.3842, "step": 3344 }, { "epoch": 79.64477611940299, "grad_norm": 22.591445922851562, "learning_rate": 7.580357142857143e-06, "loss": 43.2132, "step": 3345 }, { "epoch": 79.66865671641791, "grad_norm": 23.79202651977539, "learning_rate": 7.577380952380953e-06, "loss": 42.7603, "step": 3346 }, { "epoch": 79.69253731343284, "grad_norm": 21.520214080810547, "learning_rate": 7.574404761904762e-06, "loss": 42.9868, "step": 3347 }, { "epoch": 79.71641791044776, "grad_norm": 21.92240333557129, "learning_rate": 7.571428571428572e-06, "loss": 42.5148, "step": 3348 }, { "epoch": 79.74029850746268, "grad_norm": 21.808698654174805, "learning_rate": 7.568452380952381e-06, "loss": 42.2734, "step": 3349 }, { "epoch": 79.7641791044776, "grad_norm": 21.703947067260742, "learning_rate": 7.565476190476191e-06, "loss": 43.9589, "step": 3350 }, { "epoch": 79.78805970149254, "grad_norm": 21.56643295288086, "learning_rate": 7.5625e-06, "loss": 42.249, "step": 3351 }, { "epoch": 79.81194029850747, "grad_norm": 20.325498580932617, "learning_rate": 7.55952380952381e-06, "loss": 42.5246, "step": 3352 }, { "epoch": 79.83582089552239, "grad_norm": 20.19651985168457, "learning_rate": 7.55654761904762e-06, "loss": 43.1353, "step": 3353 }, { "epoch": 79.85970149253731, "grad_norm": 15.062832832336426, "learning_rate": 7.553571428571429e-06, "loss": 42.7335, "step": 3354 }, { "epoch": 79.88358208955223, "grad_norm": 21.990650177001953, "learning_rate": 7.550595238095239e-06, "loss": 44.103, "step": 3355 }, { "epoch": 79.90746268656716, "grad_norm": 17.816457748413086, "learning_rate": 7.547619047619048e-06, "loss": 43.592, "step": 3356 }, { "epoch": 79.9313432835821, "grad_norm": 21.62665557861328, "learning_rate": 7.544642857142858e-06, "loss": 44.0372, "step": 3357 }, { "epoch": 79.95522388059702, "grad_norm": 20.444469451904297, "learning_rate": 7.541666666666667e-06, "loss": 42.7547, "step": 3358 }, { "epoch": 79.97910447761194, "grad_norm": 15.230064392089844, "learning_rate": 7.538690476190477e-06, "loss": 42.4287, "step": 3359 }, { "epoch": 80.0, "grad_norm": 18.977619171142578, "learning_rate": 7.5357142857142865e-06, "loss": 36.8674, "step": 3360 }, { "epoch": 80.0, "step": 3360, "total_flos": 1.6516474192825325e+17, "train_loss": 10.921977708453223, "train_runtime": 25778.6818, "train_samples_per_second": 16.609, "train_steps_per_second": 0.13 }, { "epoch": 80.02388059701492, "grad_norm": 20.951553344726562, "learning_rate": 1e-05, "loss": 42.8953, "step": 3361 }, { "epoch": 80.04776119402985, "grad_norm": Infinity, "learning_rate": 9.997354497354498e-06, "loss": 49.2702, "step": 3362 }, { "epoch": 80.07164179104478, "grad_norm": 272.02093505859375, "learning_rate": 9.997354497354498e-06, "loss": 48.7639, "step": 3363 }, { "epoch": 80.0955223880597, "grad_norm": 136.40426635742188, "learning_rate": 9.994708994708996e-06, "loss": 48.2845, "step": 3364 }, { "epoch": 80.11940298507463, "grad_norm": 69.2103500366211, "learning_rate": 9.992063492063493e-06, "loss": 45.905, "step": 3365 }, { "epoch": 80.14328358208955, "grad_norm": 42.27269744873047, "learning_rate": 9.989417989417989e-06, "loss": 44.495, "step": 3366 }, { "epoch": 80.16716417910447, "grad_norm": 78.32905578613281, "learning_rate": 9.986772486772488e-06, "loss": 43.787, "step": 3367 }, { "epoch": 80.1910447761194, "grad_norm": 53.60576248168945, "learning_rate": 9.984126984126986e-06, "loss": 44.9412, "step": 3368 }, { "epoch": 80.21492537313434, "grad_norm": 43.58672332763672, "learning_rate": 9.981481481481482e-06, "loss": 43.5559, "step": 3369 }, { "epoch": 80.23880597014926, "grad_norm": 52.74037170410156, "learning_rate": 9.97883597883598e-06, "loss": 43.7715, "step": 3370 }, { "epoch": 80.26268656716418, "grad_norm": 36.5859260559082, "learning_rate": 9.976190476190477e-06, "loss": 44.8368, "step": 3371 }, { "epoch": 80.2865671641791, "grad_norm": 41.1060676574707, "learning_rate": 9.973544973544974e-06, "loss": 44.2442, "step": 3372 }, { "epoch": 80.31044776119403, "grad_norm": 29.22023582458496, "learning_rate": 9.970899470899472e-06, "loss": 44.9361, "step": 3373 }, { "epoch": 80.33432835820895, "grad_norm": 23.876710891723633, "learning_rate": 9.968253968253969e-06, "loss": 43.0819, "step": 3374 }, { "epoch": 80.35820895522389, "grad_norm": 29.575992584228516, "learning_rate": 9.965608465608467e-06, "loss": 43.4547, "step": 3375 }, { "epoch": 80.38208955223881, "grad_norm": 30.555126190185547, "learning_rate": 9.962962962962964e-06, "loss": 42.7816, "step": 3376 }, { "epoch": 80.40597014925373, "grad_norm": 22.153589248657227, "learning_rate": 9.960317460317462e-06, "loss": 43.225, "step": 3377 }, { "epoch": 80.42985074626866, "grad_norm": 22.4864501953125, "learning_rate": 9.957671957671959e-06, "loss": 44.3476, "step": 3378 }, { "epoch": 80.45373134328358, "grad_norm": 28.664342880249023, "learning_rate": 9.955026455026457e-06, "loss": 43.8263, "step": 3379 }, { "epoch": 80.4776119402985, "grad_norm": 20.183809280395508, "learning_rate": 9.952380952380954e-06, "loss": 43.0054, "step": 3380 }, { "epoch": 80.50149253731344, "grad_norm": 20.122495651245117, "learning_rate": 9.94973544973545e-06, "loss": 42.8467, "step": 3381 }, { "epoch": 80.52537313432836, "grad_norm": 18.21672821044922, "learning_rate": 9.947089947089947e-06, "loss": 43.1002, "step": 3382 }, { "epoch": 80.54925373134328, "grad_norm": 19.279260635375977, "learning_rate": 9.944444444444445e-06, "loss": 43.057, "step": 3383 }, { "epoch": 80.57313432835821, "grad_norm": 16.66730308532715, "learning_rate": 9.941798941798942e-06, "loss": 41.9396, "step": 3384 }, { "epoch": 80.59701492537313, "grad_norm": 23.94289779663086, "learning_rate": 9.93915343915344e-06, "loss": 41.9997, "step": 3385 }, { "epoch": 80.62089552238805, "grad_norm": 19.543209075927734, "learning_rate": 9.936507936507937e-06, "loss": 43.4446, "step": 3386 }, { "epoch": 80.64477611940299, "grad_norm": 16.7114315032959, "learning_rate": 9.933862433862435e-06, "loss": 42.8548, "step": 3387 }, { "epoch": 80.66865671641791, "grad_norm": 14.687740325927734, "learning_rate": 9.931216931216932e-06, "loss": 43.4851, "step": 3388 }, { "epoch": 80.69253731343284, "grad_norm": 20.930234909057617, "learning_rate": 9.92857142857143e-06, "loss": 43.216, "step": 3389 }, { "epoch": 80.71641791044776, "grad_norm": 18.500185012817383, "learning_rate": 9.925925925925927e-06, "loss": 43.32, "step": 3390 }, { "epoch": 80.74029850746268, "grad_norm": 17.255064010620117, "learning_rate": 9.923280423280423e-06, "loss": 41.8527, "step": 3391 }, { "epoch": 80.7641791044776, "grad_norm": 23.286033630371094, "learning_rate": 9.920634920634922e-06, "loss": 42.4732, "step": 3392 }, { "epoch": 80.78805970149254, "grad_norm": 21.66954803466797, "learning_rate": 9.917989417989418e-06, "loss": 43.0689, "step": 3393 }, { "epoch": 80.81194029850747, "grad_norm": 15.510072708129883, "learning_rate": 9.915343915343916e-06, "loss": 42.6028, "step": 3394 }, { "epoch": 80.83582089552239, "grad_norm": 17.338539123535156, "learning_rate": 9.912698412698413e-06, "loss": 43.066, "step": 3395 }, { "epoch": 80.85970149253731, "grad_norm": 28.546316146850586, "learning_rate": 9.91005291005291e-06, "loss": 42.7705, "step": 3396 }, { "epoch": 80.88358208955223, "grad_norm": 21.883974075317383, "learning_rate": 9.907407407407408e-06, "loss": 42.3245, "step": 3397 }, { "epoch": 80.90746268656716, "grad_norm": 23.212677001953125, "learning_rate": 9.904761904761906e-06, "loss": 43.1431, "step": 3398 }, { "epoch": 80.9313432835821, "grad_norm": 19.58159828186035, "learning_rate": 9.902116402116403e-06, "loss": 43.5287, "step": 3399 }, { "epoch": 80.95522388059702, "grad_norm": 26.139862060546875, "learning_rate": 9.8994708994709e-06, "loss": 42.9908, "step": 3400 }, { "epoch": 80.97910447761194, "grad_norm": 16.672977447509766, "learning_rate": 9.896825396825398e-06, "loss": 42.1315, "step": 3401 }, { "epoch": 81.0, "grad_norm": 24.852455139160156, "learning_rate": 9.894179894179896e-06, "loss": 36.8278, "step": 3402 }, { "epoch": 81.02388059701492, "grad_norm": 22.26006317138672, "learning_rate": 9.891534391534391e-06, "loss": 42.4729, "step": 3403 }, { "epoch": 81.04776119402985, "grad_norm": 16.017719268798828, "learning_rate": 9.88888888888889e-06, "loss": 42.9225, "step": 3404 }, { "epoch": 81.07164179104478, "grad_norm": 28.550519943237305, "learning_rate": 9.886243386243386e-06, "loss": 42.6745, "step": 3405 }, { "epoch": 81.0955223880597, "grad_norm": 23.507572174072266, "learning_rate": 9.883597883597884e-06, "loss": 42.0028, "step": 3406 }, { "epoch": 81.11940298507463, "grad_norm": 21.06671905517578, "learning_rate": 9.880952380952381e-06, "loss": 43.0596, "step": 3407 }, { "epoch": 81.14328358208955, "grad_norm": 30.52378273010254, "learning_rate": 9.878306878306879e-06, "loss": 42.6651, "step": 3408 }, { "epoch": 81.16716417910447, "grad_norm": 20.8646183013916, "learning_rate": 9.875661375661376e-06, "loss": 42.5492, "step": 3409 }, { "epoch": 81.1910447761194, "grad_norm": 24.76753044128418, "learning_rate": 9.873015873015874e-06, "loss": 44.1658, "step": 3410 }, { "epoch": 81.21492537313434, "grad_norm": 24.59670066833496, "learning_rate": 9.870370370370371e-06, "loss": 41.993, "step": 3411 }, { "epoch": 81.23880597014926, "grad_norm": 18.1619815826416, "learning_rate": 9.867724867724869e-06, "loss": 41.729, "step": 3412 }, { "epoch": 81.26268656716418, "grad_norm": 25.726171493530273, "learning_rate": 9.865079365079366e-06, "loss": 43.4774, "step": 3413 }, { "epoch": 81.2865671641791, "grad_norm": 19.582408905029297, "learning_rate": 9.862433862433864e-06, "loss": 44.2081, "step": 3414 }, { "epoch": 81.31044776119403, "grad_norm": 19.20425033569336, "learning_rate": 9.85978835978836e-06, "loss": 45.2273, "step": 3415 }, { "epoch": 81.33432835820895, "grad_norm": 24.18745994567871, "learning_rate": 9.857142857142859e-06, "loss": 43.2535, "step": 3416 }, { "epoch": 81.35820895522389, "grad_norm": 20.09618377685547, "learning_rate": 9.854497354497355e-06, "loss": 42.837, "step": 3417 }, { "epoch": 81.38208955223881, "grad_norm": 18.357542037963867, "learning_rate": 9.851851851851852e-06, "loss": 42.3722, "step": 3418 }, { "epoch": 81.40597014925373, "grad_norm": 21.53424644470215, "learning_rate": 9.849206349206351e-06, "loss": 42.6014, "step": 3419 }, { "epoch": 81.42985074626866, "grad_norm": 23.138153076171875, "learning_rate": 9.846560846560847e-06, "loss": 43.1802, "step": 3420 }, { "epoch": 81.45373134328358, "grad_norm": NaN, "learning_rate": 9.843915343915345e-06, "loss": 60.8525, "step": 3421 }, { "epoch": 81.4776119402985, "grad_norm": 16.697940826416016, "learning_rate": 9.843915343915345e-06, "loss": 42.6524, "step": 3422 }, { "epoch": 81.50149253731344, "grad_norm": 21.829591751098633, "learning_rate": 9.841269841269842e-06, "loss": 42.8111, "step": 3423 }, { "epoch": 81.52537313432836, "grad_norm": 24.891218185424805, "learning_rate": 9.83862433862434e-06, "loss": 43.6078, "step": 3424 }, { "epoch": 81.54925373134328, "grad_norm": 21.53104019165039, "learning_rate": 9.835978835978837e-06, "loss": 42.8522, "step": 3425 }, { "epoch": 81.57313432835821, "grad_norm": 24.85852813720703, "learning_rate": 9.833333333333333e-06, "loss": 42.5736, "step": 3426 }, { "epoch": 81.59701492537313, "grad_norm": 25.954561233520508, "learning_rate": 9.830687830687832e-06, "loss": 42.513, "step": 3427 }, { "epoch": 81.62089552238805, "grad_norm": 18.79954719543457, "learning_rate": 9.828042328042328e-06, "loss": 42.4569, "step": 3428 }, { "epoch": 81.64477611940299, "grad_norm": 21.777231216430664, "learning_rate": 9.825396825396825e-06, "loss": 41.9235, "step": 3429 }, { "epoch": 81.66865671641791, "grad_norm": 20.84613037109375, "learning_rate": 9.822751322751325e-06, "loss": 43.7221, "step": 3430 }, { "epoch": 81.69253731343284, "grad_norm": 25.095165252685547, "learning_rate": 9.82010582010582e-06, "loss": 43.7676, "step": 3431 }, { "epoch": 81.71641791044776, "grad_norm": 20.732393264770508, "learning_rate": 9.817460317460318e-06, "loss": 42.3845, "step": 3432 }, { "epoch": 81.74029850746268, "grad_norm": NaN, "learning_rate": 9.814814814814815e-06, "loss": 42.1237, "step": 3433 }, { "epoch": 81.7641791044776, "grad_norm": 33.96809768676758, "learning_rate": 9.814814814814815e-06, "loss": 43.6781, "step": 3434 }, { "epoch": 81.78805970149254, "grad_norm": 20.83742904663086, "learning_rate": 9.812169312169313e-06, "loss": 43.1676, "step": 3435 }, { "epoch": 81.81194029850747, "grad_norm": 37.817081451416016, "learning_rate": 9.80952380952381e-06, "loss": 42.5989, "step": 3436 }, { "epoch": 81.83582089552239, "grad_norm": 26.07498550415039, "learning_rate": 9.806878306878308e-06, "loss": 43.613, "step": 3437 }, { "epoch": 81.85970149253731, "grad_norm": 32.35169982910156, "learning_rate": 9.804232804232805e-06, "loss": 43.4166, "step": 3438 }, { "epoch": 81.88358208955223, "grad_norm": 23.49301528930664, "learning_rate": 9.801587301587301e-06, "loss": 40.9932, "step": 3439 }, { "epoch": 81.90746268656716, "grad_norm": 28.475976943969727, "learning_rate": 9.7989417989418e-06, "loss": 44.0779, "step": 3440 }, { "epoch": 81.9313432835821, "grad_norm": 20.77143669128418, "learning_rate": 9.796296296296298e-06, "loss": 43.0358, "step": 3441 }, { "epoch": 81.95522388059702, "grad_norm": 27.558744430541992, "learning_rate": 9.793650793650794e-06, "loss": 42.6501, "step": 3442 }, { "epoch": 81.97910447761194, "grad_norm": 17.57852554321289, "learning_rate": 9.791005291005293e-06, "loss": 43.0594, "step": 3443 }, { "epoch": 82.0, "grad_norm": 30.414134979248047, "learning_rate": 9.788359788359789e-06, "loss": 37.7772, "step": 3444 }, { "epoch": 82.02388059701492, "grad_norm": 29.184572219848633, "learning_rate": 9.785714285714286e-06, "loss": 43.0878, "step": 3445 }, { "epoch": 82.04776119402985, "grad_norm": 24.36541748046875, "learning_rate": 9.783068783068784e-06, "loss": 43.1851, "step": 3446 }, { "epoch": 82.07164179104478, "grad_norm": 24.232807159423828, "learning_rate": 9.780423280423281e-06, "loss": 43.4104, "step": 3447 }, { "epoch": 82.0955223880597, "grad_norm": 29.002002716064453, "learning_rate": 9.777777777777779e-06, "loss": 44.6274, "step": 3448 }, { "epoch": 82.11940298507463, "grad_norm": 22.997961044311523, "learning_rate": 9.775132275132276e-06, "loss": 43.2128, "step": 3449 }, { "epoch": 82.14328358208955, "grad_norm": 26.34942626953125, "learning_rate": 9.772486772486774e-06, "loss": 42.6116, "step": 3450 }, { "epoch": 82.16716417910447, "grad_norm": 19.555774688720703, "learning_rate": 9.769841269841271e-06, "loss": 42.9207, "step": 3451 }, { "epoch": 82.1910447761194, "grad_norm": 25.108083724975586, "learning_rate": 9.767195767195769e-06, "loss": 41.7188, "step": 3452 }, { "epoch": 82.21492537313434, "grad_norm": 20.387653350830078, "learning_rate": 9.764550264550266e-06, "loss": 42.8712, "step": 3453 }, { "epoch": 82.23880597014926, "grad_norm": 24.493921279907227, "learning_rate": 9.761904761904762e-06, "loss": 43.6475, "step": 3454 }, { "epoch": 82.26268656716418, "grad_norm": 23.366165161132812, "learning_rate": 9.759259259259261e-06, "loss": 42.5025, "step": 3455 }, { "epoch": 82.2865671641791, "grad_norm": 25.831466674804688, "learning_rate": 9.756613756613757e-06, "loss": 44.1183, "step": 3456 }, { "epoch": 82.31044776119403, "grad_norm": 20.5382137298584, "learning_rate": 9.753968253968254e-06, "loss": 42.0874, "step": 3457 }, { "epoch": 82.33432835820895, "grad_norm": 23.923063278198242, "learning_rate": 9.751322751322752e-06, "loss": 44.2198, "step": 3458 }, { "epoch": 82.35820895522389, "grad_norm": 21.77039909362793, "learning_rate": 9.74867724867725e-06, "loss": 42.8486, "step": 3459 }, { "epoch": 82.38208955223881, "grad_norm": 19.86173439025879, "learning_rate": 9.746031746031747e-06, "loss": 43.104, "step": 3460 }, { "epoch": 82.40597014925373, "grad_norm": 20.714754104614258, "learning_rate": 9.743386243386244e-06, "loss": 41.789, "step": 3461 }, { "epoch": 82.42985074626866, "grad_norm": 24.748607635498047, "learning_rate": 9.740740740740742e-06, "loss": 41.7835, "step": 3462 }, { "epoch": 82.45373134328358, "grad_norm": 19.247220993041992, "learning_rate": 9.73809523809524e-06, "loss": 42.3253, "step": 3463 }, { "epoch": 82.4776119402985, "grad_norm": 21.964488983154297, "learning_rate": 9.735449735449735e-06, "loss": 40.6579, "step": 3464 }, { "epoch": 82.50149253731344, "grad_norm": 19.75965118408203, "learning_rate": 9.732804232804234e-06, "loss": 42.2777, "step": 3465 }, { "epoch": 82.52537313432836, "grad_norm": 19.871715545654297, "learning_rate": 9.73015873015873e-06, "loss": 41.8654, "step": 3466 }, { "epoch": 82.54925373134328, "grad_norm": 17.353679656982422, "learning_rate": 9.727513227513228e-06, "loss": 43.1572, "step": 3467 }, { "epoch": 82.57313432835821, "grad_norm": 22.952226638793945, "learning_rate": 9.724867724867725e-06, "loss": 42.2348, "step": 3468 }, { "epoch": 82.59701492537313, "grad_norm": 19.62160873413086, "learning_rate": 9.722222222222223e-06, "loss": 43.7133, "step": 3469 }, { "epoch": 82.62089552238805, "grad_norm": NaN, "learning_rate": 9.71957671957672e-06, "loss": 44.3913, "step": 3470 }, { "epoch": 82.64477611940299, "grad_norm": 22.301387786865234, "learning_rate": 9.71957671957672e-06, "loss": 42.7776, "step": 3471 }, { "epoch": 82.66865671641791, "grad_norm": 23.42523956298828, "learning_rate": 9.716931216931218e-06, "loss": 43.9875, "step": 3472 }, { "epoch": 82.69253731343284, "grad_norm": 19.187870025634766, "learning_rate": 9.714285714285715e-06, "loss": 43.6333, "step": 3473 }, { "epoch": 82.71641791044776, "grad_norm": 17.408340454101562, "learning_rate": 9.711640211640213e-06, "loss": 42.3257, "step": 3474 }, { "epoch": 82.74029850746268, "grad_norm": 17.102418899536133, "learning_rate": 9.70899470899471e-06, "loss": 41.7486, "step": 3475 }, { "epoch": 82.7641791044776, "grad_norm": 17.715524673461914, "learning_rate": 9.706349206349208e-06, "loss": 43.9781, "step": 3476 }, { "epoch": 82.78805970149254, "grad_norm": 22.915067672729492, "learning_rate": 9.703703703703703e-06, "loss": 43.0049, "step": 3477 }, { "epoch": 82.81194029850747, "grad_norm": 18.104154586791992, "learning_rate": 9.701058201058203e-06, "loss": 43.0062, "step": 3478 }, { "epoch": 82.83582089552239, "grad_norm": 14.81946086883545, "learning_rate": 9.698412698412698e-06, "loss": 42.0968, "step": 3479 }, { "epoch": 82.85970149253731, "grad_norm": 19.58578872680664, "learning_rate": 9.695767195767196e-06, "loss": 43.6563, "step": 3480 }, { "epoch": 82.88358208955223, "grad_norm": 17.979524612426758, "learning_rate": 9.693121693121693e-06, "loss": 41.9954, "step": 3481 }, { "epoch": 82.90746268656716, "grad_norm": 17.92389488220215, "learning_rate": 9.690476190476191e-06, "loss": 42.0242, "step": 3482 }, { "epoch": 82.9313432835821, "grad_norm": 22.026195526123047, "learning_rate": 9.687830687830688e-06, "loss": 43.2985, "step": 3483 }, { "epoch": 82.95522388059702, "grad_norm": 15.080731391906738, "learning_rate": 9.685185185185186e-06, "loss": 42.8814, "step": 3484 }, { "epoch": 82.97910447761194, "grad_norm": 23.170284271240234, "learning_rate": 9.682539682539683e-06, "loss": 42.4875, "step": 3485 }, { "epoch": 83.0, "grad_norm": 15.19926929473877, "learning_rate": 9.679894179894181e-06, "loss": 38.3047, "step": 3486 }, { "epoch": 83.02388059701492, "grad_norm": 20.842618942260742, "learning_rate": 9.677248677248678e-06, "loss": 41.9214, "step": 3487 }, { "epoch": 83.04776119402985, "grad_norm": 19.11284637451172, "learning_rate": 9.674603174603176e-06, "loss": 43.2375, "step": 3488 }, { "epoch": 83.07164179104478, "grad_norm": 19.39193344116211, "learning_rate": 9.671957671957672e-06, "loss": 43.5418, "step": 3489 }, { "epoch": 83.0955223880597, "grad_norm": 19.154869079589844, "learning_rate": 9.669312169312171e-06, "loss": 42.4917, "step": 3490 }, { "epoch": 83.11940298507463, "grad_norm": 27.682418823242188, "learning_rate": 9.666666666666667e-06, "loss": 43.22, "step": 3491 }, { "epoch": 83.14328358208955, "grad_norm": 19.741304397583008, "learning_rate": 9.664021164021164e-06, "loss": 42.6503, "step": 3492 }, { "epoch": 83.16716417910447, "grad_norm": 23.25188446044922, "learning_rate": 9.661375661375663e-06, "loss": 42.7449, "step": 3493 }, { "epoch": 83.1910447761194, "grad_norm": 25.500925064086914, "learning_rate": 9.65873015873016e-06, "loss": 43.8239, "step": 3494 }, { "epoch": 83.21492537313434, "grad_norm": 22.653488159179688, "learning_rate": 9.656084656084657e-06, "loss": 42.4962, "step": 3495 }, { "epoch": 83.23880597014926, "grad_norm": 21.660871505737305, "learning_rate": 9.653439153439154e-06, "loss": 44.1403, "step": 3496 }, { "epoch": 83.26268656716418, "grad_norm": 24.922666549682617, "learning_rate": 9.650793650793652e-06, "loss": 42.4295, "step": 3497 }, { "epoch": 83.2865671641791, "grad_norm": 20.24859619140625, "learning_rate": 9.64814814814815e-06, "loss": 41.7125, "step": 3498 }, { "epoch": 83.31044776119403, "grad_norm": 16.770278930664062, "learning_rate": 9.645502645502647e-06, "loss": 43.0386, "step": 3499 }, { "epoch": 83.33432835820895, "grad_norm": 20.553585052490234, "learning_rate": 9.642857142857144e-06, "loss": 43.2005, "step": 3500 }, { "epoch": 83.35820895522389, "grad_norm": 22.309749603271484, "learning_rate": 9.64021164021164e-06, "loss": 43.9736, "step": 3501 }, { "epoch": 83.38208955223881, "grad_norm": 16.99924659729004, "learning_rate": 9.63756613756614e-06, "loss": 42.9804, "step": 3502 }, { "epoch": 83.40597014925373, "grad_norm": 17.541120529174805, "learning_rate": 9.634920634920637e-06, "loss": 41.9332, "step": 3503 }, { "epoch": 83.42985074626866, "grad_norm": 19.222923278808594, "learning_rate": 9.632275132275132e-06, "loss": 43.163, "step": 3504 }, { "epoch": 83.45373134328358, "grad_norm": 23.178749084472656, "learning_rate": 9.62962962962963e-06, "loss": 41.4791, "step": 3505 }, { "epoch": 83.4776119402985, "grad_norm": 24.103410720825195, "learning_rate": 9.626984126984127e-06, "loss": 43.5107, "step": 3506 }, { "epoch": 83.50149253731344, "grad_norm": 16.439075469970703, "learning_rate": 9.624338624338625e-06, "loss": 43.6286, "step": 3507 }, { "epoch": 83.52537313432836, "grad_norm": 29.148473739624023, "learning_rate": 9.621693121693122e-06, "loss": 44.0076, "step": 3508 }, { "epoch": 83.54925373134328, "grad_norm": 23.33673667907715, "learning_rate": 9.61904761904762e-06, "loss": 42.0299, "step": 3509 }, { "epoch": 83.57313432835821, "grad_norm": 20.69951820373535, "learning_rate": 9.616402116402117e-06, "loss": 41.9305, "step": 3510 }, { "epoch": 83.59701492537313, "grad_norm": 28.55817413330078, "learning_rate": 9.613756613756613e-06, "loss": 42.112, "step": 3511 }, { "epoch": 83.62089552238805, "grad_norm": 20.63089942932129, "learning_rate": 9.611111111111112e-06, "loss": 42.5737, "step": 3512 }, { "epoch": 83.64477611940299, "grad_norm": 18.186328887939453, "learning_rate": 9.60846560846561e-06, "loss": 42.6654, "step": 3513 }, { "epoch": 83.66865671641791, "grad_norm": 30.312583923339844, "learning_rate": 9.605820105820106e-06, "loss": 41.6198, "step": 3514 }, { "epoch": 83.69253731343284, "grad_norm": 22.397600173950195, "learning_rate": 9.603174603174605e-06, "loss": 43.7027, "step": 3515 }, { "epoch": 83.71641791044776, "grad_norm": 22.637603759765625, "learning_rate": 9.6005291005291e-06, "loss": 43.3998, "step": 3516 }, { "epoch": 83.74029850746268, "grad_norm": 24.366125106811523, "learning_rate": 9.597883597883598e-06, "loss": 42.6906, "step": 3517 }, { "epoch": 83.7641791044776, "grad_norm": 21.425613403320312, "learning_rate": 9.595238095238096e-06, "loss": 42.7173, "step": 3518 }, { "epoch": 83.78805970149254, "grad_norm": 18.075485229492188, "learning_rate": 9.592592592592593e-06, "loss": 42.9601, "step": 3519 }, { "epoch": 83.81194029850747, "grad_norm": 19.24690818786621, "learning_rate": 9.58994708994709e-06, "loss": 41.9579, "step": 3520 }, { "epoch": 83.83582089552239, "grad_norm": 21.10234832763672, "learning_rate": 9.587301587301588e-06, "loss": 42.6078, "step": 3521 }, { "epoch": 83.85970149253731, "grad_norm": 21.595741271972656, "learning_rate": 9.584656084656086e-06, "loss": 43.3926, "step": 3522 }, { "epoch": 83.88358208955223, "grad_norm": 14.618033409118652, "learning_rate": 9.582010582010583e-06, "loss": 42.7237, "step": 3523 }, { "epoch": 83.90746268656716, "grad_norm": 18.805774688720703, "learning_rate": 9.57936507936508e-06, "loss": 43.6884, "step": 3524 }, { "epoch": 83.9313432835821, "grad_norm": 17.666229248046875, "learning_rate": 9.576719576719578e-06, "loss": 43.3069, "step": 3525 }, { "epoch": 83.95522388059702, "grad_norm": NaN, "learning_rate": 9.574074074074074e-06, "loss": 47.9701, "step": 3526 }, { "epoch": 83.97910447761194, "grad_norm": 18.41876792907715, "learning_rate": 9.574074074074074e-06, "loss": 42.0814, "step": 3527 }, { "epoch": 84.0, "grad_norm": 14.344976425170898, "learning_rate": 9.571428571428573e-06, "loss": 36.702, "step": 3528 }, { "epoch": 84.02388059701492, "grad_norm": 19.47123146057129, "learning_rate": 9.568783068783069e-06, "loss": 43.0682, "step": 3529 }, { "epoch": 84.04776119402985, "grad_norm": 18.708087921142578, "learning_rate": 9.566137566137567e-06, "loss": 43.4093, "step": 3530 }, { "epoch": 84.07164179104478, "grad_norm": 21.98741340637207, "learning_rate": 9.563492063492064e-06, "loss": 42.619, "step": 3531 }, { "epoch": 84.0955223880597, "grad_norm": 21.4478816986084, "learning_rate": 9.560846560846561e-06, "loss": 43.3145, "step": 3532 }, { "epoch": 84.11940298507463, "grad_norm": 21.093963623046875, "learning_rate": 9.558201058201059e-06, "loss": 43.1162, "step": 3533 }, { "epoch": 84.14328358208955, "grad_norm": 18.37552833557129, "learning_rate": 9.555555555555556e-06, "loss": 42.4734, "step": 3534 }, { "epoch": 84.16716417910447, "grad_norm": 13.956351280212402, "learning_rate": 9.552910052910054e-06, "loss": 42.4351, "step": 3535 }, { "epoch": 84.1910447761194, "grad_norm": 20.104270935058594, "learning_rate": 9.550264550264551e-06, "loss": 43.2507, "step": 3536 }, { "epoch": 84.21492537313434, "grad_norm": 20.69384002685547, "learning_rate": 9.547619047619049e-06, "loss": 42.8764, "step": 3537 }, { "epoch": 84.23880597014926, "grad_norm": 26.53329086303711, "learning_rate": 9.544973544973546e-06, "loss": 42.4139, "step": 3538 }, { "epoch": 84.26268656716418, "grad_norm": 11.859530448913574, "learning_rate": 9.542328042328042e-06, "loss": 42.4525, "step": 3539 }, { "epoch": 84.2865671641791, "grad_norm": 22.784093856811523, "learning_rate": 9.539682539682541e-06, "loss": 42.6754, "step": 3540 }, { "epoch": 84.31044776119403, "grad_norm": 22.02342987060547, "learning_rate": 9.537037037037037e-06, "loss": 42.8119, "step": 3541 }, { "epoch": 84.33432835820895, "grad_norm": 16.383922576904297, "learning_rate": 9.534391534391535e-06, "loss": 41.9982, "step": 3542 }, { "epoch": 84.35820895522389, "grad_norm": 18.745128631591797, "learning_rate": 9.531746031746032e-06, "loss": 43.0496, "step": 3543 }, { "epoch": 84.38208955223881, "grad_norm": 33.664764404296875, "learning_rate": 9.52910052910053e-06, "loss": 42.5116, "step": 3544 }, { "epoch": 84.40597014925373, "grad_norm": 18.74268341064453, "learning_rate": 9.526455026455027e-06, "loss": 43.642, "step": 3545 }, { "epoch": 84.42985074626866, "grad_norm": 30.136598587036133, "learning_rate": 9.523809523809525e-06, "loss": 42.8695, "step": 3546 }, { "epoch": 84.45373134328358, "grad_norm": 22.268802642822266, "learning_rate": 9.521164021164022e-06, "loss": 42.6697, "step": 3547 }, { "epoch": 84.4776119402985, "grad_norm": 22.149734497070312, "learning_rate": 9.51851851851852e-06, "loss": 43.0171, "step": 3548 }, { "epoch": 84.50149253731344, "grad_norm": 33.4512825012207, "learning_rate": 9.515873015873016e-06, "loss": 43.2588, "step": 3549 }, { "epoch": 84.52537313432836, "grad_norm": 22.5905704498291, "learning_rate": 9.513227513227515e-06, "loss": 43.2581, "step": 3550 }, { "epoch": 84.54925373134328, "grad_norm": 38.85606384277344, "learning_rate": 9.51058201058201e-06, "loss": 42.2418, "step": 3551 }, { "epoch": 84.57313432835821, "grad_norm": 27.77952003479004, "learning_rate": 9.507936507936508e-06, "loss": 43.4077, "step": 3552 }, { "epoch": 84.59701492537313, "grad_norm": 46.536651611328125, "learning_rate": 9.505291005291006e-06, "loss": 42.1365, "step": 3553 }, { "epoch": 84.62089552238805, "grad_norm": 32.448482513427734, "learning_rate": 9.502645502645503e-06, "loss": 43.362, "step": 3554 }, { "epoch": 84.64477611940299, "grad_norm": 43.40568161010742, "learning_rate": 9.5e-06, "loss": 42.4134, "step": 3555 }, { "epoch": 84.66865671641791, "grad_norm": 44.625125885009766, "learning_rate": 9.497354497354498e-06, "loss": 42.3841, "step": 3556 }, { "epoch": 84.69253731343284, "grad_norm": 30.825876235961914, "learning_rate": 9.494708994708996e-06, "loss": 43.0973, "step": 3557 }, { "epoch": 84.71641791044776, "grad_norm": 32.886775970458984, "learning_rate": 9.492063492063493e-06, "loss": 42.6478, "step": 3558 }, { "epoch": 84.74029850746268, "grad_norm": 35.800621032714844, "learning_rate": 9.48941798941799e-06, "loss": 42.1319, "step": 3559 }, { "epoch": 84.7641791044776, "grad_norm": 27.23737907409668, "learning_rate": 9.486772486772488e-06, "loss": 42.0883, "step": 3560 }, { "epoch": 84.78805970149254, "grad_norm": 40.162166595458984, "learning_rate": 9.484126984126984e-06, "loss": 42.5786, "step": 3561 }, { "epoch": 84.81194029850747, "grad_norm": 32.1665153503418, "learning_rate": 9.481481481481483e-06, "loss": 41.7711, "step": 3562 }, { "epoch": 84.83582089552239, "grad_norm": 34.32803726196289, "learning_rate": 9.478835978835979e-06, "loss": 43.3193, "step": 3563 }, { "epoch": 84.85970149253731, "grad_norm": 34.155452728271484, "learning_rate": 9.476190476190476e-06, "loss": 43.2305, "step": 3564 }, { "epoch": 84.88358208955223, "grad_norm": 31.642534255981445, "learning_rate": 9.473544973544975e-06, "loss": 44.1911, "step": 3565 }, { "epoch": 84.90746268656716, "grad_norm": 33.413291931152344, "learning_rate": 9.470899470899471e-06, "loss": 41.0447, "step": 3566 }, { "epoch": 84.9313432835821, "grad_norm": 35.05025100708008, "learning_rate": 9.468253968253969e-06, "loss": 43.3446, "step": 3567 }, { "epoch": 84.95522388059702, "grad_norm": 28.946184158325195, "learning_rate": 9.465608465608466e-06, "loss": 42.4865, "step": 3568 }, { "epoch": 84.97910447761194, "grad_norm": 38.28304672241211, "learning_rate": 9.462962962962964e-06, "loss": 42.6287, "step": 3569 }, { "epoch": 85.0, "grad_norm": 27.42157745361328, "learning_rate": 9.460317460317461e-06, "loss": 37.3853, "step": 3570 }, { "epoch": 85.02388059701492, "grad_norm": 40.84228515625, "learning_rate": 9.457671957671959e-06, "loss": 43.8201, "step": 3571 }, { "epoch": 85.04776119402985, "grad_norm": 36.39906692504883, "learning_rate": 9.455026455026456e-06, "loss": 41.5254, "step": 3572 }, { "epoch": 85.07164179104478, "grad_norm": 29.795923233032227, "learning_rate": 9.452380952380952e-06, "loss": 42.4395, "step": 3573 }, { "epoch": 85.0955223880597, "grad_norm": 25.486753463745117, "learning_rate": 9.449735449735451e-06, "loss": 42.8189, "step": 3574 }, { "epoch": 85.11940298507463, "grad_norm": 37.790260314941406, "learning_rate": 9.447089947089949e-06, "loss": 42.8718, "step": 3575 }, { "epoch": 85.14328358208955, "grad_norm": 29.528602600097656, "learning_rate": 9.444444444444445e-06, "loss": 43.3714, "step": 3576 }, { "epoch": 85.16716417910447, "grad_norm": 37.71443176269531, "learning_rate": 9.441798941798944e-06, "loss": 42.4381, "step": 3577 }, { "epoch": 85.1910447761194, "grad_norm": 36.625, "learning_rate": 9.43915343915344e-06, "loss": 41.7229, "step": 3578 }, { "epoch": 85.21492537313434, "grad_norm": 30.335342407226562, "learning_rate": 9.436507936507937e-06, "loss": 41.8887, "step": 3579 }, { "epoch": 85.23880597014926, "grad_norm": 24.375329971313477, "learning_rate": 9.433862433862435e-06, "loss": 42.6894, "step": 3580 }, { "epoch": 85.26268656716418, "grad_norm": 34.27681350708008, "learning_rate": 9.431216931216932e-06, "loss": 42.4825, "step": 3581 }, { "epoch": 85.2865671641791, "grad_norm": 27.515474319458008, "learning_rate": 9.42857142857143e-06, "loss": 41.3011, "step": 3582 }, { "epoch": 85.31044776119403, "grad_norm": 39.355350494384766, "learning_rate": 9.425925925925925e-06, "loss": 42.1456, "step": 3583 }, { "epoch": 85.33432835820895, "grad_norm": 34.957523345947266, "learning_rate": 9.423280423280425e-06, "loss": 42.9466, "step": 3584 }, { "epoch": 85.35820895522389, "grad_norm": 30.264474868774414, "learning_rate": 9.420634920634922e-06, "loss": 42.5819, "step": 3585 }, { "epoch": 85.38208955223881, "grad_norm": 27.88845443725586, "learning_rate": 9.417989417989418e-06, "loss": 41.4037, "step": 3586 }, { "epoch": 85.40597014925373, "grad_norm": 30.240957260131836, "learning_rate": 9.415343915343917e-06, "loss": 44.7681, "step": 3587 }, { "epoch": 85.42985074626866, "grad_norm": 23.867399215698242, "learning_rate": 9.412698412698413e-06, "loss": 41.2699, "step": 3588 }, { "epoch": 85.45373134328358, "grad_norm": 39.2992057800293, "learning_rate": 9.41005291005291e-06, "loss": 42.2639, "step": 3589 }, { "epoch": 85.4776119402985, "grad_norm": 32.746673583984375, "learning_rate": 9.407407407407408e-06, "loss": 43.3612, "step": 3590 }, { "epoch": 85.50149253731344, "grad_norm": 33.791748046875, "learning_rate": 9.404761904761905e-06, "loss": 43.1554, "step": 3591 }, { "epoch": 85.52537313432836, "grad_norm": 35.11564254760742, "learning_rate": 9.402116402116403e-06, "loss": 43.4265, "step": 3592 }, { "epoch": 85.54925373134328, "grad_norm": 27.411352157592773, "learning_rate": 9.3994708994709e-06, "loss": 42.7959, "step": 3593 }, { "epoch": 85.57313432835821, "grad_norm": 27.369596481323242, "learning_rate": 9.396825396825398e-06, "loss": 44.1557, "step": 3594 }, { "epoch": 85.59701492537313, "grad_norm": 30.399707794189453, "learning_rate": 9.394179894179895e-06, "loss": 42.5034, "step": 3595 }, { "epoch": 85.62089552238805, "grad_norm": 24.180538177490234, "learning_rate": 9.391534391534393e-06, "loss": 42.256, "step": 3596 }, { "epoch": 85.64477611940299, "grad_norm": 35.2861328125, "learning_rate": 9.38888888888889e-06, "loss": 43.6244, "step": 3597 }, { "epoch": 85.66865671641791, "grad_norm": 28.855852127075195, "learning_rate": 9.386243386243386e-06, "loss": 43.046, "step": 3598 }, { "epoch": 85.69253731343284, "grad_norm": 33.648170471191406, "learning_rate": 9.383597883597885e-06, "loss": 42.0113, "step": 3599 }, { "epoch": 85.71641791044776, "grad_norm": 30.42345428466797, "learning_rate": 9.380952380952381e-06, "loss": 42.4, "step": 3600 }, { "epoch": 85.74029850746268, "grad_norm": 34.80357360839844, "learning_rate": 9.378306878306879e-06, "loss": 41.5381, "step": 3601 }, { "epoch": 85.7641791044776, "grad_norm": 29.07464027404785, "learning_rate": 9.375661375661376e-06, "loss": 43.5597, "step": 3602 }, { "epoch": 85.78805970149254, "grad_norm": 35.02674865722656, "learning_rate": 9.373015873015874e-06, "loss": 42.0479, "step": 3603 }, { "epoch": 85.81194029850747, "grad_norm": 29.9696044921875, "learning_rate": 9.370370370370371e-06, "loss": 42.6829, "step": 3604 }, { "epoch": 85.83582089552239, "grad_norm": 31.754671096801758, "learning_rate": 9.367724867724869e-06, "loss": 42.824, "step": 3605 }, { "epoch": 85.85970149253731, "grad_norm": 30.765913009643555, "learning_rate": 9.365079365079366e-06, "loss": 42.8912, "step": 3606 }, { "epoch": 85.88358208955223, "grad_norm": 33.485015869140625, "learning_rate": 9.362433862433864e-06, "loss": 42.7802, "step": 3607 }, { "epoch": 85.90746268656716, "grad_norm": 27.535614013671875, "learning_rate": 9.359788359788361e-06, "loss": 43.0182, "step": 3608 }, { "epoch": 85.9313432835821, "grad_norm": 28.8901309967041, "learning_rate": 9.357142857142859e-06, "loss": 43.1223, "step": 3609 }, { "epoch": 85.95522388059702, "grad_norm": 27.991809844970703, "learning_rate": 9.354497354497354e-06, "loss": 43.4701, "step": 3610 }, { "epoch": 85.97910447761194, "grad_norm": 33.3857536315918, "learning_rate": 9.351851851851854e-06, "loss": 43.115, "step": 3611 }, { "epoch": 86.0, "grad_norm": 25.59542465209961, "learning_rate": 9.34920634920635e-06, "loss": 37.2275, "step": 3612 }, { "epoch": 86.02388059701492, "grad_norm": 32.959251403808594, "learning_rate": 9.346560846560847e-06, "loss": 41.951, "step": 3613 }, { "epoch": 86.04776119402985, "grad_norm": 26.431245803833008, "learning_rate": 9.343915343915344e-06, "loss": 40.553, "step": 3614 }, { "epoch": 86.07164179104478, "grad_norm": 31.518957138061523, "learning_rate": 9.341269841269842e-06, "loss": 42.7644, "step": 3615 }, { "epoch": 86.0955223880597, "grad_norm": 30.063220977783203, "learning_rate": 9.33862433862434e-06, "loss": 41.5891, "step": 3616 }, { "epoch": 86.11940298507463, "grad_norm": 32.649227142333984, "learning_rate": 9.335978835978837e-06, "loss": 43.2927, "step": 3617 }, { "epoch": 86.14328358208955, "grad_norm": 28.61098861694336, "learning_rate": 9.333333333333334e-06, "loss": 42.4467, "step": 3618 }, { "epoch": 86.16716417910447, "grad_norm": 30.715784072875977, "learning_rate": 9.330687830687832e-06, "loss": 41.0933, "step": 3619 }, { "epoch": 86.1910447761194, "grad_norm": 27.99184799194336, "learning_rate": 9.32804232804233e-06, "loss": 42.218, "step": 3620 }, { "epoch": 86.21492537313434, "grad_norm": 32.13215255737305, "learning_rate": 9.325396825396827e-06, "loss": 42.4495, "step": 3621 }, { "epoch": 86.23880597014926, "grad_norm": 28.051559448242188, "learning_rate": 9.322751322751323e-06, "loss": 43.2451, "step": 3622 }, { "epoch": 86.26268656716418, "grad_norm": 33.464115142822266, "learning_rate": 9.32010582010582e-06, "loss": 43.6584, "step": 3623 }, { "epoch": 86.2865671641791, "grad_norm": 28.151817321777344, "learning_rate": 9.317460317460318e-06, "loss": 42.722, "step": 3624 }, { "epoch": 86.31044776119403, "grad_norm": NaN, "learning_rate": 9.314814814814815e-06, "loss": 37.1565, "step": 3625 }, { "epoch": 86.33432835820895, "grad_norm": 29.83131217956543, "learning_rate": 9.314814814814815e-06, "loss": 43.1632, "step": 3626 }, { "epoch": 86.35820895522389, "grad_norm": 24.83383560180664, "learning_rate": 9.312169312169313e-06, "loss": 43.5542, "step": 3627 }, { "epoch": 86.38208955223881, "grad_norm": 33.05693817138672, "learning_rate": 9.30952380952381e-06, "loss": 42.7432, "step": 3628 }, { "epoch": 86.40597014925373, "grad_norm": 24.293209075927734, "learning_rate": 9.306878306878308e-06, "loss": 42.9506, "step": 3629 }, { "epoch": 86.42985074626866, "grad_norm": 33.47346496582031, "learning_rate": 9.304232804232805e-06, "loss": 42.2723, "step": 3630 }, { "epoch": 86.45373134328358, "grad_norm": 28.47313117980957, "learning_rate": 9.301587301587303e-06, "loss": 43.7464, "step": 3631 }, { "epoch": 86.4776119402985, "grad_norm": 32.237510681152344, "learning_rate": 9.2989417989418e-06, "loss": 42.8704, "step": 3632 }, { "epoch": 86.50149253731344, "grad_norm": 25.842601776123047, "learning_rate": 9.296296296296296e-06, "loss": 41.6084, "step": 3633 }, { "epoch": 86.52537313432836, "grad_norm": 27.513307571411133, "learning_rate": 9.293650793650795e-06, "loss": 42.7922, "step": 3634 }, { "epoch": 86.54925373134328, "grad_norm": 25.676212310791016, "learning_rate": 9.291005291005291e-06, "loss": 42.0415, "step": 3635 }, { "epoch": 86.57313432835821, "grad_norm": 29.911081314086914, "learning_rate": 9.288359788359788e-06, "loss": 43.0526, "step": 3636 }, { "epoch": 86.59701492537313, "grad_norm": 21.788707733154297, "learning_rate": 9.285714285714288e-06, "loss": 42.2228, "step": 3637 }, { "epoch": 86.62089552238805, "grad_norm": 34.92530822753906, "learning_rate": 9.283068783068783e-06, "loss": 42.6756, "step": 3638 }, { "epoch": 86.64477611940299, "grad_norm": 31.41309928894043, "learning_rate": 9.280423280423281e-06, "loss": 43.3258, "step": 3639 }, { "epoch": 86.66865671641791, "grad_norm": 27.432342529296875, "learning_rate": 9.277777777777778e-06, "loss": 43.0612, "step": 3640 }, { "epoch": 86.69253731343284, "grad_norm": 25.92644691467285, "learning_rate": 9.275132275132276e-06, "loss": 41.7141, "step": 3641 }, { "epoch": 86.71641791044776, "grad_norm": 27.26793098449707, "learning_rate": 9.272486772486773e-06, "loss": 42.8127, "step": 3642 }, { "epoch": 86.74029850746268, "grad_norm": 22.45132827758789, "learning_rate": 9.26984126984127e-06, "loss": 43.4623, "step": 3643 }, { "epoch": 86.7641791044776, "grad_norm": 29.31770896911621, "learning_rate": 9.267195767195768e-06, "loss": 43.428, "step": 3644 }, { "epoch": 86.78805970149254, "grad_norm": 26.000110626220703, "learning_rate": 9.264550264550264e-06, "loss": 43.7773, "step": 3645 }, { "epoch": 86.81194029850747, "grad_norm": 30.51299476623535, "learning_rate": 9.261904761904763e-06, "loss": 43.2915, "step": 3646 }, { "epoch": 86.83582089552239, "grad_norm": 25.712812423706055, "learning_rate": 9.25925925925926e-06, "loss": 42.6203, "step": 3647 }, { "epoch": 86.85970149253731, "grad_norm": 32.85362243652344, "learning_rate": 9.256613756613757e-06, "loss": 42.1768, "step": 3648 }, { "epoch": 86.88358208955223, "grad_norm": 30.07919692993164, "learning_rate": 9.253968253968256e-06, "loss": 42.4139, "step": 3649 }, { "epoch": 86.90746268656716, "grad_norm": 31.38039779663086, "learning_rate": 9.251322751322752e-06, "loss": 42.23, "step": 3650 }, { "epoch": 86.9313432835821, "grad_norm": 26.601993560791016, "learning_rate": 9.248677248677249e-06, "loss": 42.2522, "step": 3651 }, { "epoch": 86.95522388059702, "grad_norm": 31.616823196411133, "learning_rate": 9.246031746031747e-06, "loss": 43.3183, "step": 3652 }, { "epoch": 86.97910447761194, "grad_norm": 25.606231689453125, "learning_rate": 9.243386243386244e-06, "loss": 42.7862, "step": 3653 }, { "epoch": 87.0, "grad_norm": 22.20980453491211, "learning_rate": 9.240740740740742e-06, "loss": 37.7077, "step": 3654 }, { "epoch": 87.02388059701492, "grad_norm": 24.519224166870117, "learning_rate": 9.238095238095239e-06, "loss": 42.4255, "step": 3655 }, { "epoch": 87.04776119402985, "grad_norm": 27.409582138061523, "learning_rate": 9.235449735449737e-06, "loss": 42.0198, "step": 3656 }, { "epoch": 87.07164179104478, "grad_norm": 20.307886123657227, "learning_rate": 9.232804232804234e-06, "loss": 41.6037, "step": 3657 }, { "epoch": 87.0955223880597, "grad_norm": 24.046375274658203, "learning_rate": 9.230158730158732e-06, "loss": 43.9297, "step": 3658 }, { "epoch": 87.11940298507463, "grad_norm": 23.58251953125, "learning_rate": 9.227513227513229e-06, "loss": 43.4211, "step": 3659 }, { "epoch": 87.14328358208955, "grad_norm": 20.67659568786621, "learning_rate": 9.224867724867725e-06, "loss": 42.7205, "step": 3660 }, { "epoch": 87.16716417910447, "grad_norm": 18.82547950744629, "learning_rate": 9.222222222222224e-06, "loss": 42.8921, "step": 3661 }, { "epoch": 87.1910447761194, "grad_norm": 21.20027732849121, "learning_rate": 9.21957671957672e-06, "loss": 41.0809, "step": 3662 }, { "epoch": 87.21492537313434, "grad_norm": 20.002410888671875, "learning_rate": 9.216931216931217e-06, "loss": 42.0559, "step": 3663 }, { "epoch": 87.23880597014926, "grad_norm": 16.792434692382812, "learning_rate": 9.214285714285715e-06, "loss": 40.659, "step": 3664 }, { "epoch": 87.26268656716418, "grad_norm": 20.209190368652344, "learning_rate": 9.211640211640212e-06, "loss": 42.387, "step": 3665 }, { "epoch": 87.2865671641791, "grad_norm": 17.87749481201172, "learning_rate": 9.20899470899471e-06, "loss": 41.6863, "step": 3666 }, { "epoch": 87.31044776119403, "grad_norm": 16.422809600830078, "learning_rate": 9.206349206349207e-06, "loss": 43.5165, "step": 3667 }, { "epoch": 87.33432835820895, "grad_norm": 17.762025833129883, "learning_rate": 9.203703703703705e-06, "loss": 41.3489, "step": 3668 }, { "epoch": 87.35820895522389, "grad_norm": 18.185434341430664, "learning_rate": 9.201058201058202e-06, "loss": 42.9896, "step": 3669 }, { "epoch": 87.38208955223881, "grad_norm": 15.573823928833008, "learning_rate": 9.198412698412698e-06, "loss": 42.5428, "step": 3670 }, { "epoch": 87.40597014925373, "grad_norm": 21.007041931152344, "learning_rate": 9.195767195767197e-06, "loss": 41.6825, "step": 3671 }, { "epoch": 87.42985074626866, "grad_norm": 21.610292434692383, "learning_rate": 9.193121693121693e-06, "loss": 42.8643, "step": 3672 }, { "epoch": 87.45373134328358, "grad_norm": 16.124156951904297, "learning_rate": 9.19047619047619e-06, "loss": 42.5377, "step": 3673 }, { "epoch": 87.4776119402985, "grad_norm": 22.14504051208496, "learning_rate": 9.187830687830688e-06, "loss": 42.878, "step": 3674 }, { "epoch": 87.50149253731344, "grad_norm": 17.33942222595215, "learning_rate": 9.185185185185186e-06, "loss": 44.3817, "step": 3675 }, { "epoch": 87.52537313432836, "grad_norm": 21.361644744873047, "learning_rate": 9.182539682539683e-06, "loss": 42.913, "step": 3676 }, { "epoch": 87.54925373134328, "grad_norm": 18.6135196685791, "learning_rate": 9.17989417989418e-06, "loss": 42.8328, "step": 3677 }, { "epoch": 87.57313432835821, "grad_norm": 23.618101119995117, "learning_rate": 9.177248677248678e-06, "loss": 42.4581, "step": 3678 }, { "epoch": 87.59701492537313, "grad_norm": 18.788637161254883, "learning_rate": 9.174603174603176e-06, "loss": 43.5344, "step": 3679 }, { "epoch": 87.62089552238805, "grad_norm": 17.69763946533203, "learning_rate": 9.171957671957673e-06, "loss": 42.8437, "step": 3680 }, { "epoch": 87.64477611940299, "grad_norm": 19.06989097595215, "learning_rate": 9.16931216931217e-06, "loss": 42.3788, "step": 3681 }, { "epoch": 87.66865671641791, "grad_norm": 18.462968826293945, "learning_rate": 9.166666666666666e-06, "loss": 42.759, "step": 3682 }, { "epoch": 87.69253731343284, "grad_norm": 21.524621963500977, "learning_rate": 9.164021164021166e-06, "loss": 43.1027, "step": 3683 }, { "epoch": 87.71641791044776, "grad_norm": 18.747453689575195, "learning_rate": 9.161375661375661e-06, "loss": 43.0803, "step": 3684 }, { "epoch": 87.74029850746268, "grad_norm": 21.170255661010742, "learning_rate": 9.158730158730159e-06, "loss": 42.641, "step": 3685 }, { "epoch": 87.7641791044776, "grad_norm": 19.89739990234375, "learning_rate": 9.156084656084656e-06, "loss": 42.5469, "step": 3686 }, { "epoch": 87.78805970149254, "grad_norm": 22.9807071685791, "learning_rate": 9.153439153439154e-06, "loss": 42.5137, "step": 3687 }, { "epoch": 87.81194029850747, "grad_norm": 19.036230087280273, "learning_rate": 9.150793650793651e-06, "loss": 42.8328, "step": 3688 }, { "epoch": 87.83582089552239, "grad_norm": 23.97933006286621, "learning_rate": 9.148148148148149e-06, "loss": 42.9784, "step": 3689 }, { "epoch": 87.85970149253731, "grad_norm": 18.06254768371582, "learning_rate": 9.145502645502646e-06, "loss": 41.7068, "step": 3690 }, { "epoch": 87.88358208955223, "grad_norm": 19.88326072692871, "learning_rate": 9.142857142857144e-06, "loss": 43.8, "step": 3691 }, { "epoch": 87.90746268656716, "grad_norm": 20.145050048828125, "learning_rate": 9.140211640211641e-06, "loss": 43.2459, "step": 3692 }, { "epoch": 87.9313432835821, "grad_norm": 16.824399948120117, "learning_rate": 9.137566137566139e-06, "loss": 42.4406, "step": 3693 }, { "epoch": 87.95522388059702, "grad_norm": 20.99275779724121, "learning_rate": 9.134920634920635e-06, "loss": 42.2506, "step": 3694 }, { "epoch": 87.97910447761194, "grad_norm": 23.64455223083496, "learning_rate": 9.132275132275134e-06, "loss": 43.1451, "step": 3695 }, { "epoch": 88.0, "grad_norm": 17.736629486083984, "learning_rate": 9.12962962962963e-06, "loss": 36.9082, "step": 3696 }, { "epoch": 88.02388059701492, "grad_norm": 20.58110809326172, "learning_rate": 9.126984126984127e-06, "loss": 41.6838, "step": 3697 }, { "epoch": 88.04776119402985, "grad_norm": 21.2742977142334, "learning_rate": 9.124338624338626e-06, "loss": 43.8259, "step": 3698 }, { "epoch": 88.07164179104478, "grad_norm": 18.40839958190918, "learning_rate": 9.121693121693122e-06, "loss": 41.6561, "step": 3699 }, { "epoch": 88.0955223880597, "grad_norm": 25.24982261657715, "learning_rate": 9.11904761904762e-06, "loss": 43.4407, "step": 3700 }, { "epoch": 88.11940298507463, "grad_norm": 16.522397994995117, "learning_rate": 9.116402116402117e-06, "loss": 42.3175, "step": 3701 }, { "epoch": 88.14328358208955, "grad_norm": 23.80354881286621, "learning_rate": 9.113756613756615e-06, "loss": 41.8656, "step": 3702 }, { "epoch": 88.16716417910447, "grad_norm": 17.915058135986328, "learning_rate": 9.111111111111112e-06, "loss": 43.4793, "step": 3703 }, { "epoch": 88.1910447761194, "grad_norm": 24.271337509155273, "learning_rate": 9.108465608465608e-06, "loss": 42.3917, "step": 3704 }, { "epoch": 88.21492537313434, "grad_norm": 21.696147918701172, "learning_rate": 9.105820105820107e-06, "loss": 42.3141, "step": 3705 }, { "epoch": 88.23880597014926, "grad_norm": 23.576507568359375, "learning_rate": 9.103174603174603e-06, "loss": 42.9454, "step": 3706 }, { "epoch": 88.26268656716418, "grad_norm": 25.030128479003906, "learning_rate": 9.1005291005291e-06, "loss": 42.8441, "step": 3707 }, { "epoch": 88.2865671641791, "grad_norm": 21.148405075073242, "learning_rate": 9.0978835978836e-06, "loss": 43.0314, "step": 3708 }, { "epoch": 88.31044776119403, "grad_norm": 25.8000431060791, "learning_rate": 9.095238095238095e-06, "loss": 42.5864, "step": 3709 }, { "epoch": 88.33432835820895, "grad_norm": 15.713743209838867, "learning_rate": 9.092592592592593e-06, "loss": 42.8121, "step": 3710 }, { "epoch": 88.35820895522389, "grad_norm": 23.208627700805664, "learning_rate": 9.08994708994709e-06, "loss": 42.9846, "step": 3711 }, { "epoch": 88.38208955223881, "grad_norm": 17.478639602661133, "learning_rate": 9.087301587301588e-06, "loss": 42.2004, "step": 3712 }, { "epoch": 88.40597014925373, "grad_norm": 21.487903594970703, "learning_rate": 9.084656084656085e-06, "loss": 41.7275, "step": 3713 }, { "epoch": 88.42985074626866, "grad_norm": 27.780941009521484, "learning_rate": 9.082010582010583e-06, "loss": 42.1269, "step": 3714 }, { "epoch": 88.45373134328358, "grad_norm": 14.19015884399414, "learning_rate": 9.07936507936508e-06, "loss": 43.512, "step": 3715 }, { "epoch": 88.4776119402985, "grad_norm": 27.63198471069336, "learning_rate": 9.076719576719576e-06, "loss": 42.4196, "step": 3716 }, { "epoch": 88.50149253731344, "grad_norm": 21.5277099609375, "learning_rate": 9.074074074074075e-06, "loss": 41.8393, "step": 3717 }, { "epoch": 88.52537313432836, "grad_norm": 20.19924545288086, "learning_rate": 9.071428571428573e-06, "loss": 41.6486, "step": 3718 }, { "epoch": 88.54925373134328, "grad_norm": 22.75286865234375, "learning_rate": 9.068783068783069e-06, "loss": 43.3116, "step": 3719 }, { "epoch": 88.57313432835821, "grad_norm": 16.763381958007812, "learning_rate": 9.066137566137568e-06, "loss": 43.0704, "step": 3720 }, { "epoch": 88.59701492537313, "grad_norm": 23.842023849487305, "learning_rate": 9.063492063492064e-06, "loss": 43.7468, "step": 3721 }, { "epoch": 88.62089552238805, "grad_norm": 20.88597297668457, "learning_rate": 9.060846560846561e-06, "loss": 42.2398, "step": 3722 }, { "epoch": 88.64477611940299, "grad_norm": 19.333271026611328, "learning_rate": 9.058201058201059e-06, "loss": 41.7667, "step": 3723 }, { "epoch": 88.66865671641791, "grad_norm": 22.313888549804688, "learning_rate": 9.055555555555556e-06, "loss": 42.3198, "step": 3724 }, { "epoch": 88.69253731343284, "grad_norm": 20.26089096069336, "learning_rate": 9.052910052910054e-06, "loss": 42.9191, "step": 3725 }, { "epoch": 88.71641791044776, "grad_norm": 17.900373458862305, "learning_rate": 9.050264550264551e-06, "loss": 41.7498, "step": 3726 }, { "epoch": 88.74029850746268, "grad_norm": 22.735700607299805, "learning_rate": 9.047619047619049e-06, "loss": 41.4744, "step": 3727 }, { "epoch": 88.7641791044776, "grad_norm": 22.933048248291016, "learning_rate": 9.044973544973546e-06, "loss": 43.4595, "step": 3728 }, { "epoch": 88.78805970149254, "grad_norm": 15.648778915405273, "learning_rate": 9.042328042328044e-06, "loss": 43.4811, "step": 3729 }, { "epoch": 88.81194029850747, "grad_norm": 35.44391632080078, "learning_rate": 9.039682539682541e-06, "loss": 42.0879, "step": 3730 }, { "epoch": 88.83582089552239, "grad_norm": 26.575231552124023, "learning_rate": 9.037037037037037e-06, "loss": 41.6883, "step": 3731 }, { "epoch": 88.85970149253731, "grad_norm": 33.38102340698242, "learning_rate": 9.034391534391536e-06, "loss": 43.2903, "step": 3732 }, { "epoch": 88.88358208955223, "grad_norm": 26.297910690307617, "learning_rate": 9.031746031746032e-06, "loss": 42.744, "step": 3733 }, { "epoch": 88.90746268656716, "grad_norm": 25.057889938354492, "learning_rate": 9.02910052910053e-06, "loss": 42.076, "step": 3734 }, { "epoch": 88.9313432835821, "grad_norm": 21.162078857421875, "learning_rate": 9.026455026455027e-06, "loss": 42.397, "step": 3735 }, { "epoch": 88.95522388059702, "grad_norm": 21.846647262573242, "learning_rate": 9.023809523809524e-06, "loss": 42.7379, "step": 3736 }, { "epoch": 88.97910447761194, "grad_norm": 19.74768829345703, "learning_rate": 9.021164021164022e-06, "loss": 42.0906, "step": 3737 }, { "epoch": 89.0, "grad_norm": 18.839765548706055, "learning_rate": 9.01851851851852e-06, "loss": 37.881, "step": 3738 }, { "epoch": 89.02388059701492, "grad_norm": 22.15633201599121, "learning_rate": 9.015873015873017e-06, "loss": 42.5544, "step": 3739 }, { "epoch": 89.04776119402985, "grad_norm": 18.709840774536133, "learning_rate": 9.013227513227514e-06, "loss": 43.2949, "step": 3740 }, { "epoch": 89.07164179104478, "grad_norm": 22.922399520874023, "learning_rate": 9.010582010582012e-06, "loss": 41.9215, "step": 3741 }, { "epoch": 89.0955223880597, "grad_norm": 18.445695877075195, "learning_rate": 9.00793650793651e-06, "loss": 42.994, "step": 3742 }, { "epoch": 89.11940298507463, "grad_norm": 22.694503784179688, "learning_rate": 9.005291005291005e-06, "loss": 42.4024, "step": 3743 }, { "epoch": 89.14328358208955, "grad_norm": 23.259532928466797, "learning_rate": 9.002645502645503e-06, "loss": 41.9366, "step": 3744 }, { "epoch": 89.16716417910447, "grad_norm": 24.131465911865234, "learning_rate": 9e-06, "loss": 42.9172, "step": 3745 }, { "epoch": 89.1910447761194, "grad_norm": 21.01772117614746, "learning_rate": 8.997354497354498e-06, "loss": 42.0505, "step": 3746 }, { "epoch": 89.21492537313434, "grad_norm": 20.675086975097656, "learning_rate": 8.994708994708995e-06, "loss": 42.7076, "step": 3747 }, { "epoch": 89.23880597014926, "grad_norm": 22.289649963378906, "learning_rate": 8.992063492063493e-06, "loss": 42.4533, "step": 3748 }, { "epoch": 89.26268656716418, "grad_norm": 22.76655387878418, "learning_rate": 8.98941798941799e-06, "loss": 42.0269, "step": 3749 }, { "epoch": 89.2865671641791, "grad_norm": 19.732887268066406, "learning_rate": 8.986772486772488e-06, "loss": 44.2783, "step": 3750 }, { "epoch": 89.31044776119403, "grad_norm": 22.45815658569336, "learning_rate": 8.984126984126985e-06, "loss": 40.1901, "step": 3751 }, { "epoch": 89.33432835820895, "grad_norm": 24.511625289916992, "learning_rate": 8.981481481481483e-06, "loss": 43.0842, "step": 3752 }, { "epoch": 89.35820895522389, "grad_norm": 19.739845275878906, "learning_rate": 8.978835978835979e-06, "loss": 43.7219, "step": 3753 }, { "epoch": 89.38208955223881, "grad_norm": 26.18813133239746, "learning_rate": 8.976190476190478e-06, "loss": 43.5427, "step": 3754 }, { "epoch": 89.40597014925373, "grad_norm": 21.95644760131836, "learning_rate": 8.973544973544973e-06, "loss": 42.9161, "step": 3755 }, { "epoch": 89.42985074626866, "grad_norm": 22.270849227905273, "learning_rate": 8.970899470899471e-06, "loss": 42.6121, "step": 3756 }, { "epoch": 89.45373134328358, "grad_norm": 18.48128318786621, "learning_rate": 8.968253968253968e-06, "loss": 42.044, "step": 3757 }, { "epoch": 89.4776119402985, "grad_norm": 22.865985870361328, "learning_rate": 8.965608465608466e-06, "loss": 42.1096, "step": 3758 }, { "epoch": 89.50149253731344, "grad_norm": 19.26102066040039, "learning_rate": 8.962962962962963e-06, "loss": 42.5147, "step": 3759 }, { "epoch": 89.52537313432836, "grad_norm": 27.352407455444336, "learning_rate": 8.960317460317461e-06, "loss": 41.7614, "step": 3760 }, { "epoch": 89.54925373134328, "grad_norm": 21.059770584106445, "learning_rate": 8.957671957671958e-06, "loss": 41.5053, "step": 3761 }, { "epoch": 89.57313432835821, "grad_norm": 23.909198760986328, "learning_rate": 8.955026455026456e-06, "loss": 43.4126, "step": 3762 }, { "epoch": 89.59701492537313, "grad_norm": 28.529970169067383, "learning_rate": 8.952380952380953e-06, "loss": 43.489, "step": 3763 }, { "epoch": 89.62089552238805, "grad_norm": 22.008472442626953, "learning_rate": 8.949735449735451e-06, "loss": 42.5781, "step": 3764 }, { "epoch": 89.64477611940299, "grad_norm": NaN, "learning_rate": 8.947089947089947e-06, "loss": 37.0211, "step": 3765 }, { "epoch": 89.66865671641791, "grad_norm": 29.881391525268555, "learning_rate": 8.947089947089947e-06, "loss": 42.6102, "step": 3766 }, { "epoch": 89.69253731343284, "grad_norm": 24.919992446899414, "learning_rate": 8.944444444444446e-06, "loss": 42.7878, "step": 3767 }, { "epoch": 89.71641791044776, "grad_norm": 29.473249435424805, "learning_rate": 8.941798941798942e-06, "loss": 41.9105, "step": 3768 }, { "epoch": 89.74029850746268, "grad_norm": 20.71428871154785, "learning_rate": 8.93915343915344e-06, "loss": 42.0715, "step": 3769 }, { "epoch": 89.7641791044776, "grad_norm": 29.31629180908203, "learning_rate": 8.936507936507938e-06, "loss": 41.3888, "step": 3770 }, { "epoch": 89.78805970149254, "grad_norm": 22.29326057434082, "learning_rate": 8.933862433862434e-06, "loss": 43.0029, "step": 3771 }, { "epoch": 89.81194029850747, "grad_norm": NaN, "learning_rate": 8.931216931216932e-06, "loss": 49.4483, "step": 3772 }, { "epoch": 89.83582089552239, "grad_norm": 23.31702423095703, "learning_rate": 8.931216931216932e-06, "loss": 42.8926, "step": 3773 }, { "epoch": 89.85970149253731, "grad_norm": 26.894012451171875, "learning_rate": 8.92857142857143e-06, "loss": 41.3476, "step": 3774 }, { "epoch": 89.88358208955223, "grad_norm": 19.226701736450195, "learning_rate": 8.925925925925927e-06, "loss": 42.9396, "step": 3775 }, { "epoch": 89.90746268656716, "grad_norm": 26.918243408203125, "learning_rate": 8.923280423280424e-06, "loss": 41.7109, "step": 3776 }, { "epoch": 89.9313432835821, "grad_norm": 22.435697555541992, "learning_rate": 8.920634920634922e-06, "loss": 42.5026, "step": 3777 }, { "epoch": 89.95522388059702, "grad_norm": 19.455547332763672, "learning_rate": 8.91798941798942e-06, "loss": 42.4964, "step": 3778 }, { "epoch": 89.97910447761194, "grad_norm": 24.792171478271484, "learning_rate": 8.915343915343915e-06, "loss": 41.6366, "step": 3779 }, { "epoch": 90.0, "grad_norm": 14.4516019821167, "learning_rate": 8.912698412698414e-06, "loss": 36.7873, "step": 3780 }, { "epoch": 90.0, "step": 3780, "total_flos": 1.857999472723437e+17, "train_loss": 4.747417533713043, "train_runtime": 12850.2933, "train_samples_per_second": 37.484, "train_steps_per_second": 0.294 }, { "epoch": 90.02388059701492, "grad_norm": 26.03937339782715, "learning_rate": 1e-05, "loss": 42.4337, "step": 3781 }, { "epoch": 90.04776119402985, "grad_norm": Infinity, "learning_rate": 9.997835497835499e-06, "loss": 51.1491, "step": 3782 }, { "epoch": 90.07164179104478, "grad_norm": Infinity, "learning_rate": 9.997835497835499e-06, "loss": 53.2051, "step": 3783 }, { "epoch": 90.0955223880597, "grad_norm": 446.1357421875, "learning_rate": 9.997835497835499e-06, "loss": 51.5745, "step": 3784 }, { "epoch": 90.11940298507463, "grad_norm": 229.35903930664062, "learning_rate": 9.995670995670996e-06, "loss": 49.5899, "step": 3785 }, { "epoch": 90.14328358208955, "grad_norm": 109.18777465820312, "learning_rate": 9.993506493506494e-06, "loss": 45.889, "step": 3786 }, { "epoch": 90.16716417910447, "grad_norm": 79.82958221435547, "learning_rate": 9.991341991341992e-06, "loss": 44.3638, "step": 3787 }, { "epoch": 90.1910447761194, "grad_norm": 69.46668243408203, "learning_rate": 9.98917748917749e-06, "loss": 43.6641, "step": 3788 }, { "epoch": 90.21492537313434, "grad_norm": 56.4055290222168, "learning_rate": 9.987012987012988e-06, "loss": 45.0336, "step": 3789 }, { "epoch": 90.23880597014926, "grad_norm": 53.48906326293945, "learning_rate": 9.984848484848485e-06, "loss": 42.9807, "step": 3790 }, { "epoch": 90.26268656716418, "grad_norm": 38.25556564331055, "learning_rate": 9.982683982683983e-06, "loss": 44.1306, "step": 3791 }, { "epoch": 90.2865671641791, "grad_norm": 41.42750549316406, "learning_rate": 9.980519480519481e-06, "loss": 42.1205, "step": 3792 }, { "epoch": 90.31044776119403, "grad_norm": 34.52850341796875, "learning_rate": 9.978354978354979e-06, "loss": 43.3744, "step": 3793 }, { "epoch": 90.33432835820895, "grad_norm": 28.61484146118164, "learning_rate": 9.976190476190477e-06, "loss": 43.487, "step": 3794 }, { "epoch": 90.35820895522389, "grad_norm": 27.961273193359375, "learning_rate": 9.974025974025974e-06, "loss": 43.9663, "step": 3795 }, { "epoch": 90.38208955223881, "grad_norm": 27.92458152770996, "learning_rate": 9.971861471861472e-06, "loss": 43.2716, "step": 3796 }, { "epoch": 90.40597014925373, "grad_norm": 21.93165397644043, "learning_rate": 9.96969696969697e-06, "loss": 43.3704, "step": 3797 }, { "epoch": 90.42985074626866, "grad_norm": 27.053754806518555, "learning_rate": 9.967532467532468e-06, "loss": 42.7038, "step": 3798 }, { "epoch": 90.45373134328358, "grad_norm": 31.030607223510742, "learning_rate": 9.965367965367966e-06, "loss": 43.1343, "step": 3799 }, { "epoch": 90.4776119402985, "grad_norm": 24.048316955566406, "learning_rate": 9.963203463203463e-06, "loss": 42.1113, "step": 3800 }, { "epoch": 90.50149253731344, "grad_norm": 17.98249053955078, "learning_rate": 9.961038961038963e-06, "loss": 42.6117, "step": 3801 }, { "epoch": 90.52537313432836, "grad_norm": 20.080669403076172, "learning_rate": 9.95887445887446e-06, "loss": 42.4281, "step": 3802 }, { "epoch": 90.54925373134328, "grad_norm": 19.842525482177734, "learning_rate": 9.956709956709958e-06, "loss": 40.8022, "step": 3803 }, { "epoch": 90.57313432835821, "grad_norm": 20.453306198120117, "learning_rate": 9.954545454545456e-06, "loss": 42.8288, "step": 3804 }, { "epoch": 90.59701492537313, "grad_norm": 19.955123901367188, "learning_rate": 9.952380952380954e-06, "loss": 40.2546, "step": 3805 }, { "epoch": 90.62089552238805, "grad_norm": 17.246713638305664, "learning_rate": 9.950216450216452e-06, "loss": 42.0433, "step": 3806 }, { "epoch": 90.64477611940299, "grad_norm": 20.76253890991211, "learning_rate": 9.94805194805195e-06, "loss": 42.7741, "step": 3807 }, { "epoch": 90.66865671641791, "grad_norm": 21.001201629638672, "learning_rate": 9.945887445887446e-06, "loss": 43.6741, "step": 3808 }, { "epoch": 90.69253731343284, "grad_norm": 20.765684127807617, "learning_rate": 9.943722943722944e-06, "loss": 41.8182, "step": 3809 }, { "epoch": 90.71641791044776, "grad_norm": 16.794981002807617, "learning_rate": 9.941558441558441e-06, "loss": 42.6478, "step": 3810 }, { "epoch": 90.74029850746268, "grad_norm": 23.377695083618164, "learning_rate": 9.939393939393939e-06, "loss": 42.0878, "step": 3811 }, { "epoch": 90.7641791044776, "grad_norm": 23.543071746826172, "learning_rate": 9.937229437229437e-06, "loss": 42.4977, "step": 3812 }, { "epoch": 90.78805970149254, "grad_norm": 18.546525955200195, "learning_rate": 9.935064935064936e-06, "loss": 42.4457, "step": 3813 }, { "epoch": 90.81194029850747, "grad_norm": 25.244186401367188, "learning_rate": 9.932900432900434e-06, "loss": 42.4906, "step": 3814 }, { "epoch": 90.83582089552239, "grad_norm": 21.267963409423828, "learning_rate": 9.930735930735932e-06, "loss": 41.7433, "step": 3815 }, { "epoch": 90.85970149253731, "grad_norm": 19.291160583496094, "learning_rate": 9.92857142857143e-06, "loss": 41.7054, "step": 3816 }, { "epoch": 90.88358208955223, "grad_norm": 21.301227569580078, "learning_rate": 9.926406926406928e-06, "loss": 42.5566, "step": 3817 }, { "epoch": 90.90746268656716, "grad_norm": 19.511821746826172, "learning_rate": 9.924242424242425e-06, "loss": 41.5064, "step": 3818 }, { "epoch": 90.9313432835821, "grad_norm": 18.419504165649414, "learning_rate": 9.922077922077923e-06, "loss": 41.4675, "step": 3819 }, { "epoch": 90.95522388059702, "grad_norm": 19.577409744262695, "learning_rate": 9.919913419913421e-06, "loss": 43.4705, "step": 3820 }, { "epoch": 90.97910447761194, "grad_norm": 23.015262603759766, "learning_rate": 9.917748917748919e-06, "loss": 42.0356, "step": 3821 }, { "epoch": 91.0, "grad_norm": 17.785385131835938, "learning_rate": 9.915584415584417e-06, "loss": 37.6509, "step": 3822 }, { "epoch": 91.02388059701492, "grad_norm": 16.111051559448242, "learning_rate": 9.913419913419914e-06, "loss": 41.7977, "step": 3823 }, { "epoch": 91.04776119402985, "grad_norm": 22.09601593017578, "learning_rate": 9.911255411255412e-06, "loss": 42.5569, "step": 3824 }, { "epoch": 91.07164179104478, "grad_norm": 18.80573081970215, "learning_rate": 9.90909090909091e-06, "loss": 41.773, "step": 3825 }, { "epoch": 91.0955223880597, "grad_norm": 14.442939758300781, "learning_rate": 9.906926406926408e-06, "loss": 42.0426, "step": 3826 }, { "epoch": 91.11940298507463, "grad_norm": 21.839468002319336, "learning_rate": 9.904761904761906e-06, "loss": 41.9993, "step": 3827 }, { "epoch": 91.14328358208955, "grad_norm": 17.792217254638672, "learning_rate": 9.902597402597403e-06, "loss": 42.1515, "step": 3828 }, { "epoch": 91.16716417910447, "grad_norm": 15.722336769104004, "learning_rate": 9.900432900432901e-06, "loss": 42.2694, "step": 3829 }, { "epoch": 91.1910447761194, "grad_norm": 20.94297218322754, "learning_rate": 9.898268398268399e-06, "loss": 42.7043, "step": 3830 }, { "epoch": 91.21492537313434, "grad_norm": 16.2196044921875, "learning_rate": 9.896103896103897e-06, "loss": 42.4405, "step": 3831 }, { "epoch": 91.23880597014926, "grad_norm": 20.381193161010742, "learning_rate": 9.893939393939395e-06, "loss": 43.424, "step": 3832 }, { "epoch": 91.26268656716418, "grad_norm": 14.948447227478027, "learning_rate": 9.891774891774892e-06, "loss": 42.7289, "step": 3833 }, { "epoch": 91.2865671641791, "grad_norm": 17.548126220703125, "learning_rate": 9.88961038961039e-06, "loss": 41.9656, "step": 3834 }, { "epoch": 91.31044776119403, "grad_norm": 20.301937103271484, "learning_rate": 9.887445887445888e-06, "loss": 42.9516, "step": 3835 }, { "epoch": 91.33432835820895, "grad_norm": 18.3472900390625, "learning_rate": 9.885281385281386e-06, "loss": 42.281, "step": 3836 }, { "epoch": 91.35820895522389, "grad_norm": 15.503434181213379, "learning_rate": 9.883116883116885e-06, "loss": 42.502, "step": 3837 }, { "epoch": 91.38208955223881, "grad_norm": 21.448226928710938, "learning_rate": 9.880952380952381e-06, "loss": 43.0384, "step": 3838 }, { "epoch": 91.40597014925373, "grad_norm": 16.685815811157227, "learning_rate": 9.87878787878788e-06, "loss": 41.798, "step": 3839 }, { "epoch": 91.42985074626866, "grad_norm": 18.722484588623047, "learning_rate": 9.876623376623377e-06, "loss": 43.4082, "step": 3840 }, { "epoch": 91.45373134328358, "grad_norm": 19.54647445678711, "learning_rate": 9.874458874458875e-06, "loss": 42.2679, "step": 3841 }, { "epoch": 91.4776119402985, "grad_norm": 18.793495178222656, "learning_rate": 9.872294372294373e-06, "loss": 42.2962, "step": 3842 }, { "epoch": 91.50149253731344, "grad_norm": 16.687400817871094, "learning_rate": 9.87012987012987e-06, "loss": 44.2949, "step": 3843 }, { "epoch": 91.52537313432836, "grad_norm": 16.13211441040039, "learning_rate": 9.867965367965368e-06, "loss": 42.602, "step": 3844 }, { "epoch": 91.54925373134328, "grad_norm": 16.72748565673828, "learning_rate": 9.865800865800866e-06, "loss": 42.3636, "step": 3845 }, { "epoch": 91.57313432835821, "grad_norm": 22.206905364990234, "learning_rate": 9.863636363636364e-06, "loss": 43.1925, "step": 3846 }, { "epoch": 91.59701492537313, "grad_norm": 19.21588134765625, "learning_rate": 9.861471861471862e-06, "loss": 43.1342, "step": 3847 }, { "epoch": 91.62089552238805, "grad_norm": 19.708059310913086, "learning_rate": 9.85930735930736e-06, "loss": 42.7964, "step": 3848 }, { "epoch": 91.64477611940299, "grad_norm": 22.789594650268555, "learning_rate": 9.857142857142859e-06, "loss": 42.7767, "step": 3849 }, { "epoch": 91.66865671641791, "grad_norm": 17.048229217529297, "learning_rate": 9.854978354978357e-06, "loss": 42.6642, "step": 3850 }, { "epoch": 91.69253731343284, "grad_norm": 21.39427375793457, "learning_rate": 9.852813852813854e-06, "loss": 42.8962, "step": 3851 }, { "epoch": 91.71641791044776, "grad_norm": 25.67850112915039, "learning_rate": 9.850649350649352e-06, "loss": 42.4072, "step": 3852 }, { "epoch": 91.74029850746268, "grad_norm": 20.17367935180664, "learning_rate": 9.84848484848485e-06, "loss": 42.3302, "step": 3853 }, { "epoch": 91.7641791044776, "grad_norm": 16.018030166625977, "learning_rate": 9.846320346320348e-06, "loss": 42.6877, "step": 3854 }, { "epoch": 91.78805970149254, "grad_norm": 18.5965576171875, "learning_rate": 9.844155844155846e-06, "loss": 41.4104, "step": 3855 }, { "epoch": 91.81194029850747, "grad_norm": 17.651378631591797, "learning_rate": 9.841991341991343e-06, "loss": 42.1591, "step": 3856 }, { "epoch": 91.83582089552239, "grad_norm": 15.912792205810547, "learning_rate": 9.839826839826841e-06, "loss": 41.0675, "step": 3857 }, { "epoch": 91.85970149253731, "grad_norm": 20.338071823120117, "learning_rate": 9.837662337662337e-06, "loss": 43.0971, "step": 3858 }, { "epoch": 91.88358208955223, "grad_norm": 19.422807693481445, "learning_rate": 9.835497835497835e-06, "loss": 41.022, "step": 3859 }, { "epoch": 91.90746268656716, "grad_norm": 18.216012954711914, "learning_rate": 9.833333333333333e-06, "loss": 42.0068, "step": 3860 }, { "epoch": 91.9313432835821, "grad_norm": 17.68181610107422, "learning_rate": 9.831168831168832e-06, "loss": 42.778, "step": 3861 }, { "epoch": 91.95522388059702, "grad_norm": 20.660480499267578, "learning_rate": 9.82900432900433e-06, "loss": 42.8923, "step": 3862 }, { "epoch": 91.97910447761194, "grad_norm": 22.78632926940918, "learning_rate": 9.826839826839828e-06, "loss": 41.5412, "step": 3863 }, { "epoch": 92.0, "grad_norm": 17.660106658935547, "learning_rate": 9.824675324675326e-06, "loss": 36.8816, "step": 3864 }, { "epoch": 92.02388059701492, "grad_norm": 19.257198333740234, "learning_rate": 9.822510822510824e-06, "loss": 41.3789, "step": 3865 }, { "epoch": 92.04776119402985, "grad_norm": 17.690038681030273, "learning_rate": 9.820346320346321e-06, "loss": 41.8596, "step": 3866 }, { "epoch": 92.07164179104478, "grad_norm": 25.88194465637207, "learning_rate": 9.81818181818182e-06, "loss": 42.1967, "step": 3867 }, { "epoch": 92.0955223880597, "grad_norm": 18.971637725830078, "learning_rate": 9.816017316017317e-06, "loss": 41.5025, "step": 3868 }, { "epoch": 92.11940298507463, "grad_norm": 18.14025115966797, "learning_rate": 9.813852813852815e-06, "loss": 42.7121, "step": 3869 }, { "epoch": 92.14328358208955, "grad_norm": 24.20391845703125, "learning_rate": 9.811688311688313e-06, "loss": 42.9952, "step": 3870 }, { "epoch": 92.16716417910447, "grad_norm": 18.484018325805664, "learning_rate": 9.80952380952381e-06, "loss": 44.174, "step": 3871 }, { "epoch": 92.1910447761194, "grad_norm": 24.238615036010742, "learning_rate": 9.807359307359308e-06, "loss": 42.933, "step": 3872 }, { "epoch": 92.21492537313434, "grad_norm": 21.95537757873535, "learning_rate": 9.805194805194806e-06, "loss": 42.5797, "step": 3873 }, { "epoch": 92.23880597014926, "grad_norm": 16.300167083740234, "learning_rate": 9.803030303030304e-06, "loss": 41.8871, "step": 3874 }, { "epoch": 92.26268656716418, "grad_norm": 31.398351669311523, "learning_rate": 9.800865800865802e-06, "loss": 42.8308, "step": 3875 }, { "epoch": 92.2865671641791, "grad_norm": 21.76424789428711, "learning_rate": 9.7987012987013e-06, "loss": 42.1119, "step": 3876 }, { "epoch": 92.31044776119403, "grad_norm": 26.037975311279297, "learning_rate": 9.796536796536797e-06, "loss": 42.0092, "step": 3877 }, { "epoch": 92.33432835820895, "grad_norm": 26.393800735473633, "learning_rate": 9.794372294372295e-06, "loss": 43.9124, "step": 3878 }, { "epoch": 92.35820895522389, "grad_norm": 21.763713836669922, "learning_rate": 9.792207792207793e-06, "loss": 42.6169, "step": 3879 }, { "epoch": 92.38208955223881, "grad_norm": 28.867443084716797, "learning_rate": 9.79004329004329e-06, "loss": 43.093, "step": 3880 }, { "epoch": 92.40597014925373, "grad_norm": 20.59787940979004, "learning_rate": 9.787878787878788e-06, "loss": 43.4976, "step": 3881 }, { "epoch": 92.42985074626866, "grad_norm": 32.58126449584961, "learning_rate": 9.785714285714286e-06, "loss": 42.2799, "step": 3882 }, { "epoch": 92.45373134328358, "grad_norm": 18.00343132019043, "learning_rate": 9.783549783549784e-06, "loss": 42.9497, "step": 3883 }, { "epoch": 92.4776119402985, "grad_norm": 31.740930557250977, "learning_rate": 9.781385281385282e-06, "loss": 42.7341, "step": 3884 }, { "epoch": 92.50149253731344, "grad_norm": 24.078405380249023, "learning_rate": 9.779220779220781e-06, "loss": 43.1077, "step": 3885 }, { "epoch": 92.52537313432836, "grad_norm": 21.194313049316406, "learning_rate": 9.777056277056279e-06, "loss": 41.9059, "step": 3886 }, { "epoch": 92.54925373134328, "grad_norm": 30.298595428466797, "learning_rate": 9.774891774891775e-06, "loss": 41.5753, "step": 3887 }, { "epoch": 92.57313432835821, "grad_norm": 21.55902099609375, "learning_rate": 9.772727272727273e-06, "loss": 41.659, "step": 3888 }, { "epoch": 92.59701492537313, "grad_norm": 27.879924774169922, "learning_rate": 9.77056277056277e-06, "loss": 42.4026, "step": 3889 }, { "epoch": 92.62089552238805, "grad_norm": 20.100893020629883, "learning_rate": 9.768398268398269e-06, "loss": 42.3196, "step": 3890 }, { "epoch": 92.64477611940299, "grad_norm": 24.352115631103516, "learning_rate": 9.766233766233766e-06, "loss": 42.4063, "step": 3891 }, { "epoch": 92.66865671641791, "grad_norm": 24.65276336669922, "learning_rate": 9.764069264069264e-06, "loss": 41.6774, "step": 3892 }, { "epoch": 92.69253731343284, "grad_norm": 18.95211410522461, "learning_rate": 9.761904761904762e-06, "loss": 40.4774, "step": 3893 }, { "epoch": 92.71641791044776, "grad_norm": 37.48885726928711, "learning_rate": 9.75974025974026e-06, "loss": 42.0188, "step": 3894 }, { "epoch": 92.74029850746268, "grad_norm": 27.999391555786133, "learning_rate": 9.757575757575758e-06, "loss": 41.9417, "step": 3895 }, { "epoch": 92.7641791044776, "grad_norm": 41.38749694824219, "learning_rate": 9.755411255411255e-06, "loss": 42.3823, "step": 3896 }, { "epoch": 92.78805970149254, "grad_norm": 30.16627311706543, "learning_rate": 9.753246753246755e-06, "loss": 42.6722, "step": 3897 }, { "epoch": 92.81194029850747, "grad_norm": 42.71925735473633, "learning_rate": 9.751082251082253e-06, "loss": 42.7932, "step": 3898 }, { "epoch": 92.83582089552239, "grad_norm": 42.11480712890625, "learning_rate": 9.74891774891775e-06, "loss": 42.3812, "step": 3899 }, { "epoch": 92.85970149253731, "grad_norm": 23.51568031311035, "learning_rate": 9.746753246753248e-06, "loss": 42.0872, "step": 3900 }, { "epoch": 92.88358208955223, "grad_norm": 29.64082145690918, "learning_rate": 9.744588744588746e-06, "loss": 42.7743, "step": 3901 }, { "epoch": 92.90746268656716, "grad_norm": 24.687829971313477, "learning_rate": 9.742424242424244e-06, "loss": 42.151, "step": 3902 }, { "epoch": 92.9313432835821, "grad_norm": 23.673076629638672, "learning_rate": 9.740259740259742e-06, "loss": 42.949, "step": 3903 }, { "epoch": 92.95522388059702, "grad_norm": 29.738771438598633, "learning_rate": 9.73809523809524e-06, "loss": 41.3754, "step": 3904 }, { "epoch": 92.97910447761194, "grad_norm": 23.26430320739746, "learning_rate": 9.735930735930737e-06, "loss": 42.2649, "step": 3905 }, { "epoch": 93.0, "grad_norm": 33.02578353881836, "learning_rate": 9.733766233766235e-06, "loss": 36.7133, "step": 3906 }, { "epoch": 93.02388059701492, "grad_norm": 29.762083053588867, "learning_rate": 9.731601731601731e-06, "loss": 42.1617, "step": 3907 }, { "epoch": 93.04776119402985, "grad_norm": 42.29904556274414, "learning_rate": 9.729437229437229e-06, "loss": 41.4727, "step": 3908 }, { "epoch": 93.07164179104478, "grad_norm": 35.2297477722168, "learning_rate": 9.727272727272728e-06, "loss": 41.8486, "step": 3909 }, { "epoch": 93.0955223880597, "grad_norm": 31.90110206604004, "learning_rate": 9.725108225108226e-06, "loss": 41.3951, "step": 3910 }, { "epoch": 93.11940298507463, "grad_norm": 33.118011474609375, "learning_rate": 9.722943722943724e-06, "loss": 42.8038, "step": 3911 }, { "epoch": 93.14328358208955, "grad_norm": 28.162616729736328, "learning_rate": 9.720779220779222e-06, "loss": 42.2424, "step": 3912 }, { "epoch": 93.16716417910447, "grad_norm": 26.799827575683594, "learning_rate": 9.71861471861472e-06, "loss": 41.9939, "step": 3913 }, { "epoch": 93.1910447761194, "grad_norm": 36.02149200439453, "learning_rate": 9.716450216450217e-06, "loss": 43.0555, "step": 3914 }, { "epoch": 93.21492537313434, "grad_norm": 30.073331832885742, "learning_rate": 9.714285714285715e-06, "loss": 40.7799, "step": 3915 }, { "epoch": 93.23880597014926, "grad_norm": 32.572547912597656, "learning_rate": 9.712121212121213e-06, "loss": 42.139, "step": 3916 }, { "epoch": 93.26268656716418, "grad_norm": 30.6304988861084, "learning_rate": 9.70995670995671e-06, "loss": 42.702, "step": 3917 }, { "epoch": 93.2865671641791, "grad_norm": 33.230812072753906, "learning_rate": 9.707792207792209e-06, "loss": 42.4281, "step": 3918 }, { "epoch": 93.31044776119403, "grad_norm": 29.524002075195312, "learning_rate": 9.705627705627706e-06, "loss": 42.5262, "step": 3919 }, { "epoch": 93.33432835820895, "grad_norm": 29.51606559753418, "learning_rate": 9.703463203463204e-06, "loss": 41.8173, "step": 3920 }, { "epoch": 93.35820895522389, "grad_norm": 22.32621192932129, "learning_rate": 9.701298701298702e-06, "loss": 43.059, "step": 3921 }, { "epoch": 93.38208955223881, "grad_norm": 36.80875778198242, "learning_rate": 9.6991341991342e-06, "loss": 41.8935, "step": 3922 }, { "epoch": 93.40597014925373, "grad_norm": 30.580604553222656, "learning_rate": 9.696969696969698e-06, "loss": 43.2128, "step": 3923 }, { "epoch": 93.42985074626866, "grad_norm": 29.170934677124023, "learning_rate": 9.694805194805195e-06, "loss": 41.6993, "step": 3924 }, { "epoch": 93.45373134328358, "grad_norm": 28.69053840637207, "learning_rate": 9.692640692640693e-06, "loss": 43.051, "step": 3925 }, { "epoch": 93.4776119402985, "grad_norm": 29.881338119506836, "learning_rate": 9.690476190476191e-06, "loss": 41.1923, "step": 3926 }, { "epoch": 93.50149253731344, "grad_norm": 25.122774124145508, "learning_rate": 9.688311688311689e-06, "loss": 42.4061, "step": 3927 }, { "epoch": 93.52537313432836, "grad_norm": 34.054847717285156, "learning_rate": 9.686147186147187e-06, "loss": 42.4354, "step": 3928 }, { "epoch": 93.54925373134328, "grad_norm": 29.546493530273438, "learning_rate": 9.683982683982684e-06, "loss": 41.9759, "step": 3929 }, { "epoch": 93.57313432835821, "grad_norm": 32.49911880493164, "learning_rate": 9.681818181818182e-06, "loss": 43.3769, "step": 3930 }, { "epoch": 93.59701492537313, "grad_norm": 28.943012237548828, "learning_rate": 9.67965367965368e-06, "loss": 41.6171, "step": 3931 }, { "epoch": 93.62089552238805, "grad_norm": 32.4178466796875, "learning_rate": 9.67748917748918e-06, "loss": 42.6111, "step": 3932 }, { "epoch": 93.64477611940299, "grad_norm": 30.295703887939453, "learning_rate": 9.675324675324677e-06, "loss": 41.6904, "step": 3933 }, { "epoch": 93.66865671641791, "grad_norm": 31.419668197631836, "learning_rate": 9.673160173160175e-06, "loss": 42.3374, "step": 3934 }, { "epoch": 93.69253731343284, "grad_norm": 29.994272232055664, "learning_rate": 9.670995670995673e-06, "loss": 42.1553, "step": 3935 }, { "epoch": 93.71641791044776, "grad_norm": 30.031116485595703, "learning_rate": 9.66883116883117e-06, "loss": 42.1101, "step": 3936 }, { "epoch": 93.74029850746268, "grad_norm": 28.21011734008789, "learning_rate": 9.666666666666667e-06, "loss": 42.0604, "step": 3937 }, { "epoch": 93.7641791044776, "grad_norm": 32.34469985961914, "learning_rate": 9.664502164502165e-06, "loss": 42.4025, "step": 3938 }, { "epoch": 93.78805970149254, "grad_norm": 25.2736759185791, "learning_rate": 9.662337662337662e-06, "loss": 42.7677, "step": 3939 }, { "epoch": 93.81194029850747, "grad_norm": 35.72128677368164, "learning_rate": 9.66017316017316e-06, "loss": 43.4687, "step": 3940 }, { "epoch": 93.83582089552239, "grad_norm": 30.39203453063965, "learning_rate": 9.658008658008658e-06, "loss": 41.7504, "step": 3941 }, { "epoch": 93.85970149253731, "grad_norm": 26.031253814697266, "learning_rate": 9.655844155844156e-06, "loss": 41.6092, "step": 3942 }, { "epoch": 93.88358208955223, "grad_norm": 23.05304718017578, "learning_rate": 9.653679653679654e-06, "loss": 42.4116, "step": 3943 }, { "epoch": 93.90746268656716, "grad_norm": 27.849210739135742, "learning_rate": 9.651515151515153e-06, "loss": 43.2295, "step": 3944 }, { "epoch": 93.9313432835821, "grad_norm": 25.089933395385742, "learning_rate": 9.64935064935065e-06, "loss": 42.6244, "step": 3945 }, { "epoch": 93.95522388059702, "grad_norm": 32.90645217895508, "learning_rate": 9.647186147186149e-06, "loss": 42.7992, "step": 3946 }, { "epoch": 93.97910447761194, "grad_norm": 28.58262825012207, "learning_rate": 9.645021645021646e-06, "loss": 43.0072, "step": 3947 }, { "epoch": 94.0, "grad_norm": 23.826631546020508, "learning_rate": 9.642857142857144e-06, "loss": 37.1225, "step": 3948 }, { "epoch": 94.02388059701492, "grad_norm": 28.149904251098633, "learning_rate": 9.640692640692642e-06, "loss": 42.374, "step": 3949 }, { "epoch": 94.04776119402985, "grad_norm": 28.40786361694336, "learning_rate": 9.63852813852814e-06, "loss": 41.6844, "step": 3950 }, { "epoch": 94.07164179104478, "grad_norm": 25.789466857910156, "learning_rate": 9.636363636363638e-06, "loss": 41.9359, "step": 3951 }, { "epoch": 94.0955223880597, "grad_norm": 31.53352928161621, "learning_rate": 9.634199134199135e-06, "loss": 41.4059, "step": 3952 }, { "epoch": 94.11940298507463, "grad_norm": 25.65757179260254, "learning_rate": 9.632034632034633e-06, "loss": 42.8445, "step": 3953 }, { "epoch": 94.14328358208955, "grad_norm": 35.67771911621094, "learning_rate": 9.629870129870131e-06, "loss": 43.0635, "step": 3954 }, { "epoch": 94.16716417910447, "grad_norm": 31.19240951538086, "learning_rate": 9.627705627705629e-06, "loss": 42.4725, "step": 3955 }, { "epoch": 94.1910447761194, "grad_norm": 31.1099853515625, "learning_rate": 9.625541125541127e-06, "loss": 42.6572, "step": 3956 }, { "epoch": 94.21492537313434, "grad_norm": 28.18238639831543, "learning_rate": 9.623376623376624e-06, "loss": 40.6298, "step": 3957 }, { "epoch": 94.23880597014926, "grad_norm": 25.916431427001953, "learning_rate": 9.621212121212122e-06, "loss": 42.1036, "step": 3958 }, { "epoch": 94.26268656716418, "grad_norm": 25.19932746887207, "learning_rate": 9.61904761904762e-06, "loss": 42.7877, "step": 3959 }, { "epoch": 94.2865671641791, "grad_norm": 31.23909568786621, "learning_rate": 9.616883116883118e-06, "loss": 42.3302, "step": 3960 }, { "epoch": 94.31044776119403, "grad_norm": 27.547996520996094, "learning_rate": 9.614718614718616e-06, "loss": 42.9115, "step": 3961 }, { "epoch": 94.33432835820895, "grad_norm": 33.331939697265625, "learning_rate": 9.612554112554113e-06, "loss": 42.9594, "step": 3962 }, { "epoch": 94.35820895522389, "grad_norm": 26.780292510986328, "learning_rate": 9.610389610389611e-06, "loss": 43.8544, "step": 3963 }, { "epoch": 94.38208955223881, "grad_norm": 25.683496475219727, "learning_rate": 9.608225108225109e-06, "loss": 41.3053, "step": 3964 }, { "epoch": 94.40597014925373, "grad_norm": 22.268705368041992, "learning_rate": 9.606060606060607e-06, "loss": 41.5663, "step": 3965 }, { "epoch": 94.42985074626866, "grad_norm": 26.915376663208008, "learning_rate": 9.603896103896105e-06, "loss": 42.8438, "step": 3966 }, { "epoch": 94.45373134328358, "grad_norm": 18.383493423461914, "learning_rate": 9.601731601731602e-06, "loss": 42.499, "step": 3967 }, { "epoch": 94.4776119402985, "grad_norm": 36.09028244018555, "learning_rate": 9.5995670995671e-06, "loss": 42.8744, "step": 3968 }, { "epoch": 94.50149253731344, "grad_norm": 27.188034057617188, "learning_rate": 9.597402597402598e-06, "loss": 41.8915, "step": 3969 }, { "epoch": 94.52537313432836, "grad_norm": 30.428661346435547, "learning_rate": 9.595238095238096e-06, "loss": 41.9762, "step": 3970 }, { "epoch": 94.54925373134328, "grad_norm": 25.777450561523438, "learning_rate": 9.593073593073594e-06, "loss": 43.0853, "step": 3971 }, { "epoch": 94.57313432835821, "grad_norm": 28.07237434387207, "learning_rate": 9.590909090909091e-06, "loss": 42.7039, "step": 3972 }, { "epoch": 94.59701492537313, "grad_norm": 22.956628799438477, "learning_rate": 9.588744588744589e-06, "loss": 42.8252, "step": 3973 }, { "epoch": 94.62089552238805, "grad_norm": 30.87279510498047, "learning_rate": 9.586580086580087e-06, "loss": 42.8992, "step": 3974 }, { "epoch": 94.64477611940299, "grad_norm": 24.29635238647461, "learning_rate": 9.584415584415585e-06, "loss": 41.1669, "step": 3975 }, { "epoch": 94.66865671641791, "grad_norm": 31.342975616455078, "learning_rate": 9.582251082251083e-06, "loss": 42.9206, "step": 3976 }, { "epoch": 94.69253731343284, "grad_norm": 32.55195999145508, "learning_rate": 9.58008658008658e-06, "loss": 41.6277, "step": 3977 }, { "epoch": 94.71641791044776, "grad_norm": 29.774578094482422, "learning_rate": 9.577922077922078e-06, "loss": 42.2551, "step": 3978 }, { "epoch": 94.74029850746268, "grad_norm": 25.456302642822266, "learning_rate": 9.575757575757576e-06, "loss": 40.1482, "step": 3979 }, { "epoch": 94.7641791044776, "grad_norm": 25.847124099731445, "learning_rate": 9.573593073593075e-06, "loss": 42.2201, "step": 3980 }, { "epoch": 94.78805970149254, "grad_norm": 27.12795066833496, "learning_rate": 9.571428571428573e-06, "loss": 41.8479, "step": 3981 }, { "epoch": 94.81194029850747, "grad_norm": 24.278888702392578, "learning_rate": 9.569264069264071e-06, "loss": 42.6692, "step": 3982 }, { "epoch": 94.83582089552239, "grad_norm": 22.567380905151367, "learning_rate": 9.567099567099569e-06, "loss": 42.3215, "step": 3983 }, { "epoch": 94.85970149253731, "grad_norm": 23.813114166259766, "learning_rate": 9.564935064935067e-06, "loss": 42.6284, "step": 3984 }, { "epoch": 94.88358208955223, "grad_norm": 19.152956008911133, "learning_rate": 9.562770562770564e-06, "loss": 41.7055, "step": 3985 }, { "epoch": 94.90746268656716, "grad_norm": 25.253353118896484, "learning_rate": 9.56060606060606e-06, "loss": 42.5487, "step": 3986 }, { "epoch": 94.9313432835821, "grad_norm": 21.04471206665039, "learning_rate": 9.558441558441558e-06, "loss": 44.019, "step": 3987 }, { "epoch": 94.95522388059702, "grad_norm": NaN, "learning_rate": 9.556277056277056e-06, "loss": 47.5805, "step": 3988 }, { "epoch": 94.97910447761194, "grad_norm": 20.38011932373047, "learning_rate": 9.556277056277056e-06, "loss": 40.8306, "step": 3989 }, { "epoch": 95.0, "grad_norm": 20.988080978393555, "learning_rate": 9.554112554112554e-06, "loss": 35.8475, "step": 3990 }, { "epoch": 95.02388059701492, "grad_norm": 25.182218551635742, "learning_rate": 9.551948051948052e-06, "loss": 42.7702, "step": 3991 }, { "epoch": 95.04776119402985, "grad_norm": 18.022729873657227, "learning_rate": 9.54978354978355e-06, "loss": 41.3642, "step": 3992 }, { "epoch": 95.07164179104478, "grad_norm": 28.234127044677734, "learning_rate": 9.547619047619049e-06, "loss": 41.819, "step": 3993 }, { "epoch": 95.0955223880597, "grad_norm": 22.71247100830078, "learning_rate": 9.545454545454547e-06, "loss": 43.0423, "step": 3994 }, { "epoch": 95.11940298507463, "grad_norm": 26.776891708374023, "learning_rate": 9.543290043290045e-06, "loss": 42.4988, "step": 3995 }, { "epoch": 95.14328358208955, "grad_norm": 21.445236206054688, "learning_rate": 9.541125541125542e-06, "loss": 41.4199, "step": 3996 }, { "epoch": 95.16716417910447, "grad_norm": 23.514680862426758, "learning_rate": 9.53896103896104e-06, "loss": 42.3048, "step": 3997 }, { "epoch": 95.1910447761194, "grad_norm": 19.648818969726562, "learning_rate": 9.536796536796538e-06, "loss": 41.8681, "step": 3998 }, { "epoch": 95.21492537313434, "grad_norm": 21.146074295043945, "learning_rate": 9.534632034632036e-06, "loss": 42.3901, "step": 3999 }, { "epoch": 95.23880597014926, "grad_norm": 17.257108688354492, "learning_rate": 9.532467532467534e-06, "loss": 42.5485, "step": 4000 }, { "epoch": 95.26268656716418, "grad_norm": 20.980907440185547, "learning_rate": 9.530303030303031e-06, "loss": 42.5298, "step": 4001 }, { "epoch": 95.2865671641791, "grad_norm": 22.18124771118164, "learning_rate": 9.52813852813853e-06, "loss": 42.7032, "step": 4002 }, { "epoch": 95.31044776119403, "grad_norm": 20.432281494140625, "learning_rate": 9.525974025974027e-06, "loss": 43.449, "step": 4003 }, { "epoch": 95.33432835820895, "grad_norm": 19.2701473236084, "learning_rate": 9.523809523809525e-06, "loss": 40.555, "step": 4004 }, { "epoch": 95.35820895522389, "grad_norm": 19.681455612182617, "learning_rate": 9.521645021645023e-06, "loss": 41.2141, "step": 4005 }, { "epoch": 95.38208955223881, "grad_norm": 18.39265251159668, "learning_rate": 9.51948051948052e-06, "loss": 42.6937, "step": 4006 }, { "epoch": 95.40597014925373, "grad_norm": 19.818313598632812, "learning_rate": 9.517316017316018e-06, "loss": 43.3448, "step": 4007 }, { "epoch": 95.42985074626866, "grad_norm": 22.540481567382812, "learning_rate": 9.515151515151516e-06, "loss": 42.9516, "step": 4008 }, { "epoch": 95.45373134328358, "grad_norm": 19.422515869140625, "learning_rate": 9.512987012987014e-06, "loss": 42.7121, "step": 4009 }, { "epoch": 95.4776119402985, "grad_norm": 22.789037704467773, "learning_rate": 9.510822510822512e-06, "loss": 42.5243, "step": 4010 }, { "epoch": 95.50149253731344, "grad_norm": 18.70187759399414, "learning_rate": 9.50865800865801e-06, "loss": 40.3263, "step": 4011 }, { "epoch": 95.52537313432836, "grad_norm": 24.231351852416992, "learning_rate": 9.506493506493507e-06, "loss": 42.1699, "step": 4012 }, { "epoch": 95.54925373134328, "grad_norm": 26.356748580932617, "learning_rate": 9.504329004329005e-06, "loss": 42.181, "step": 4013 }, { "epoch": 95.57313432835821, "grad_norm": 18.702556610107422, "learning_rate": 9.502164502164503e-06, "loss": 42.4881, "step": 4014 }, { "epoch": 95.59701492537313, "grad_norm": 27.878799438476562, "learning_rate": 9.5e-06, "loss": 42.2801, "step": 4015 }, { "epoch": 95.62089552238805, "grad_norm": 20.791034698486328, "learning_rate": 9.497835497835498e-06, "loss": 41.909, "step": 4016 }, { "epoch": 95.64477611940299, "grad_norm": 24.874574661254883, "learning_rate": 9.495670995670996e-06, "loss": 42.2108, "step": 4017 }, { "epoch": 95.66865671641791, "grad_norm": 18.562255859375, "learning_rate": 9.493506493506494e-06, "loss": 41.687, "step": 4018 }, { "epoch": 95.69253731343284, "grad_norm": 27.460060119628906, "learning_rate": 9.491341991341992e-06, "loss": 42.3688, "step": 4019 }, { "epoch": 95.71641791044776, "grad_norm": 21.485797882080078, "learning_rate": 9.48917748917749e-06, "loss": 42.6037, "step": 4020 }, { "epoch": 95.74029850746268, "grad_norm": 29.475221633911133, "learning_rate": 9.487012987012987e-06, "loss": 39.9582, "step": 4021 }, { "epoch": 95.7641791044776, "grad_norm": 24.83645248413086, "learning_rate": 9.484848484848485e-06, "loss": 42.7876, "step": 4022 }, { "epoch": 95.78805970149254, "grad_norm": 29.321386337280273, "learning_rate": 9.482683982683983e-06, "loss": 42.1032, "step": 4023 }, { "epoch": 95.81194029850747, "grad_norm": 26.891469955444336, "learning_rate": 9.48051948051948e-06, "loss": 42.557, "step": 4024 }, { "epoch": 95.83582089552239, "grad_norm": 27.05336570739746, "learning_rate": 9.478354978354978e-06, "loss": 42.9743, "step": 4025 }, { "epoch": 95.85970149253731, "grad_norm": 25.014963150024414, "learning_rate": 9.476190476190476e-06, "loss": 43.1592, "step": 4026 }, { "epoch": 95.88358208955223, "grad_norm": 25.66219711303711, "learning_rate": 9.474025974025974e-06, "loss": 41.8458, "step": 4027 }, { "epoch": 95.90746268656716, "grad_norm": 22.460660934448242, "learning_rate": 9.471861471861472e-06, "loss": 42.1439, "step": 4028 }, { "epoch": 95.9313432835821, "grad_norm": 19.01448631286621, "learning_rate": 9.469696969696971e-06, "loss": 42.2933, "step": 4029 }, { "epoch": 95.95522388059702, "grad_norm": 21.85147476196289, "learning_rate": 9.46753246753247e-06, "loss": 42.1108, "step": 4030 }, { "epoch": 95.97910447761194, "grad_norm": 18.99871826171875, "learning_rate": 9.465367965367967e-06, "loss": 42.5071, "step": 4031 }, { "epoch": 96.0, "grad_norm": 16.825069427490234, "learning_rate": 9.463203463203465e-06, "loss": 37.1366, "step": 4032 }, { "epoch": 96.02388059701492, "grad_norm": 19.010360717773438, "learning_rate": 9.461038961038963e-06, "loss": 42.3766, "step": 4033 }, { "epoch": 96.04776119402985, "grad_norm": 22.50554656982422, "learning_rate": 9.45887445887446e-06, "loss": 42.5748, "step": 4034 }, { "epoch": 96.07164179104478, "grad_norm": 16.554548263549805, "learning_rate": 9.456709956709958e-06, "loss": 41.9278, "step": 4035 }, { "epoch": 96.0955223880597, "grad_norm": 23.447858810424805, "learning_rate": 9.454545454545456e-06, "loss": 42.3679, "step": 4036 }, { "epoch": 96.11940298507463, "grad_norm": 23.394611358642578, "learning_rate": 9.452380952380952e-06, "loss": 42.4519, "step": 4037 }, { "epoch": 96.14328358208955, "grad_norm": 17.726774215698242, "learning_rate": 9.45021645021645e-06, "loss": 41.8001, "step": 4038 }, { "epoch": 96.16716417910447, "grad_norm": 19.8607177734375, "learning_rate": 9.448051948051948e-06, "loss": 42.2731, "step": 4039 }, { "epoch": 96.1910447761194, "grad_norm": 24.878158569335938, "learning_rate": 9.445887445887445e-06, "loss": 42.4626, "step": 4040 }, { "epoch": 96.21492537313434, "grad_norm": 18.564037322998047, "learning_rate": 9.443722943722945e-06, "loss": 42.4094, "step": 4041 }, { "epoch": 96.23880597014926, "grad_norm": 29.672882080078125, "learning_rate": 9.441558441558443e-06, "loss": 41.8399, "step": 4042 }, { "epoch": 96.26268656716418, "grad_norm": 21.15955924987793, "learning_rate": 9.43939393939394e-06, "loss": 41.8022, "step": 4043 }, { "epoch": 96.2865671641791, "grad_norm": 19.90737533569336, "learning_rate": 9.437229437229438e-06, "loss": 41.5356, "step": 4044 }, { "epoch": 96.31044776119403, "grad_norm": 27.035198211669922, "learning_rate": 9.435064935064936e-06, "loss": 42.3891, "step": 4045 }, { "epoch": 96.33432835820895, "grad_norm": 19.44938850402832, "learning_rate": 9.432900432900434e-06, "loss": 41.7612, "step": 4046 }, { "epoch": 96.35820895522389, "grad_norm": 32.34653091430664, "learning_rate": 9.430735930735932e-06, "loss": 42.9741, "step": 4047 }, { "epoch": 96.38208955223881, "grad_norm": 23.551259994506836, "learning_rate": 9.42857142857143e-06, "loss": 41.3423, "step": 4048 }, { "epoch": 96.40597014925373, "grad_norm": 36.44496536254883, "learning_rate": 9.426406926406927e-06, "loss": 42.8346, "step": 4049 }, { "epoch": 96.42985074626866, "grad_norm": 28.864904403686523, "learning_rate": 9.424242424242425e-06, "loss": 41.8315, "step": 4050 }, { "epoch": 96.45373134328358, "grad_norm": 35.26904296875, "learning_rate": 9.422077922077923e-06, "loss": 41.5353, "step": 4051 }, { "epoch": 96.4776119402985, "grad_norm": 32.65912628173828, "learning_rate": 9.41991341991342e-06, "loss": 41.5928, "step": 4052 }, { "epoch": 96.50149253731344, "grad_norm": 31.7542667388916, "learning_rate": 9.417748917748919e-06, "loss": 41.4377, "step": 4053 }, { "epoch": 96.52537313432836, "grad_norm": 31.60584259033203, "learning_rate": 9.415584415584416e-06, "loss": 42.9119, "step": 4054 }, { "epoch": 96.54925373134328, "grad_norm": 31.597043991088867, "learning_rate": 9.413419913419914e-06, "loss": 42.2946, "step": 4055 }, { "epoch": 96.57313432835821, "grad_norm": 25.871496200561523, "learning_rate": 9.411255411255412e-06, "loss": 42.3518, "step": 4056 }, { "epoch": 96.59701492537313, "grad_norm": 38.121971130371094, "learning_rate": 9.40909090909091e-06, "loss": 41.3268, "step": 4057 }, { "epoch": 96.62089552238805, "grad_norm": 31.4708309173584, "learning_rate": 9.406926406926408e-06, "loss": 42.001, "step": 4058 }, { "epoch": 96.64477611940299, "grad_norm": 32.240604400634766, "learning_rate": 9.404761904761905e-06, "loss": 43.7004, "step": 4059 }, { "epoch": 96.66865671641791, "grad_norm": 29.972900390625, "learning_rate": 9.402597402597403e-06, "loss": 40.8066, "step": 4060 }, { "epoch": 96.69253731343284, "grad_norm": 28.71061897277832, "learning_rate": 9.400432900432901e-06, "loss": 43.052, "step": 4061 }, { "epoch": 96.71641791044776, "grad_norm": 23.861024856567383, "learning_rate": 9.398268398268399e-06, "loss": 42.5682, "step": 4062 }, { "epoch": 96.74029850746268, "grad_norm": 34.21725845336914, "learning_rate": 9.396103896103896e-06, "loss": 42.418, "step": 4063 }, { "epoch": 96.7641791044776, "grad_norm": 22.93166732788086, "learning_rate": 9.393939393939396e-06, "loss": 42.3199, "step": 4064 }, { "epoch": 96.78805970149254, "grad_norm": 35.91544723510742, "learning_rate": 9.391774891774894e-06, "loss": 40.5579, "step": 4065 }, { "epoch": 96.81194029850747, "grad_norm": 29.065799713134766, "learning_rate": 9.38961038961039e-06, "loss": 40.6409, "step": 4066 }, { "epoch": 96.83582089552239, "grad_norm": 33.4009895324707, "learning_rate": 9.387445887445888e-06, "loss": 42.934, "step": 4067 }, { "epoch": 96.85970149253731, "grad_norm": 32.16798782348633, "learning_rate": 9.385281385281385e-06, "loss": 42.4209, "step": 4068 }, { "epoch": 96.88358208955223, "grad_norm": 27.158573150634766, "learning_rate": 9.383116883116883e-06, "loss": 42.3285, "step": 4069 }, { "epoch": 96.90746268656716, "grad_norm": 28.05286407470703, "learning_rate": 9.380952380952381e-06, "loss": 43.6253, "step": 4070 }, { "epoch": 96.9313432835821, "grad_norm": 31.17296028137207, "learning_rate": 9.378787878787879e-06, "loss": 43.8199, "step": 4071 }, { "epoch": 96.95522388059702, "grad_norm": 25.182817459106445, "learning_rate": 9.376623376623377e-06, "loss": 41.0505, "step": 4072 }, { "epoch": 96.97910447761194, "grad_norm": 35.5045166015625, "learning_rate": 9.374458874458874e-06, "loss": 42.9265, "step": 4073 }, { "epoch": 97.0, "grad_norm": 23.445880889892578, "learning_rate": 9.372294372294372e-06, "loss": 36.5814, "step": 4074 }, { "epoch": 97.02388059701492, "grad_norm": 28.6851806640625, "learning_rate": 9.37012987012987e-06, "loss": 41.6689, "step": 4075 }, { "epoch": 97.04776119402985, "grad_norm": 22.152568817138672, "learning_rate": 9.36796536796537e-06, "loss": 41.6459, "step": 4076 }, { "epoch": 97.07164179104478, "grad_norm": 35.39872360229492, "learning_rate": 9.365800865800867e-06, "loss": 41.9915, "step": 4077 }, { "epoch": 97.0955223880597, "grad_norm": 27.264184951782227, "learning_rate": 9.363636363636365e-06, "loss": 42.6117, "step": 4078 }, { "epoch": 97.11940298507463, "grad_norm": 36.01545715332031, "learning_rate": 9.361471861471863e-06, "loss": 43.7312, "step": 4079 }, { "epoch": 97.14328358208955, "grad_norm": 34.436134338378906, "learning_rate": 9.35930735930736e-06, "loss": 42.597, "step": 4080 }, { "epoch": 97.16716417910447, "grad_norm": 24.796520233154297, "learning_rate": 9.357142857142859e-06, "loss": 42.432, "step": 4081 }, { "epoch": 97.1910447761194, "grad_norm": 26.330299377441406, "learning_rate": 9.354978354978356e-06, "loss": 42.1124, "step": 4082 }, { "epoch": 97.21492537313434, "grad_norm": 27.518465042114258, "learning_rate": 9.352813852813854e-06, "loss": 41.3868, "step": 4083 }, { "epoch": 97.23880597014926, "grad_norm": 25.9599552154541, "learning_rate": 9.350649350649352e-06, "loss": 40.6964, "step": 4084 }, { "epoch": 97.26268656716418, "grad_norm": 33.074974060058594, "learning_rate": 9.34848484848485e-06, "loss": 42.1326, "step": 4085 }, { "epoch": 97.2865671641791, "grad_norm": 29.895139694213867, "learning_rate": 9.346320346320346e-06, "loss": 42.1873, "step": 4086 }, { "epoch": 97.31044776119403, "grad_norm": 32.33000946044922, "learning_rate": 9.344155844155844e-06, "loss": 42.5366, "step": 4087 }, { "epoch": 97.33432835820895, "grad_norm": 28.283353805541992, "learning_rate": 9.341991341991343e-06, "loss": 41.8857, "step": 4088 }, { "epoch": 97.35820895522389, "grad_norm": 27.200963973999023, "learning_rate": 9.339826839826841e-06, "loss": 41.4329, "step": 4089 }, { "epoch": 97.38208955223881, "grad_norm": 27.918405532836914, "learning_rate": 9.337662337662339e-06, "loss": 41.4236, "step": 4090 }, { "epoch": 97.40597014925373, "grad_norm": 24.885950088500977, "learning_rate": 9.335497835497837e-06, "loss": 41.8926, "step": 4091 }, { "epoch": 97.42985074626866, "grad_norm": 24.703994750976562, "learning_rate": 9.333333333333334e-06, "loss": 42.3685, "step": 4092 }, { "epoch": 97.45373134328358, "grad_norm": 32.68978500366211, "learning_rate": 9.331168831168832e-06, "loss": 41.5668, "step": 4093 }, { "epoch": 97.4776119402985, "grad_norm": 27.5683536529541, "learning_rate": 9.32900432900433e-06, "loss": 42.4125, "step": 4094 }, { "epoch": 97.50149253731344, "grad_norm": 30.541976928710938, "learning_rate": 9.326839826839828e-06, "loss": 40.7424, "step": 4095 }, { "epoch": 97.52537313432836, "grad_norm": 28.704875946044922, "learning_rate": 9.324675324675326e-06, "loss": 42.0617, "step": 4096 }, { "epoch": 97.54925373134328, "grad_norm": 29.45570945739746, "learning_rate": 9.322510822510823e-06, "loss": 42.2572, "step": 4097 }, { "epoch": 97.57313432835821, "grad_norm": 29.299041748046875, "learning_rate": 9.320346320346321e-06, "loss": 42.5461, "step": 4098 }, { "epoch": 97.59701492537313, "grad_norm": 28.30889320373535, "learning_rate": 9.318181818181819e-06, "loss": 41.9226, "step": 4099 }, { "epoch": 97.62089552238805, "grad_norm": 23.587907791137695, "learning_rate": 9.316017316017317e-06, "loss": 42.0195, "step": 4100 }, { "epoch": 97.64477611940299, "grad_norm": 31.324934005737305, "learning_rate": 9.313852813852815e-06, "loss": 41.731, "step": 4101 }, { "epoch": 97.66865671641791, "grad_norm": 25.146387100219727, "learning_rate": 9.311688311688312e-06, "loss": 41.8452, "step": 4102 }, { "epoch": 97.69253731343284, "grad_norm": NaN, "learning_rate": 9.30952380952381e-06, "loss": 73.1578, "step": 4103 }, { "epoch": 97.71641791044776, "grad_norm": 33.619197845458984, "learning_rate": 9.30952380952381e-06, "loss": 42.6151, "step": 4104 }, { "epoch": 97.74029850746268, "grad_norm": 30.636676788330078, "learning_rate": 9.307359307359308e-06, "loss": 43.1022, "step": 4105 }, { "epoch": 97.7641791044776, "grad_norm": 30.259347915649414, "learning_rate": 9.305194805194806e-06, "loss": 42.0399, "step": 4106 }, { "epoch": 97.78805970149254, "grad_norm": 28.927536010742188, "learning_rate": 9.303030303030303e-06, "loss": 42.5658, "step": 4107 }, { "epoch": 97.81194029850747, "grad_norm": 27.93010139465332, "learning_rate": 9.300865800865801e-06, "loss": 41.5662, "step": 4108 }, { "epoch": 97.83582089552239, "grad_norm": 25.34616470336914, "learning_rate": 9.298701298701299e-06, "loss": 43.0076, "step": 4109 }, { "epoch": 97.85970149253731, "grad_norm": 28.407508850097656, "learning_rate": 9.296536796536797e-06, "loss": 43.035, "step": 4110 }, { "epoch": 97.88358208955223, "grad_norm": 22.58799934387207, "learning_rate": 9.294372294372295e-06, "loss": 42.5904, "step": 4111 }, { "epoch": 97.90746268656716, "grad_norm": 30.51255989074707, "learning_rate": 9.292207792207792e-06, "loss": 40.6314, "step": 4112 }, { "epoch": 97.9313432835821, "grad_norm": NaN, "learning_rate": 9.290043290043292e-06, "loss": 47.9418, "step": 4113 }, { "epoch": 97.95522388059702, "grad_norm": 24.9912166595459, "learning_rate": 9.290043290043292e-06, "loss": 42.5057, "step": 4114 }, { "epoch": 97.97910447761194, "grad_norm": 29.492568969726562, "learning_rate": 9.28787878787879e-06, "loss": 42.4723, "step": 4115 }, { "epoch": 98.0, "grad_norm": 22.984312057495117, "learning_rate": 9.285714285714288e-06, "loss": 36.1324, "step": 4116 }, { "epoch": 98.02388059701492, "grad_norm": 26.956518173217773, "learning_rate": 9.283549783549785e-06, "loss": 42.6798, "step": 4117 }, { "epoch": 98.04776119402985, "grad_norm": 23.24462890625, "learning_rate": 9.281385281385281e-06, "loss": 42.5043, "step": 4118 }, { "epoch": 98.07164179104478, "grad_norm": 32.33470153808594, "learning_rate": 9.27922077922078e-06, "loss": 42.0607, "step": 4119 }, { "epoch": 98.0955223880597, "grad_norm": 30.606536865234375, "learning_rate": 9.277056277056277e-06, "loss": 42.3543, "step": 4120 }, { "epoch": 98.11940298507463, "grad_norm": 26.795475006103516, "learning_rate": 9.274891774891775e-06, "loss": 41.33, "step": 4121 }, { "epoch": 98.14328358208955, "grad_norm": 23.049283981323242, "learning_rate": 9.272727272727273e-06, "loss": 41.2262, "step": 4122 }, { "epoch": 98.16716417910447, "grad_norm": 30.961490631103516, "learning_rate": 9.27056277056277e-06, "loss": 42.3126, "step": 4123 }, { "epoch": 98.1910447761194, "grad_norm": 25.457870483398438, "learning_rate": 9.268398268398268e-06, "loss": 43.0498, "step": 4124 }, { "epoch": 98.21492537313434, "grad_norm": 28.787675857543945, "learning_rate": 9.266233766233766e-06, "loss": 41.5441, "step": 4125 }, { "epoch": 98.23880597014926, "grad_norm": 23.33895492553711, "learning_rate": 9.264069264069266e-06, "loss": 41.2298, "step": 4126 }, { "epoch": 98.26268656716418, "grad_norm": 28.43191146850586, "learning_rate": 9.261904761904763e-06, "loss": 43.8188, "step": 4127 }, { "epoch": 98.2865671641791, "grad_norm": 22.150148391723633, "learning_rate": 9.259740259740261e-06, "loss": 41.9418, "step": 4128 }, { "epoch": 98.31044776119403, "grad_norm": 32.84375762939453, "learning_rate": 9.257575757575759e-06, "loss": 42.181, "step": 4129 }, { "epoch": 98.33432835820895, "grad_norm": 27.58066177368164, "learning_rate": 9.255411255411257e-06, "loss": 41.9053, "step": 4130 }, { "epoch": 98.35820895522389, "grad_norm": 26.275638580322266, "learning_rate": 9.253246753246755e-06, "loss": 42.643, "step": 4131 }, { "epoch": 98.38208955223881, "grad_norm": 26.407045364379883, "learning_rate": 9.251082251082252e-06, "loss": 41.2759, "step": 4132 }, { "epoch": 98.40597014925373, "grad_norm": 28.262874603271484, "learning_rate": 9.24891774891775e-06, "loss": 41.2746, "step": 4133 }, { "epoch": 98.42985074626866, "grad_norm": 25.495405197143555, "learning_rate": 9.246753246753248e-06, "loss": 41.17, "step": 4134 }, { "epoch": 98.45373134328358, "grad_norm": 30.302942276000977, "learning_rate": 9.244588744588746e-06, "loss": 40.8692, "step": 4135 }, { "epoch": 98.4776119402985, "grad_norm": 26.874711990356445, "learning_rate": 9.242424242424244e-06, "loss": 42.9695, "step": 4136 }, { "epoch": 98.50149253731344, "grad_norm": 27.96731948852539, "learning_rate": 9.240259740259741e-06, "loss": 41.0995, "step": 4137 }, { "epoch": 98.52537313432836, "grad_norm": 26.49541664123535, "learning_rate": 9.238095238095239e-06, "loss": 42.3258, "step": 4138 }, { "epoch": 98.54925373134328, "grad_norm": 24.790346145629883, "learning_rate": 9.235930735930737e-06, "loss": 42.5989, "step": 4139 }, { "epoch": 98.57313432835821, "grad_norm": 22.83180809020996, "learning_rate": 9.233766233766235e-06, "loss": 41.4101, "step": 4140 }, { "epoch": 98.59701492537313, "grad_norm": 27.18695640563965, "learning_rate": 9.231601731601733e-06, "loss": 42.1914, "step": 4141 }, { "epoch": 98.62089552238805, "grad_norm": 23.35308074951172, "learning_rate": 9.22943722943723e-06, "loss": 42.3357, "step": 4142 }, { "epoch": 98.64477611940299, "grad_norm": 32.9411735534668, "learning_rate": 9.227272727272728e-06, "loss": 42.2151, "step": 4143 }, { "epoch": 98.66865671641791, "grad_norm": 28.968116760253906, "learning_rate": 9.225108225108226e-06, "loss": 42.5766, "step": 4144 }, { "epoch": 98.69253731343284, "grad_norm": 26.254579544067383, "learning_rate": 9.222943722943724e-06, "loss": 42.5968, "step": 4145 }, { "epoch": 98.71641791044776, "grad_norm": 27.665916442871094, "learning_rate": 9.220779220779221e-06, "loss": 41.0831, "step": 4146 }, { "epoch": 98.74029850746268, "grad_norm": 29.594675064086914, "learning_rate": 9.21861471861472e-06, "loss": 42.1963, "step": 4147 }, { "epoch": 98.7641791044776, "grad_norm": 23.506603240966797, "learning_rate": 9.216450216450217e-06, "loss": 41.9209, "step": 4148 }, { "epoch": 98.78805970149254, "grad_norm": 32.939395904541016, "learning_rate": 9.214285714285715e-06, "loss": 42.0637, "step": 4149 }, { "epoch": 98.81194029850747, "grad_norm": 27.35706901550293, "learning_rate": 9.212121212121213e-06, "loss": 42.4936, "step": 4150 }, { "epoch": 98.83582089552239, "grad_norm": 31.6049861907959, "learning_rate": 9.20995670995671e-06, "loss": 43.5351, "step": 4151 }, { "epoch": 98.85970149253731, "grad_norm": 26.57269287109375, "learning_rate": 9.207792207792208e-06, "loss": 42.2598, "step": 4152 }, { "epoch": 98.88358208955223, "grad_norm": 30.60957908630371, "learning_rate": 9.205627705627706e-06, "loss": 42.3751, "step": 4153 }, { "epoch": 98.90746268656716, "grad_norm": 28.574939727783203, "learning_rate": 9.203463203463204e-06, "loss": 41.8665, "step": 4154 }, { "epoch": 98.9313432835821, "grad_norm": 24.66292953491211, "learning_rate": 9.201298701298702e-06, "loss": 42.2066, "step": 4155 }, { "epoch": 98.95522388059702, "grad_norm": 23.727333068847656, "learning_rate": 9.1991341991342e-06, "loss": 41.3947, "step": 4156 }, { "epoch": 98.97910447761194, "grad_norm": 27.1662654876709, "learning_rate": 9.196969696969697e-06, "loss": 42.752, "step": 4157 }, { "epoch": 99.0, "grad_norm": 19.463891983032227, "learning_rate": 9.194805194805195e-06, "loss": 35.6173, "step": 4158 }, { "epoch": 99.02388059701492, "grad_norm": 31.107654571533203, "learning_rate": 9.192640692640693e-06, "loss": 42.7329, "step": 4159 }, { "epoch": 99.04776119402985, "grad_norm": 26.082523345947266, "learning_rate": 9.19047619047619e-06, "loss": 43.3724, "step": 4160 }, { "epoch": 99.07164179104478, "grad_norm": 23.824567794799805, "learning_rate": 9.188311688311688e-06, "loss": 42.6574, "step": 4161 }, { "epoch": 99.0955223880597, "grad_norm": 23.710350036621094, "learning_rate": 9.186147186147188e-06, "loss": 41.6831, "step": 4162 }, { "epoch": 99.11940298507463, "grad_norm": 28.668537139892578, "learning_rate": 9.183982683982686e-06, "loss": 41.099, "step": 4163 }, { "epoch": 99.14328358208955, "grad_norm": 21.060327529907227, "learning_rate": 9.181818181818184e-06, "loss": 43.0679, "step": 4164 }, { "epoch": 99.16716417910447, "grad_norm": 25.86065673828125, "learning_rate": 9.179653679653681e-06, "loss": 42.248, "step": 4165 }, { "epoch": 99.1910447761194, "grad_norm": 20.043672561645508, "learning_rate": 9.177489177489179e-06, "loss": 41.114, "step": 4166 }, { "epoch": 99.21492537313434, "grad_norm": 25.1352481842041, "learning_rate": 9.175324675324675e-06, "loss": 40.9968, "step": 4167 }, { "epoch": 99.23880597014926, "grad_norm": 20.042200088500977, "learning_rate": 9.173160173160173e-06, "loss": 41.9535, "step": 4168 }, { "epoch": 99.26268656716418, "grad_norm": 27.261369705200195, "learning_rate": 9.17099567099567e-06, "loss": 42.6293, "step": 4169 }, { "epoch": 99.2865671641791, "grad_norm": 23.163576126098633, "learning_rate": 9.168831168831169e-06, "loss": 41.9948, "step": 4170 }, { "epoch": 99.31044776119403, "grad_norm": 27.297080993652344, "learning_rate": 9.166666666666666e-06, "loss": 41.4716, "step": 4171 }, { "epoch": 99.33432835820895, "grad_norm": 22.44979476928711, "learning_rate": 9.164502164502164e-06, "loss": 42.406, "step": 4172 }, { "epoch": 99.35820895522389, "grad_norm": 23.482084274291992, "learning_rate": 9.162337662337664e-06, "loss": 41.5008, "step": 4173 }, { "epoch": 99.38208955223881, "grad_norm": 22.505319595336914, "learning_rate": 9.160173160173162e-06, "loss": 40.9368, "step": 4174 }, { "epoch": 99.40597014925373, "grad_norm": 24.250532150268555, "learning_rate": 9.15800865800866e-06, "loss": 40.7122, "step": 4175 }, { "epoch": 99.42985074626866, "grad_norm": 23.2113037109375, "learning_rate": 9.155844155844157e-06, "loss": 41.7559, "step": 4176 }, { "epoch": 99.45373134328358, "grad_norm": 18.7581787109375, "learning_rate": 9.153679653679655e-06, "loss": 41.661, "step": 4177 }, { "epoch": 99.4776119402985, "grad_norm": 17.8604793548584, "learning_rate": 9.151515151515153e-06, "loss": 41.51, "step": 4178 }, { "epoch": 99.50149253731344, "grad_norm": 16.258312225341797, "learning_rate": 9.14935064935065e-06, "loss": 41.2024, "step": 4179 }, { "epoch": 99.52537313432836, "grad_norm": 16.66613006591797, "learning_rate": 9.147186147186148e-06, "loss": 42.5017, "step": 4180 }, { "epoch": 99.54925373134328, "grad_norm": 15.366393089294434, "learning_rate": 9.145021645021646e-06, "loss": 41.6167, "step": 4181 }, { "epoch": 99.57313432835821, "grad_norm": 23.028663635253906, "learning_rate": 9.142857142857144e-06, "loss": 42.308, "step": 4182 }, { "epoch": 99.59701492537313, "grad_norm": 16.91287612915039, "learning_rate": 9.140692640692642e-06, "loss": 43.1037, "step": 4183 }, { "epoch": 99.62089552238805, "grad_norm": 19.781919479370117, "learning_rate": 9.13852813852814e-06, "loss": 42.3187, "step": 4184 }, { "epoch": 99.64477611940299, "grad_norm": 18.985305786132812, "learning_rate": 9.136363636363637e-06, "loss": 41.971, "step": 4185 }, { "epoch": 99.66865671641791, "grad_norm": 17.393688201904297, "learning_rate": 9.134199134199135e-06, "loss": 41.1467, "step": 4186 }, { "epoch": 99.69253731343284, "grad_norm": 19.685924530029297, "learning_rate": 9.132034632034633e-06, "loss": 41.822, "step": 4187 }, { "epoch": 99.71641791044776, "grad_norm": 19.761327743530273, "learning_rate": 9.12987012987013e-06, "loss": 42.1768, "step": 4188 }, { "epoch": 99.74029850746268, "grad_norm": 16.2159423828125, "learning_rate": 9.127705627705628e-06, "loss": 42.9327, "step": 4189 }, { "epoch": 99.7641791044776, "grad_norm": 21.257530212402344, "learning_rate": 9.125541125541126e-06, "loss": 42.556, "step": 4190 }, { "epoch": 99.78805970149254, "grad_norm": NaN, "learning_rate": 9.123376623376624e-06, "loss": 53.9793, "step": 4191 }, { "epoch": 99.81194029850747, "grad_norm": 19.869991302490234, "learning_rate": 9.123376623376624e-06, "loss": 41.4833, "step": 4192 }, { "epoch": 99.83582089552239, "grad_norm": 17.66855239868164, "learning_rate": 9.121212121212122e-06, "loss": 41.6514, "step": 4193 }, { "epoch": 99.85970149253731, "grad_norm": 19.992225646972656, "learning_rate": 9.11904761904762e-06, "loss": 43.4129, "step": 4194 }, { "epoch": 99.88358208955223, "grad_norm": 23.21436882019043, "learning_rate": 9.116883116883117e-06, "loss": 43.2426, "step": 4195 }, { "epoch": 99.90746268656716, "grad_norm": 18.16109848022461, "learning_rate": 9.114718614718615e-06, "loss": 41.9741, "step": 4196 }, { "epoch": 99.9313432835821, "grad_norm": 22.761810302734375, "learning_rate": 9.112554112554113e-06, "loss": 41.4668, "step": 4197 }, { "epoch": 99.95522388059702, "grad_norm": 21.3942928314209, "learning_rate": 9.110389610389611e-06, "loss": 41.6686, "step": 4198 }, { "epoch": 99.97910447761194, "grad_norm": 17.734172821044922, "learning_rate": 9.108225108225109e-06, "loss": 41.746, "step": 4199 }, { "epoch": 100.0, "grad_norm": 22.795557022094727, "learning_rate": 9.106060606060606e-06, "loss": 37.4113, "step": 4200 }, { "epoch": 100.02388059701492, "grad_norm": 18.693927764892578, "learning_rate": 9.103896103896104e-06, "loss": 41.1692, "step": 4201 }, { "epoch": 100.04776119402985, "grad_norm": 15.947311401367188, "learning_rate": 9.101731601731602e-06, "loss": 43.5011, "step": 4202 }, { "epoch": 100.07164179104478, "grad_norm": 24.349090576171875, "learning_rate": 9.0995670995671e-06, "loss": 41.954, "step": 4203 }, { "epoch": 100.0955223880597, "grad_norm": 18.305612564086914, "learning_rate": 9.097402597402598e-06, "loss": 41.7676, "step": 4204 }, { "epoch": 100.11940298507463, "grad_norm": 29.68235206604004, "learning_rate": 9.095238095238095e-06, "loss": 40.8579, "step": 4205 }, { "epoch": 100.14328358208955, "grad_norm": 24.512508392333984, "learning_rate": 9.093073593073593e-06, "loss": 40.7238, "step": 4206 }, { "epoch": 100.16716417910447, "grad_norm": 24.545705795288086, "learning_rate": 9.090909090909091e-06, "loss": 42.7197, "step": 4207 }, { "epoch": 100.1910447761194, "grad_norm": 18.792917251586914, "learning_rate": 9.088744588744589e-06, "loss": 40.8385, "step": 4208 }, { "epoch": 100.21492537313434, "grad_norm": 21.766145706176758, "learning_rate": 9.086580086580087e-06, "loss": 41.3234, "step": 4209 }, { "epoch": 100.23880597014926, "grad_norm": 17.32309341430664, "learning_rate": 9.084415584415586e-06, "loss": 40.6989, "step": 4210 }, { "epoch": 100.26268656716418, "grad_norm": 17.80112648010254, "learning_rate": 9.082251082251084e-06, "loss": 41.0043, "step": 4211 }, { "epoch": 100.2865671641791, "grad_norm": 15.762267112731934, "learning_rate": 9.080086580086582e-06, "loss": 42.5453, "step": 4212 }, { "epoch": 100.31044776119403, "grad_norm": 15.99219036102295, "learning_rate": 9.07792207792208e-06, "loss": 41.9223, "step": 4213 }, { "epoch": 100.33432835820895, "grad_norm": 21.16149139404297, "learning_rate": 9.075757575757577e-06, "loss": 41.7332, "step": 4214 }, { "epoch": 100.35820895522389, "grad_norm": 16.26340675354004, "learning_rate": 9.073593073593075e-06, "loss": 41.9333, "step": 4215 }, { "epoch": 100.38208955223881, "grad_norm": 22.789945602416992, "learning_rate": 9.071428571428573e-06, "loss": 41.5922, "step": 4216 }, { "epoch": 100.40597014925373, "grad_norm": 20.777421951293945, "learning_rate": 9.06926406926407e-06, "loss": 42.4934, "step": 4217 }, { "epoch": 100.42985074626866, "grad_norm": 20.417619705200195, "learning_rate": 9.067099567099567e-06, "loss": 42.0611, "step": 4218 }, { "epoch": 100.45373134328358, "grad_norm": 17.323135375976562, "learning_rate": 9.064935064935065e-06, "loss": 41.4595, "step": 4219 }, { "epoch": 100.4776119402985, "grad_norm": 17.62958335876465, "learning_rate": 9.062770562770562e-06, "loss": 42.1578, "step": 4220 }, { "epoch": 100.50149253731344, "grad_norm": 19.73848533630371, "learning_rate": 9.06060606060606e-06, "loss": 40.6611, "step": 4221 }, { "epoch": 100.52537313432836, "grad_norm": 15.945398330688477, "learning_rate": 9.05844155844156e-06, "loss": 41.9703, "step": 4222 }, { "epoch": 100.54925373134328, "grad_norm": 31.24019432067871, "learning_rate": 9.056277056277057e-06, "loss": 42.1433, "step": 4223 }, { "epoch": 100.57313432835821, "grad_norm": 21.933677673339844, "learning_rate": 9.054112554112555e-06, "loss": 41.873, "step": 4224 }, { "epoch": 100.59701492537313, "grad_norm": 31.41733741760254, "learning_rate": 9.051948051948053e-06, "loss": 42.7139, "step": 4225 }, { "epoch": 100.62089552238805, "grad_norm": 21.998600006103516, "learning_rate": 9.049783549783551e-06, "loss": 42.7483, "step": 4226 }, { "epoch": 100.64477611940299, "grad_norm": 34.37179183959961, "learning_rate": 9.047619047619049e-06, "loss": 41.3319, "step": 4227 }, { "epoch": 100.66865671641791, "grad_norm": 27.14617156982422, "learning_rate": 9.045454545454546e-06, "loss": 42.022, "step": 4228 }, { "epoch": 100.69253731343284, "grad_norm": 37.454708099365234, "learning_rate": 9.043290043290044e-06, "loss": 41.9875, "step": 4229 }, { "epoch": 100.71641791044776, "grad_norm": 32.32929229736328, "learning_rate": 9.041125541125542e-06, "loss": 43.1461, "step": 4230 }, { "epoch": 100.74029850746268, "grad_norm": 33.369842529296875, "learning_rate": 9.03896103896104e-06, "loss": 42.1309, "step": 4231 }, { "epoch": 100.7641791044776, "grad_norm": 26.55228042602539, "learning_rate": 9.036796536796538e-06, "loss": 42.6242, "step": 4232 }, { "epoch": 100.78805970149254, "grad_norm": 30.329452514648438, "learning_rate": 9.034632034632035e-06, "loss": 41.174, "step": 4233 }, { "epoch": 100.81194029850747, "grad_norm": 32.0432014465332, "learning_rate": 9.032467532467533e-06, "loss": 43.1256, "step": 4234 }, { "epoch": 100.83582089552239, "grad_norm": 29.122236251831055, "learning_rate": 9.030303030303031e-06, "loss": 41.3778, "step": 4235 }, { "epoch": 100.85970149253731, "grad_norm": 24.6899471282959, "learning_rate": 9.028138528138529e-06, "loss": 42.2167, "step": 4236 }, { "epoch": 100.88358208955223, "grad_norm": 31.051576614379883, "learning_rate": 9.025974025974027e-06, "loss": 42.5137, "step": 4237 }, { "epoch": 100.90746268656716, "grad_norm": 27.56793785095215, "learning_rate": 9.023809523809524e-06, "loss": 42.2763, "step": 4238 }, { "epoch": 100.9313432835821, "grad_norm": 35.045108795166016, "learning_rate": 9.021645021645022e-06, "loss": 43.3116, "step": 4239 }, { "epoch": 100.95522388059702, "grad_norm": 28.35376739501953, "learning_rate": 9.01948051948052e-06, "loss": 42.4737, "step": 4240 }, { "epoch": 100.97910447761194, "grad_norm": 29.537580490112305, "learning_rate": 9.017316017316018e-06, "loss": 42.2073, "step": 4241 }, { "epoch": 101.0, "grad_norm": 24.736759185791016, "learning_rate": 9.015151515151516e-06, "loss": 37.5375, "step": 4242 }, { "epoch": 101.02388059701492, "grad_norm": 27.93048667907715, "learning_rate": 9.012987012987013e-06, "loss": 42.1642, "step": 4243 }, { "epoch": 101.04776119402985, "grad_norm": 24.460664749145508, "learning_rate": 9.010822510822511e-06, "loss": 42.1769, "step": 4244 }, { "epoch": 101.07164179104478, "grad_norm": 22.52399253845215, "learning_rate": 9.008658008658009e-06, "loss": 41.99, "step": 4245 }, { "epoch": 101.0955223880597, "grad_norm": 19.33254623413086, "learning_rate": 9.006493506493509e-06, "loss": 40.422, "step": 4246 }, { "epoch": 101.11940298507463, "grad_norm": 22.645910263061523, "learning_rate": 9.004329004329005e-06, "loss": 42.8041, "step": 4247 }, { "epoch": 101.14328358208955, "grad_norm": 20.89433479309082, "learning_rate": 9.002164502164502e-06, "loss": 43.0258, "step": 4248 }, { "epoch": 101.16716417910447, "grad_norm": 19.612567901611328, "learning_rate": 9e-06, "loss": 41.4478, "step": 4249 }, { "epoch": 101.1910447761194, "grad_norm": 19.565265655517578, "learning_rate": 8.997835497835498e-06, "loss": 42.6328, "step": 4250 }, { "epoch": 101.21492537313434, "grad_norm": 20.93030548095703, "learning_rate": 8.995670995670996e-06, "loss": 42.7268, "step": 4251 }, { "epoch": 101.23880597014926, "grad_norm": 18.67580795288086, "learning_rate": 8.993506493506494e-06, "loss": 43.3658, "step": 4252 }, { "epoch": 101.26268656716418, "grad_norm": 26.36067008972168, "learning_rate": 8.991341991341991e-06, "loss": 42.2089, "step": 4253 }, { "epoch": 101.2865671641791, "grad_norm": 19.841224670410156, "learning_rate": 8.98917748917749e-06, "loss": 40.842, "step": 4254 }, { "epoch": 101.31044776119403, "grad_norm": 26.14617156982422, "learning_rate": 8.987012987012987e-06, "loss": 43.1673, "step": 4255 }, { "epoch": 101.33432835820895, "grad_norm": 21.286962509155273, "learning_rate": 8.984848484848485e-06, "loss": 42.0463, "step": 4256 }, { "epoch": 101.35820895522389, "grad_norm": 26.335676193237305, "learning_rate": 8.982683982683983e-06, "loss": 41.8856, "step": 4257 }, { "epoch": 101.38208955223881, "grad_norm": 23.881567001342773, "learning_rate": 8.980519480519482e-06, "loss": 41.6253, "step": 4258 }, { "epoch": 101.40597014925373, "grad_norm": 21.65298843383789, "learning_rate": 8.97835497835498e-06, "loss": 42.0994, "step": 4259 }, { "epoch": 101.42985074626866, "grad_norm": 27.039722442626953, "learning_rate": 8.976190476190478e-06, "loss": 41.8836, "step": 4260 }, { "epoch": 101.45373134328358, "grad_norm": 20.1751766204834, "learning_rate": 8.974025974025975e-06, "loss": 41.1007, "step": 4261 }, { "epoch": 101.4776119402985, "grad_norm": 31.58852767944336, "learning_rate": 8.971861471861473e-06, "loss": 41.9793, "step": 4262 }, { "epoch": 101.50149253731344, "grad_norm": 21.907556533813477, "learning_rate": 8.969696969696971e-06, "loss": 41.509, "step": 4263 }, { "epoch": 101.52537313432836, "grad_norm": 32.310272216796875, "learning_rate": 8.967532467532469e-06, "loss": 41.1805, "step": 4264 }, { "epoch": 101.54925373134328, "grad_norm": 25.363170623779297, "learning_rate": 8.965367965367967e-06, "loss": 42.3668, "step": 4265 }, { "epoch": 101.57313432835821, "grad_norm": 29.320520401000977, "learning_rate": 8.963203463203464e-06, "loss": 41.7248, "step": 4266 }, { "epoch": 101.59701492537313, "grad_norm": 24.637983322143555, "learning_rate": 8.96103896103896e-06, "loss": 40.1595, "step": 4267 }, { "epoch": 101.62089552238805, "grad_norm": 32.69458770751953, "learning_rate": 8.958874458874458e-06, "loss": 41.6096, "step": 4268 }, { "epoch": 101.64477611940299, "grad_norm": 24.87364959716797, "learning_rate": 8.956709956709956e-06, "loss": 41.3295, "step": 4269 }, { "epoch": 101.66865671641791, "grad_norm": 31.5223445892334, "learning_rate": 8.954545454545456e-06, "loss": 42.1731, "step": 4270 }, { "epoch": 101.69253731343284, "grad_norm": 29.047664642333984, "learning_rate": 8.952380952380953e-06, "loss": 41.8301, "step": 4271 }, { "epoch": 101.71641791044776, "grad_norm": 31.420434951782227, "learning_rate": 8.950216450216451e-06, "loss": 41.6502, "step": 4272 }, { "epoch": 101.74029850746268, "grad_norm": 28.40896224975586, "learning_rate": 8.948051948051949e-06, "loss": 41.7585, "step": 4273 }, { "epoch": 101.7641791044776, "grad_norm": 32.256263732910156, "learning_rate": 8.945887445887447e-06, "loss": 41.8508, "step": 4274 }, { "epoch": 101.78805970149254, "grad_norm": 30.496904373168945, "learning_rate": 8.943722943722945e-06, "loss": 41.7192, "step": 4275 }, { "epoch": 101.81194029850747, "grad_norm": 31.20074462890625, "learning_rate": 8.941558441558442e-06, "loss": 42.687, "step": 4276 }, { "epoch": 101.83582089552239, "grad_norm": 27.639835357666016, "learning_rate": 8.93939393939394e-06, "loss": 41.7068, "step": 4277 }, { "epoch": 101.85970149253731, "grad_norm": 31.692638397216797, "learning_rate": 8.937229437229438e-06, "loss": 42.8243, "step": 4278 }, { "epoch": 101.88358208955223, "grad_norm": 28.27922248840332, "learning_rate": 8.935064935064936e-06, "loss": 41.8772, "step": 4279 }, { "epoch": 101.90746268656716, "grad_norm": 28.70676040649414, "learning_rate": 8.932900432900434e-06, "loss": 41.682, "step": 4280 }, { "epoch": 101.9313432835821, "grad_norm": 27.140151977539062, "learning_rate": 8.930735930735931e-06, "loss": 42.078, "step": 4281 }, { "epoch": 101.95522388059702, "grad_norm": 25.135448455810547, "learning_rate": 8.92857142857143e-06, "loss": 42.2035, "step": 4282 }, { "epoch": 101.97910447761194, "grad_norm": 22.988903045654297, "learning_rate": 8.926406926406927e-06, "loss": 41.2573, "step": 4283 }, { "epoch": 102.0, "grad_norm": 25.694786071777344, "learning_rate": 8.924242424242425e-06, "loss": 36.1049, "step": 4284 }, { "epoch": 102.02388059701492, "grad_norm": 24.528118133544922, "learning_rate": 8.922077922077923e-06, "loss": 42.162, "step": 4285 }, { "epoch": 102.04776119402985, "grad_norm": 27.563627243041992, "learning_rate": 8.91991341991342e-06, "loss": 41.7018, "step": 4286 }, { "epoch": 102.07164179104478, "grad_norm": 23.374286651611328, "learning_rate": 8.917748917748918e-06, "loss": 42.4075, "step": 4287 }, { "epoch": 102.0955223880597, "grad_norm": 28.673614501953125, "learning_rate": 8.915584415584416e-06, "loss": 41.8272, "step": 4288 }, { "epoch": 102.11940298507463, "grad_norm": 24.432859420776367, "learning_rate": 8.913419913419914e-06, "loss": 41.7054, "step": 4289 }, { "epoch": 102.14328358208955, "grad_norm": 26.83321189880371, "learning_rate": 8.911255411255412e-06, "loss": 42.2169, "step": 4290 }, { "epoch": 102.16716417910447, "grad_norm": 21.222537994384766, "learning_rate": 8.90909090909091e-06, "loss": 42.164, "step": 4291 }, { "epoch": 102.1910447761194, "grad_norm": 32.05888748168945, "learning_rate": 8.906926406926407e-06, "loss": 42.0759, "step": 4292 }, { "epoch": 102.21492537313434, "grad_norm": 22.959369659423828, "learning_rate": 8.904761904761905e-06, "loss": 43.0785, "step": 4293 }, { "epoch": 102.23880597014926, "grad_norm": 37.53632736206055, "learning_rate": 8.902597402597405e-06, "loss": 42.1665, "step": 4294 }, { "epoch": 102.26268656716418, "grad_norm": 29.86913299560547, "learning_rate": 8.900432900432902e-06, "loss": 41.3932, "step": 4295 }, { "epoch": 102.2865671641791, "grad_norm": 31.11789894104004, "learning_rate": 8.8982683982684e-06, "loss": 43.0771, "step": 4296 }, { "epoch": 102.31044776119403, "grad_norm": 27.745323181152344, "learning_rate": 8.896103896103896e-06, "loss": 41.1395, "step": 4297 }, { "epoch": 102.33432835820895, "grad_norm": 25.368127822875977, "learning_rate": 8.893939393939394e-06, "loss": 42.7978, "step": 4298 }, { "epoch": 102.35820895522389, "grad_norm": 24.081409454345703, "learning_rate": 8.891774891774892e-06, "loss": 41.4698, "step": 4299 }, { "epoch": 102.38208955223881, "grad_norm": 24.39154815673828, "learning_rate": 8.88961038961039e-06, "loss": 41.6765, "step": 4300 }, { "epoch": 102.40597014925373, "grad_norm": 21.794816970825195, "learning_rate": 8.887445887445887e-06, "loss": 40.9793, "step": 4301 }, { "epoch": 102.42985074626866, "grad_norm": 24.50321388244629, "learning_rate": 8.885281385281385e-06, "loss": 41.3914, "step": 4302 }, { "epoch": 102.45373134328358, "grad_norm": 21.492965698242188, "learning_rate": 8.883116883116883e-06, "loss": 42.1772, "step": 4303 }, { "epoch": 102.4776119402985, "grad_norm": 25.231094360351562, "learning_rate": 8.88095238095238e-06, "loss": 41.6758, "step": 4304 }, { "epoch": 102.50149253731344, "grad_norm": 21.51530647277832, "learning_rate": 8.87878787878788e-06, "loss": 41.0819, "step": 4305 }, { "epoch": 102.52537313432836, "grad_norm": 21.023269653320312, "learning_rate": 8.876623376623378e-06, "loss": 41.446, "step": 4306 }, { "epoch": 102.54925373134328, "grad_norm": 25.81951904296875, "learning_rate": 8.874458874458876e-06, "loss": 41.3221, "step": 4307 }, { "epoch": 102.57313432835821, "grad_norm": 19.7045841217041, "learning_rate": 8.872294372294374e-06, "loss": 42.5273, "step": 4308 }, { "epoch": 102.59701492537313, "grad_norm": 30.536680221557617, "learning_rate": 8.870129870129871e-06, "loss": 40.7574, "step": 4309 }, { "epoch": 102.62089552238805, "grad_norm": 22.61910629272461, "learning_rate": 8.86796536796537e-06, "loss": 42.1551, "step": 4310 }, { "epoch": 102.64477611940299, "grad_norm": 31.215150833129883, "learning_rate": 8.865800865800867e-06, "loss": 42.3013, "step": 4311 }, { "epoch": 102.66865671641791, "grad_norm": 29.22039794921875, "learning_rate": 8.863636363636365e-06, "loss": 42.3447, "step": 4312 }, { "epoch": 102.69253731343284, "grad_norm": 31.03571128845215, "learning_rate": 8.861471861471863e-06, "loss": 41.9643, "step": 4313 }, { "epoch": 102.71641791044776, "grad_norm": 26.90915298461914, "learning_rate": 8.85930735930736e-06, "loss": 42.8879, "step": 4314 }, { "epoch": 102.74029850746268, "grad_norm": 31.34430503845215, "learning_rate": 8.857142857142858e-06, "loss": 41.6856, "step": 4315 }, { "epoch": 102.7641791044776, "grad_norm": 26.868675231933594, "learning_rate": 8.854978354978356e-06, "loss": 41.1538, "step": 4316 }, { "epoch": 102.78805970149254, "grad_norm": 26.82084846496582, "learning_rate": 8.852813852813854e-06, "loss": 42.6873, "step": 4317 }, { "epoch": 102.81194029850747, "grad_norm": 24.742094039916992, "learning_rate": 8.850649350649352e-06, "loss": 43.168, "step": 4318 }, { "epoch": 102.83582089552239, "grad_norm": 23.871686935424805, "learning_rate": 8.84848484848485e-06, "loss": 42.0424, "step": 4319 }, { "epoch": 102.85970149253731, "grad_norm": 21.681507110595703, "learning_rate": 8.846320346320347e-06, "loss": 42.1547, "step": 4320 }, { "epoch": 102.88358208955223, "grad_norm": 28.63477325439453, "learning_rate": 8.844155844155845e-06, "loss": 41.2654, "step": 4321 }, { "epoch": 102.90746268656716, "grad_norm": 19.495147705078125, "learning_rate": 8.841991341991343e-06, "loss": 41.5641, "step": 4322 }, { "epoch": 102.9313432835821, "grad_norm": 33.34874725341797, "learning_rate": 8.83982683982684e-06, "loss": 41.7787, "step": 4323 }, { "epoch": 102.95522388059702, "grad_norm": 27.586767196655273, "learning_rate": 8.837662337662338e-06, "loss": 40.4204, "step": 4324 }, { "epoch": 102.97910447761194, "grad_norm": 28.708871841430664, "learning_rate": 8.835497835497836e-06, "loss": 41.7225, "step": 4325 }, { "epoch": 103.0, "grad_norm": 22.439306259155273, "learning_rate": 8.833333333333334e-06, "loss": 35.3291, "step": 4326 }, { "epoch": 103.02388059701492, "grad_norm": 25.760793685913086, "learning_rate": 8.831168831168832e-06, "loss": 42.0465, "step": 4327 }, { "epoch": 103.04776119402985, "grad_norm": 22.56456756591797, "learning_rate": 8.82900432900433e-06, "loss": 41.6094, "step": 4328 }, { "epoch": 103.07164179104478, "grad_norm": 30.912078857421875, "learning_rate": 8.826839826839827e-06, "loss": 43.0196, "step": 4329 }, { "epoch": 103.0955223880597, "grad_norm": 23.01909065246582, "learning_rate": 8.824675324675325e-06, "loss": 42.7305, "step": 4330 }, { "epoch": 103.11940298507463, "grad_norm": 29.197927474975586, "learning_rate": 8.822510822510823e-06, "loss": 41.0641, "step": 4331 }, { "epoch": 103.14328358208955, "grad_norm": 27.894495010375977, "learning_rate": 8.82034632034632e-06, "loss": 40.9656, "step": 4332 }, { "epoch": 103.16716417910447, "grad_norm": 27.135541915893555, "learning_rate": 8.818181818181819e-06, "loss": 41.7715, "step": 4333 }, { "epoch": 103.1910447761194, "grad_norm": 24.774351119995117, "learning_rate": 8.816017316017316e-06, "loss": 40.5809, "step": 4334 }, { "epoch": 103.21492537313434, "grad_norm": 27.74059295654297, "learning_rate": 8.813852813852814e-06, "loss": 40.9501, "step": 4335 }, { "epoch": 103.23880597014926, "grad_norm": 24.502626419067383, "learning_rate": 8.811688311688312e-06, "loss": 41.3341, "step": 4336 }, { "epoch": 103.26268656716418, "grad_norm": 29.406909942626953, "learning_rate": 8.80952380952381e-06, "loss": 42.8791, "step": 4337 }, { "epoch": 103.2865671641791, "grad_norm": 24.162965774536133, "learning_rate": 8.807359307359308e-06, "loss": 41.3314, "step": 4338 }, { "epoch": 103.31044776119403, "grad_norm": 27.782527923583984, "learning_rate": 8.805194805194805e-06, "loss": 41.0151, "step": 4339 }, { "epoch": 103.33432835820895, "grad_norm": 25.89789390563965, "learning_rate": 8.803030303030303e-06, "loss": 41.0182, "step": 4340 }, { "epoch": 103.35820895522389, "grad_norm": 31.413692474365234, "learning_rate": 8.800865800865803e-06, "loss": 41.1635, "step": 4341 }, { "epoch": 103.38208955223881, "grad_norm": 23.838945388793945, "learning_rate": 8.7987012987013e-06, "loss": 42.2695, "step": 4342 }, { "epoch": 103.40597014925373, "grad_norm": 27.55811309814453, "learning_rate": 8.796536796536798e-06, "loss": 42.6491, "step": 4343 }, { "epoch": 103.42985074626866, "grad_norm": 24.99410629272461, "learning_rate": 8.794372294372296e-06, "loss": 40.49, "step": 4344 }, { "epoch": 103.45373134328358, "grad_norm": 32.69471740722656, "learning_rate": 8.792207792207794e-06, "loss": 41.0334, "step": 4345 }, { "epoch": 103.4776119402985, "grad_norm": 25.661212921142578, "learning_rate": 8.79004329004329e-06, "loss": 42.0111, "step": 4346 }, { "epoch": 103.50149253731344, "grad_norm": 32.33528518676758, "learning_rate": 8.787878787878788e-06, "loss": 41.451, "step": 4347 }, { "epoch": 103.52537313432836, "grad_norm": 30.863183975219727, "learning_rate": 8.785714285714286e-06, "loss": 43.0101, "step": 4348 }, { "epoch": 103.54925373134328, "grad_norm": 27.80331802368164, "learning_rate": 8.783549783549783e-06, "loss": 41.8201, "step": 4349 }, { "epoch": 103.57313432835821, "grad_norm": 25.65656089782715, "learning_rate": 8.781385281385281e-06, "loss": 42.3188, "step": 4350 }, { "epoch": 103.59701492537313, "grad_norm": 27.477493286132812, "learning_rate": 8.779220779220779e-06, "loss": 42.4443, "step": 4351 }, { "epoch": 103.62089552238805, "grad_norm": 19.195556640625, "learning_rate": 8.777056277056277e-06, "loss": 41.6902, "step": 4352 }, { "epoch": 103.64477611940299, "grad_norm": 31.54138946533203, "learning_rate": 8.774891774891776e-06, "loss": 41.7891, "step": 4353 }, { "epoch": 103.66865671641791, "grad_norm": 24.392765045166016, "learning_rate": 8.772727272727274e-06, "loss": 43.201, "step": 4354 }, { "epoch": 103.69253731343284, "grad_norm": 31.868196487426758, "learning_rate": 8.770562770562772e-06, "loss": 42.0864, "step": 4355 }, { "epoch": 103.71641791044776, "grad_norm": 28.33005142211914, "learning_rate": 8.76839826839827e-06, "loss": 40.8061, "step": 4356 }, { "epoch": 103.74029850746268, "grad_norm": 29.663543701171875, "learning_rate": 8.766233766233767e-06, "loss": 41.195, "step": 4357 }, { "epoch": 103.7641791044776, "grad_norm": 24.99871826171875, "learning_rate": 8.764069264069265e-06, "loss": 42.0865, "step": 4358 }, { "epoch": 103.78805970149254, "grad_norm": 26.281768798828125, "learning_rate": 8.761904761904763e-06, "loss": 42.2214, "step": 4359 }, { "epoch": 103.81194029850747, "grad_norm": 25.848814010620117, "learning_rate": 8.75974025974026e-06, "loss": 41.752, "step": 4360 }, { "epoch": 103.83582089552239, "grad_norm": 25.99828338623047, "learning_rate": 8.757575757575759e-06, "loss": 41.3675, "step": 4361 }, { "epoch": 103.85970149253731, "grad_norm": 24.577255249023438, "learning_rate": 8.755411255411256e-06, "loss": 41.3633, "step": 4362 }, { "epoch": 103.88358208955223, "grad_norm": 28.189889907836914, "learning_rate": 8.753246753246754e-06, "loss": 41.1397, "step": 4363 }, { "epoch": 103.90746268656716, "grad_norm": 21.285263061523438, "learning_rate": 8.751082251082252e-06, "loss": 42.9034, "step": 4364 }, { "epoch": 103.9313432835821, "grad_norm": 26.459442138671875, "learning_rate": 8.74891774891775e-06, "loss": 42.1868, "step": 4365 }, { "epoch": 103.95522388059702, "grad_norm": 23.833219528198242, "learning_rate": 8.746753246753248e-06, "loss": 43.8222, "step": 4366 }, { "epoch": 103.97910447761194, "grad_norm": 28.269039154052734, "learning_rate": 8.744588744588745e-06, "loss": 41.7916, "step": 4367 }, { "epoch": 104.0, "grad_norm": 21.251577377319336, "learning_rate": 8.742424242424243e-06, "loss": 36.7322, "step": 4368 }, { "epoch": 104.02388059701492, "grad_norm": 24.385892868041992, "learning_rate": 8.740259740259741e-06, "loss": 41.1758, "step": 4369 }, { "epoch": 104.04776119402985, "grad_norm": 24.85951805114746, "learning_rate": 8.738095238095239e-06, "loss": 41.1797, "step": 4370 }, { "epoch": 104.07164179104478, "grad_norm": 22.94902229309082, "learning_rate": 8.735930735930737e-06, "loss": 42.0245, "step": 4371 }, { "epoch": 104.0955223880597, "grad_norm": 22.89316749572754, "learning_rate": 8.733766233766234e-06, "loss": 40.8802, "step": 4372 }, { "epoch": 104.11940298507463, "grad_norm": 17.931550979614258, "learning_rate": 8.731601731601732e-06, "loss": 41.7585, "step": 4373 }, { "epoch": 104.14328358208955, "grad_norm": 25.272066116333008, "learning_rate": 8.72943722943723e-06, "loss": 41.9595, "step": 4374 }, { "epoch": 104.16716417910447, "grad_norm": 18.83379364013672, "learning_rate": 8.727272727272728e-06, "loss": 42.0377, "step": 4375 }, { "epoch": 104.1910447761194, "grad_norm": 26.816553115844727, "learning_rate": 8.725108225108226e-06, "loss": 42.3945, "step": 4376 }, { "epoch": 104.21492537313434, "grad_norm": 21.217594146728516, "learning_rate": 8.722943722943723e-06, "loss": 41.0879, "step": 4377 }, { "epoch": 104.23880597014926, "grad_norm": 26.040369033813477, "learning_rate": 8.720779220779221e-06, "loss": 41.9009, "step": 4378 }, { "epoch": 104.26268656716418, "grad_norm": 21.120927810668945, "learning_rate": 8.718614718614719e-06, "loss": 41.5876, "step": 4379 }, { "epoch": 104.2865671641791, "grad_norm": 24.789485931396484, "learning_rate": 8.716450216450217e-06, "loss": 42.4683, "step": 4380 }, { "epoch": 104.31044776119403, "grad_norm": 20.2288761138916, "learning_rate": 8.714285714285715e-06, "loss": 41.7415, "step": 4381 }, { "epoch": 104.33432835820895, "grad_norm": 23.13172721862793, "learning_rate": 8.712121212121212e-06, "loss": 41.8696, "step": 4382 }, { "epoch": 104.35820895522389, "grad_norm": 21.838037490844727, "learning_rate": 8.70995670995671e-06, "loss": 43.1081, "step": 4383 }, { "epoch": 104.38208955223881, "grad_norm": 18.31660270690918, "learning_rate": 8.707792207792208e-06, "loss": 41.6609, "step": 4384 }, { "epoch": 104.40597014925373, "grad_norm": 20.596466064453125, "learning_rate": 8.705627705627706e-06, "loss": 41.9226, "step": 4385 }, { "epoch": 104.42985074626866, "grad_norm": 19.209354400634766, "learning_rate": 8.703463203463204e-06, "loss": 41.8937, "step": 4386 }, { "epoch": 104.45373134328358, "grad_norm": 21.35397720336914, "learning_rate": 8.701298701298701e-06, "loss": 41.222, "step": 4387 }, { "epoch": 104.4776119402985, "grad_norm": 16.040178298950195, "learning_rate": 8.6991341991342e-06, "loss": 40.8326, "step": 4388 }, { "epoch": 104.50149253731344, "grad_norm": 26.846803665161133, "learning_rate": 8.696969696969699e-06, "loss": 42.0748, "step": 4389 }, { "epoch": 104.52537313432836, "grad_norm": 19.368515014648438, "learning_rate": 8.694805194805196e-06, "loss": 41.4322, "step": 4390 }, { "epoch": 104.54925373134328, "grad_norm": 30.950580596923828, "learning_rate": 8.692640692640694e-06, "loss": 41.2695, "step": 4391 }, { "epoch": 104.57313432835821, "grad_norm": 23.07410430908203, "learning_rate": 8.690476190476192e-06, "loss": 41.8303, "step": 4392 }, { "epoch": 104.59701492537313, "grad_norm": 27.158117294311523, "learning_rate": 8.68831168831169e-06, "loss": 42.4952, "step": 4393 }, { "epoch": 104.62089552238805, "grad_norm": 25.001056671142578, "learning_rate": 8.686147186147188e-06, "loss": 41.4797, "step": 4394 }, { "epoch": 104.64477611940299, "grad_norm": 27.168846130371094, "learning_rate": 8.683982683982685e-06, "loss": 41.8096, "step": 4395 }, { "epoch": 104.66865671641791, "grad_norm": 21.596757888793945, "learning_rate": 8.681818181818182e-06, "loss": 41.9243, "step": 4396 }, { "epoch": 104.69253731343284, "grad_norm": 27.944332122802734, "learning_rate": 8.67965367965368e-06, "loss": 42.5102, "step": 4397 }, { "epoch": 104.71641791044776, "grad_norm": 24.295595169067383, "learning_rate": 8.677489177489177e-06, "loss": 42.5514, "step": 4398 }, { "epoch": 104.74029850746268, "grad_norm": 27.505474090576172, "learning_rate": 8.675324675324675e-06, "loss": 42.017, "step": 4399 }, { "epoch": 104.7641791044776, "grad_norm": 24.030363082885742, "learning_rate": 8.673160173160173e-06, "loss": 42.5318, "step": 4400 }, { "epoch": 104.78805970149254, "grad_norm": 26.74481964111328, "learning_rate": 8.670995670995672e-06, "loss": 42.4153, "step": 4401 }, { "epoch": 104.81194029850747, "grad_norm": 25.275205612182617, "learning_rate": 8.66883116883117e-06, "loss": 40.9114, "step": 4402 }, { "epoch": 104.83582089552239, "grad_norm": 19.21797752380371, "learning_rate": 8.666666666666668e-06, "loss": 41.4621, "step": 4403 }, { "epoch": 104.85970149253731, "grad_norm": 21.647167205810547, "learning_rate": 8.664502164502166e-06, "loss": 42.0579, "step": 4404 }, { "epoch": 104.88358208955223, "grad_norm": 18.133159637451172, "learning_rate": 8.662337662337663e-06, "loss": 41.3995, "step": 4405 }, { "epoch": 104.90746268656716, "grad_norm": 17.7130069732666, "learning_rate": 8.660173160173161e-06, "loss": 42.2021, "step": 4406 }, { "epoch": 104.9313432835821, "grad_norm": 17.646291732788086, "learning_rate": 8.658008658008659e-06, "loss": 41.4231, "step": 4407 }, { "epoch": 104.95522388059702, "grad_norm": 20.67991065979004, "learning_rate": 8.655844155844157e-06, "loss": 40.8638, "step": 4408 }, { "epoch": 104.97910447761194, "grad_norm": 19.140832901000977, "learning_rate": 8.653679653679655e-06, "loss": 42.5387, "step": 4409 }, { "epoch": 105.0, "grad_norm": 13.847710609436035, "learning_rate": 8.651515151515152e-06, "loss": 35.4038, "step": 4410 }, { "epoch": 105.02388059701492, "grad_norm": 16.923620223999023, "learning_rate": 8.64935064935065e-06, "loss": 43.0403, "step": 4411 }, { "epoch": 105.04776119402985, "grad_norm": 17.983060836791992, "learning_rate": 8.647186147186148e-06, "loss": 42.2899, "step": 4412 }, { "epoch": 105.07164179104478, "grad_norm": 16.440452575683594, "learning_rate": 8.645021645021646e-06, "loss": 41.3221, "step": 4413 }, { "epoch": 105.0955223880597, "grad_norm": 20.931194305419922, "learning_rate": 8.642857142857144e-06, "loss": 41.6118, "step": 4414 }, { "epoch": 105.11940298507463, "grad_norm": 16.63971710205078, "learning_rate": 8.640692640692641e-06, "loss": 41.56, "step": 4415 }, { "epoch": 105.14328358208955, "grad_norm": 19.395835876464844, "learning_rate": 8.63852813852814e-06, "loss": 40.3512, "step": 4416 }, { "epoch": 105.16716417910447, "grad_norm": 23.419681549072266, "learning_rate": 8.636363636363637e-06, "loss": 41.588, "step": 4417 }, { "epoch": 105.1910447761194, "grad_norm": 18.330759048461914, "learning_rate": 8.634199134199135e-06, "loss": 41.5903, "step": 4418 }, { "epoch": 105.21492537313434, "grad_norm": 32.92748260498047, "learning_rate": 8.632034632034633e-06, "loss": 41.7201, "step": 4419 }, { "epoch": 105.23880597014926, "grad_norm": 23.43516731262207, "learning_rate": 8.62987012987013e-06, "loss": 42.0367, "step": 4420 }, { "epoch": 105.26268656716418, "grad_norm": 31.077037811279297, "learning_rate": 8.627705627705628e-06, "loss": 41.8229, "step": 4421 }, { "epoch": 105.2865671641791, "grad_norm": 24.310850143432617, "learning_rate": 8.625541125541126e-06, "loss": 41.869, "step": 4422 }, { "epoch": 105.31044776119403, "grad_norm": 29.064128875732422, "learning_rate": 8.623376623376624e-06, "loss": 41.3312, "step": 4423 }, { "epoch": 105.33432835820895, "grad_norm": 27.2437686920166, "learning_rate": 8.621212121212122e-06, "loss": 41.4347, "step": 4424 }, { "epoch": 105.35820895522389, "grad_norm": 26.48787498474121, "learning_rate": 8.61904761904762e-06, "loss": 41.9868, "step": 4425 }, { "epoch": 105.38208955223881, "grad_norm": 23.06917953491211, "learning_rate": 8.616883116883117e-06, "loss": 40.7182, "step": 4426 }, { "epoch": 105.40597014925373, "grad_norm": 25.888072967529297, "learning_rate": 8.614718614718615e-06, "loss": 43.4227, "step": 4427 }, { "epoch": 105.42985074626866, "grad_norm": 21.196561813354492, "learning_rate": 8.612554112554113e-06, "loss": 42.1541, "step": 4428 }, { "epoch": 105.45373134328358, "grad_norm": 23.897281646728516, "learning_rate": 8.61038961038961e-06, "loss": 42.3009, "step": 4429 }, { "epoch": 105.4776119402985, "grad_norm": 21.39472770690918, "learning_rate": 8.608225108225108e-06, "loss": 41.9873, "step": 4430 }, { "epoch": 105.50149253731344, "grad_norm": 22.932235717773438, "learning_rate": 8.606060606060606e-06, "loss": 42.9503, "step": 4431 }, { "epoch": 105.52537313432836, "grad_norm": 19.643224716186523, "learning_rate": 8.603896103896104e-06, "loss": 41.1197, "step": 4432 }, { "epoch": 105.54925373134328, "grad_norm": 22.474496841430664, "learning_rate": 8.601731601731602e-06, "loss": 41.472, "step": 4433 }, { "epoch": 105.57313432835821, "grad_norm": 18.618505477905273, "learning_rate": 8.5995670995671e-06, "loss": 42.0385, "step": 4434 }, { "epoch": 105.59701492537313, "grad_norm": 22.780241012573242, "learning_rate": 8.597402597402597e-06, "loss": 42.2941, "step": 4435 }, { "epoch": 105.62089552238805, "grad_norm": 18.00736427307129, "learning_rate": 8.595238095238097e-06, "loss": 41.086, "step": 4436 }, { "epoch": 105.64477611940299, "grad_norm": 26.372411727905273, "learning_rate": 8.593073593073595e-06, "loss": 43.5092, "step": 4437 }, { "epoch": 105.66865671641791, "grad_norm": 20.315715789794922, "learning_rate": 8.590909090909092e-06, "loss": 42.6546, "step": 4438 }, { "epoch": 105.69253731343284, "grad_norm": 25.5256404876709, "learning_rate": 8.58874458874459e-06, "loss": 41.5332, "step": 4439 }, { "epoch": 105.71641791044776, "grad_norm": 23.848834991455078, "learning_rate": 8.586580086580088e-06, "loss": 41.1996, "step": 4440 }, { "epoch": 105.74029850746268, "grad_norm": 22.64993667602539, "learning_rate": 8.584415584415586e-06, "loss": 41.0973, "step": 4441 }, { "epoch": 105.7641791044776, "grad_norm": 25.26251792907715, "learning_rate": 8.582251082251084e-06, "loss": 40.5223, "step": 4442 }, { "epoch": 105.78805970149254, "grad_norm": 18.45581817626953, "learning_rate": 8.580086580086581e-06, "loss": 40.3545, "step": 4443 }, { "epoch": 105.81194029850747, "grad_norm": 20.561473846435547, "learning_rate": 8.57792207792208e-06, "loss": 41.3425, "step": 4444 }, { "epoch": 105.83582089552239, "grad_norm": 19.369930267333984, "learning_rate": 8.575757575757575e-06, "loss": 41.9595, "step": 4445 }, { "epoch": 105.85970149253731, "grad_norm": 16.14900779724121, "learning_rate": 8.573593073593073e-06, "loss": 41.1797, "step": 4446 }, { "epoch": 105.88358208955223, "grad_norm": 21.74477195739746, "learning_rate": 8.571428571428571e-06, "loss": 40.7879, "step": 4447 }, { "epoch": 105.90746268656716, "grad_norm": 16.549848556518555, "learning_rate": 8.56926406926407e-06, "loss": 41.5383, "step": 4448 }, { "epoch": 105.9313432835821, "grad_norm": 18.291797637939453, "learning_rate": 8.567099567099568e-06, "loss": 42.818, "step": 4449 }, { "epoch": 105.95522388059702, "grad_norm": 21.389198303222656, "learning_rate": 8.564935064935066e-06, "loss": 41.8181, "step": 4450 }, { "epoch": 105.97910447761194, "grad_norm": 18.4671630859375, "learning_rate": 8.562770562770564e-06, "loss": 40.5471, "step": 4451 }, { "epoch": 106.0, "grad_norm": 12.387261390686035, "learning_rate": 8.560606060606062e-06, "loss": 37.1723, "step": 4452 }, { "epoch": 106.02388059701492, "grad_norm": 25.35000991821289, "learning_rate": 8.55844155844156e-06, "loss": 41.526, "step": 4453 }, { "epoch": 106.04776119402985, "grad_norm": 18.196853637695312, "learning_rate": 8.556277056277057e-06, "loss": 42.4372, "step": 4454 }, { "epoch": 106.07164179104478, "grad_norm": 17.911649703979492, "learning_rate": 8.554112554112555e-06, "loss": 41.6807, "step": 4455 }, { "epoch": 106.0955223880597, "grad_norm": 18.82575798034668, "learning_rate": 8.551948051948053e-06, "loss": 41.5713, "step": 4456 }, { "epoch": 106.11940298507463, "grad_norm": 17.8409423828125, "learning_rate": 8.54978354978355e-06, "loss": 41.243, "step": 4457 }, { "epoch": 106.14328358208955, "grad_norm": 14.669032096862793, "learning_rate": 8.547619047619048e-06, "loss": 41.3578, "step": 4458 }, { "epoch": 106.16716417910447, "grad_norm": 18.624805450439453, "learning_rate": 8.545454545454546e-06, "loss": 42.5552, "step": 4459 }, { "epoch": 106.1910447761194, "grad_norm": 15.485766410827637, "learning_rate": 8.543290043290044e-06, "loss": 42.2293, "step": 4460 }, { "epoch": 106.21492537313434, "grad_norm": 19.794565200805664, "learning_rate": 8.541125541125542e-06, "loss": 41.1415, "step": 4461 }, { "epoch": 106.23880597014926, "grad_norm": 18.35716438293457, "learning_rate": 8.53896103896104e-06, "loss": 41.6452, "step": 4462 }, { "epoch": 106.26268656716418, "grad_norm": 20.6253719329834, "learning_rate": 8.536796536796537e-06, "loss": 41.2028, "step": 4463 }, { "epoch": 106.2865671641791, "grad_norm": 17.438785552978516, "learning_rate": 8.534632034632035e-06, "loss": 42.4732, "step": 4464 }, { "epoch": 106.31044776119403, "grad_norm": 22.83930778503418, "learning_rate": 8.532467532467533e-06, "loss": 40.1875, "step": 4465 }, { "epoch": 106.33432835820895, "grad_norm": 19.77629852294922, "learning_rate": 8.53030303030303e-06, "loss": 42.7191, "step": 4466 }, { "epoch": 106.35820895522389, "grad_norm": 24.823516845703125, "learning_rate": 8.528138528138529e-06, "loss": 41.8532, "step": 4467 }, { "epoch": 106.38208955223881, "grad_norm": 25.804109573364258, "learning_rate": 8.525974025974026e-06, "loss": 41.3039, "step": 4468 }, { "epoch": 106.40597014925373, "grad_norm": 18.37181854248047, "learning_rate": 8.523809523809524e-06, "loss": 42.5937, "step": 4469 }, { "epoch": 106.42985074626866, "grad_norm": 21.761140823364258, "learning_rate": 8.521645021645022e-06, "loss": 41.5739, "step": 4470 }, { "epoch": 106.45373134328358, "grad_norm": 18.34234619140625, "learning_rate": 8.51948051948052e-06, "loss": 42.0672, "step": 4471 }, { "epoch": 106.4776119402985, "grad_norm": 21.598434448242188, "learning_rate": 8.51731601731602e-06, "loss": 43.3303, "step": 4472 }, { "epoch": 106.50149253731344, "grad_norm": 19.549448013305664, "learning_rate": 8.515151515151517e-06, "loss": 41.4708, "step": 4473 }, { "epoch": 106.52537313432836, "grad_norm": 20.763225555419922, "learning_rate": 8.512987012987015e-06, "loss": 41.8263, "step": 4474 }, { "epoch": 106.54925373134328, "grad_norm": 19.644168853759766, "learning_rate": 8.510822510822511e-06, "loss": 42.1677, "step": 4475 }, { "epoch": 106.57313432835821, "grad_norm": 23.9834041595459, "learning_rate": 8.508658008658009e-06, "loss": 42.5743, "step": 4476 }, { "epoch": 106.59701492537313, "grad_norm": 19.93153953552246, "learning_rate": 8.506493506493507e-06, "loss": 41.2167, "step": 4477 }, { "epoch": 106.62089552238805, "grad_norm": 22.8863525390625, "learning_rate": 8.504329004329004e-06, "loss": 41.1281, "step": 4478 }, { "epoch": 106.64477611940299, "grad_norm": 20.42034149169922, "learning_rate": 8.502164502164502e-06, "loss": 41.0462, "step": 4479 }, { "epoch": 106.66865671641791, "grad_norm": 21.096284866333008, "learning_rate": 8.5e-06, "loss": 41.1694, "step": 4480 }, { "epoch": 106.69253731343284, "grad_norm": 21.80982780456543, "learning_rate": 8.497835497835498e-06, "loss": 41.3229, "step": 4481 }, { "epoch": 106.71641791044776, "grad_norm": 19.85307502746582, "learning_rate": 8.495670995670996e-06, "loss": 41.6649, "step": 4482 }, { "epoch": 106.74029850746268, "grad_norm": 15.509448051452637, "learning_rate": 8.493506493506493e-06, "loss": 41.1427, "step": 4483 }, { "epoch": 106.7641791044776, "grad_norm": 19.124879837036133, "learning_rate": 8.491341991341993e-06, "loss": 41.2628, "step": 4484 }, { "epoch": 106.78805970149254, "grad_norm": 16.123470306396484, "learning_rate": 8.48917748917749e-06, "loss": 41.9344, "step": 4485 }, { "epoch": 106.81194029850747, "grad_norm": 16.704002380371094, "learning_rate": 8.487012987012988e-06, "loss": 41.3242, "step": 4486 }, { "epoch": 106.83582089552239, "grad_norm": 17.679168701171875, "learning_rate": 8.484848484848486e-06, "loss": 42.4921, "step": 4487 }, { "epoch": 106.85970149253731, "grad_norm": 16.334306716918945, "learning_rate": 8.482683982683984e-06, "loss": 41.7613, "step": 4488 }, { "epoch": 106.88358208955223, "grad_norm": 21.279388427734375, "learning_rate": 8.480519480519482e-06, "loss": 40.7258, "step": 4489 }, { "epoch": 106.90746268656716, "grad_norm": 18.096824645996094, "learning_rate": 8.47835497835498e-06, "loss": 41.4365, "step": 4490 }, { "epoch": 106.9313432835821, "grad_norm": 14.148079872131348, "learning_rate": 8.476190476190477e-06, "loss": 41.9096, "step": 4491 }, { "epoch": 106.95522388059702, "grad_norm": 18.99448013305664, "learning_rate": 8.474025974025975e-06, "loss": 41.1249, "step": 4492 }, { "epoch": 106.97910447761194, "grad_norm": 19.877487182617188, "learning_rate": 8.471861471861473e-06, "loss": 41.6588, "step": 4493 }, { "epoch": 107.0, "grad_norm": 17.858646392822266, "learning_rate": 8.46969696969697e-06, "loss": 35.8561, "step": 4494 }, { "epoch": 107.02388059701492, "grad_norm": 15.608851432800293, "learning_rate": 8.467532467532467e-06, "loss": 41.4418, "step": 4495 }, { "epoch": 107.04776119402985, "grad_norm": 22.582759857177734, "learning_rate": 8.465367965367966e-06, "loss": 41.0498, "step": 4496 }, { "epoch": 107.07164179104478, "grad_norm": 21.779876708984375, "learning_rate": 8.463203463203464e-06, "loss": 41.6588, "step": 4497 }, { "epoch": 107.0955223880597, "grad_norm": 20.698528289794922, "learning_rate": 8.461038961038962e-06, "loss": 43.0142, "step": 4498 }, { "epoch": 107.11940298507463, "grad_norm": 16.091886520385742, "learning_rate": 8.45887445887446e-06, "loss": 41.3033, "step": 4499 }, { "epoch": 107.14328358208955, "grad_norm": 28.291919708251953, "learning_rate": 8.456709956709958e-06, "loss": 41.3949, "step": 4500 }, { "epoch": 107.16716417910447, "grad_norm": 19.51844596862793, "learning_rate": 8.454545454545455e-06, "loss": 42.2322, "step": 4501 }, { "epoch": 107.1910447761194, "grad_norm": 27.817554473876953, "learning_rate": 8.452380952380953e-06, "loss": 41.1951, "step": 4502 }, { "epoch": 107.21492537313434, "grad_norm": 20.286903381347656, "learning_rate": 8.450216450216451e-06, "loss": 42.1971, "step": 4503 }, { "epoch": 107.23880597014926, "grad_norm": 26.34720230102539, "learning_rate": 8.448051948051949e-06, "loss": 40.7248, "step": 4504 }, { "epoch": 107.26268656716418, "grad_norm": 21.989835739135742, "learning_rate": 8.445887445887447e-06, "loss": 42.4462, "step": 4505 }, { "epoch": 107.2865671641791, "grad_norm": 22.28291893005371, "learning_rate": 8.443722943722944e-06, "loss": 41.103, "step": 4506 }, { "epoch": 107.31044776119403, "grad_norm": 21.63711166381836, "learning_rate": 8.441558441558442e-06, "loss": 42.685, "step": 4507 }, { "epoch": 107.33432835820895, "grad_norm": 16.82655143737793, "learning_rate": 8.43939393939394e-06, "loss": 42.0045, "step": 4508 }, { "epoch": 107.35820895522389, "grad_norm": 24.85128402709961, "learning_rate": 8.437229437229438e-06, "loss": 41.6018, "step": 4509 }, { "epoch": 107.38208955223881, "grad_norm": 18.015731811523438, "learning_rate": 8.435064935064936e-06, "loss": 40.7281, "step": 4510 }, { "epoch": 107.40597014925373, "grad_norm": 26.402570724487305, "learning_rate": 8.432900432900433e-06, "loss": 42.5324, "step": 4511 }, { "epoch": 107.42985074626866, "grad_norm": 21.223861694335938, "learning_rate": 8.430735930735931e-06, "loss": 40.7112, "step": 4512 }, { "epoch": 107.45373134328358, "grad_norm": 19.461315155029297, "learning_rate": 8.428571428571429e-06, "loss": 40.8781, "step": 4513 }, { "epoch": 107.4776119402985, "grad_norm": 23.075971603393555, "learning_rate": 8.426406926406927e-06, "loss": 41.2487, "step": 4514 }, { "epoch": 107.50149253731344, "grad_norm": 22.154701232910156, "learning_rate": 8.424242424242425e-06, "loss": 41.175, "step": 4515 }, { "epoch": 107.52537313432836, "grad_norm": 29.775875091552734, "learning_rate": 8.422077922077922e-06, "loss": 42.0601, "step": 4516 }, { "epoch": 107.54925373134328, "grad_norm": 23.231462478637695, "learning_rate": 8.41991341991342e-06, "loss": 41.6765, "step": 4517 }, { "epoch": 107.57313432835821, "grad_norm": 28.446731567382812, "learning_rate": 8.417748917748918e-06, "loss": 43.3939, "step": 4518 }, { "epoch": 107.59701492537313, "grad_norm": 17.26323890686035, "learning_rate": 8.415584415584416e-06, "loss": 41.2887, "step": 4519 }, { "epoch": 107.62089552238805, "grad_norm": 20.49373435974121, "learning_rate": 8.413419913419915e-06, "loss": 41.6722, "step": 4520 }, { "epoch": 107.64477611940299, "grad_norm": 16.85104751586914, "learning_rate": 8.411255411255413e-06, "loss": 40.5418, "step": 4521 }, { "epoch": 107.66865671641791, "grad_norm": 16.146242141723633, "learning_rate": 8.40909090909091e-06, "loss": 40.9689, "step": 4522 }, { "epoch": 107.69253731343284, "grad_norm": 17.153108596801758, "learning_rate": 8.406926406926409e-06, "loss": 41.6464, "step": 4523 }, { "epoch": 107.71641791044776, "grad_norm": 20.580894470214844, "learning_rate": 8.404761904761905e-06, "loss": 42.4474, "step": 4524 }, { "epoch": 107.74029850746268, "grad_norm": 15.058161735534668, "learning_rate": 8.402597402597403e-06, "loss": 41.7216, "step": 4525 }, { "epoch": 107.7641791044776, "grad_norm": 20.474285125732422, "learning_rate": 8.4004329004329e-06, "loss": 41.7553, "step": 4526 }, { "epoch": 107.78805970149254, "grad_norm": 20.75484275817871, "learning_rate": 8.398268398268398e-06, "loss": 41.6698, "step": 4527 }, { "epoch": 107.81194029850747, "grad_norm": 14.600532531738281, "learning_rate": 8.396103896103896e-06, "loss": 41.6121, "step": 4528 }, { "epoch": 107.83582089552239, "grad_norm": 18.704586029052734, "learning_rate": 8.393939393939394e-06, "loss": 40.5424, "step": 4529 }, { "epoch": 107.85970149253731, "grad_norm": 15.7553129196167, "learning_rate": 8.391774891774892e-06, "loss": 41.0951, "step": 4530 }, { "epoch": 107.88358208955223, "grad_norm": 20.587574005126953, "learning_rate": 8.38961038961039e-06, "loss": 42.1185, "step": 4531 }, { "epoch": 107.90746268656716, "grad_norm": 16.545307159423828, "learning_rate": 8.387445887445889e-06, "loss": 40.7692, "step": 4532 }, { "epoch": 107.9313432835821, "grad_norm": 19.46141815185547, "learning_rate": 8.385281385281387e-06, "loss": 41.6013, "step": 4533 }, { "epoch": 107.95522388059702, "grad_norm": 16.87144660949707, "learning_rate": 8.383116883116884e-06, "loss": 41.6794, "step": 4534 }, { "epoch": 107.97910447761194, "grad_norm": 16.42438316345215, "learning_rate": 8.380952380952382e-06, "loss": 42.3932, "step": 4535 }, { "epoch": 108.0, "grad_norm": 24.72748374938965, "learning_rate": 8.37878787878788e-06, "loss": 36.3065, "step": 4536 }, { "epoch": 108.02388059701492, "grad_norm": 21.551437377929688, "learning_rate": 8.376623376623378e-06, "loss": 41.1883, "step": 4537 }, { "epoch": 108.04776119402985, "grad_norm": 31.447101593017578, "learning_rate": 8.374458874458876e-06, "loss": 41.2846, "step": 4538 }, { "epoch": 108.07164179104478, "grad_norm": 23.090343475341797, "learning_rate": 8.372294372294373e-06, "loss": 41.7488, "step": 4539 }, { "epoch": 108.0955223880597, "grad_norm": 29.949562072753906, "learning_rate": 8.370129870129871e-06, "loss": 42.462, "step": 4540 }, { "epoch": 108.11940298507463, "grad_norm": 21.09743309020996, "learning_rate": 8.367965367965369e-06, "loss": 41.4602, "step": 4541 }, { "epoch": 108.14328358208955, "grad_norm": 34.469139099121094, "learning_rate": 8.365800865800867e-06, "loss": 41.5088, "step": 4542 }, { "epoch": 108.16716417910447, "grad_norm": 25.73923110961914, "learning_rate": 8.363636363636365e-06, "loss": 42.9585, "step": 4543 }, { "epoch": 108.1910447761194, "grad_norm": 30.073488235473633, "learning_rate": 8.361471861471862e-06, "loss": 41.5492, "step": 4544 }, { "epoch": 108.21492537313434, "grad_norm": 26.56512451171875, "learning_rate": 8.35930735930736e-06, "loss": 41.2259, "step": 4545 }, { "epoch": 108.23880597014926, "grad_norm": 32.05238723754883, "learning_rate": 8.357142857142858e-06, "loss": 41.8249, "step": 4546 }, { "epoch": 108.26268656716418, "grad_norm": 25.487403869628906, "learning_rate": 8.354978354978356e-06, "loss": 41.2949, "step": 4547 }, { "epoch": 108.2865671641791, "grad_norm": 26.391586303710938, "learning_rate": 8.352813852813854e-06, "loss": 41.9133, "step": 4548 }, { "epoch": 108.31044776119403, "grad_norm": 23.400354385375977, "learning_rate": 8.350649350649351e-06, "loss": 41.2916, "step": 4549 }, { "epoch": 108.33432835820895, "grad_norm": 32.927467346191406, "learning_rate": 8.348484848484849e-06, "loss": 41.0151, "step": 4550 }, { "epoch": 108.35820895522389, "grad_norm": NaN, "learning_rate": 8.346320346320347e-06, "loss": 41.9067, "step": 4551 }, { "epoch": 108.38208955223881, "grad_norm": 27.199121475219727, "learning_rate": 8.346320346320347e-06, "loss": 41.8715, "step": 4552 }, { "epoch": 108.40597014925373, "grad_norm": 30.17380142211914, "learning_rate": 8.344155844155845e-06, "loss": 41.9553, "step": 4553 }, { "epoch": 108.42985074626866, "grad_norm": 28.72991943359375, "learning_rate": 8.341991341991343e-06, "loss": 41.9683, "step": 4554 }, { "epoch": 108.45373134328358, "grad_norm": 26.323143005371094, "learning_rate": 8.33982683982684e-06, "loss": 41.1456, "step": 4555 }, { "epoch": 108.4776119402985, "grad_norm": 23.400619506835938, "learning_rate": 8.337662337662338e-06, "loss": 41.7313, "step": 4556 }, { "epoch": 108.50149253731344, "grad_norm": 23.805021286010742, "learning_rate": 8.335497835497836e-06, "loss": 40.5904, "step": 4557 }, { "epoch": 108.52537313432836, "grad_norm": 21.17874526977539, "learning_rate": 8.333333333333334e-06, "loss": 41.5941, "step": 4558 }, { "epoch": 108.54925373134328, "grad_norm": 26.89427375793457, "learning_rate": 8.331168831168832e-06, "loss": 40.6515, "step": 4559 }, { "epoch": 108.57313432835821, "grad_norm": 22.102890014648438, "learning_rate": 8.32900432900433e-06, "loss": 41.6195, "step": 4560 }, { "epoch": 108.59701492537313, "grad_norm": 28.349239349365234, "learning_rate": 8.326839826839827e-06, "loss": 41.7613, "step": 4561 }, { "epoch": 108.62089552238805, "grad_norm": 24.95227813720703, "learning_rate": 8.324675324675325e-06, "loss": 42.2593, "step": 4562 }, { "epoch": 108.64477611940299, "grad_norm": 29.643531799316406, "learning_rate": 8.322510822510823e-06, "loss": 42.5247, "step": 4563 }, { "epoch": 108.66865671641791, "grad_norm": 24.321622848510742, "learning_rate": 8.32034632034632e-06, "loss": 41.5149, "step": 4564 }, { "epoch": 108.69253731343284, "grad_norm": 27.7292537689209, "learning_rate": 8.318181818181818e-06, "loss": 41.6252, "step": 4565 }, { "epoch": 108.71641791044776, "grad_norm": 23.14917755126953, "learning_rate": 8.316017316017316e-06, "loss": 41.8188, "step": 4566 }, { "epoch": 108.74029850746268, "grad_norm": 31.897857666015625, "learning_rate": 8.313852813852814e-06, "loss": 41.2639, "step": 4567 }, { "epoch": 108.7641791044776, "grad_norm": 22.20448112487793, "learning_rate": 8.311688311688313e-06, "loss": 40.8366, "step": 4568 }, { "epoch": 108.78805970149254, "grad_norm": 33.260982513427734, "learning_rate": 8.309523809523811e-06, "loss": 42.1547, "step": 4569 }, { "epoch": 108.81194029850747, "grad_norm": 28.327970504760742, "learning_rate": 8.307359307359309e-06, "loss": 41.0261, "step": 4570 }, { "epoch": 108.83582089552239, "grad_norm": 30.421405792236328, "learning_rate": 8.305194805194807e-06, "loss": 42.6333, "step": 4571 }, { "epoch": 108.85970149253731, "grad_norm": 27.54227066040039, "learning_rate": 8.303030303030305e-06, "loss": 41.5392, "step": 4572 }, { "epoch": 108.88358208955223, "grad_norm": 27.083431243896484, "learning_rate": 8.300865800865802e-06, "loss": 40.5557, "step": 4573 }, { "epoch": 108.90746268656716, "grad_norm": 22.203136444091797, "learning_rate": 8.2987012987013e-06, "loss": 40.2079, "step": 4574 }, { "epoch": 108.9313432835821, "grad_norm": 30.871158599853516, "learning_rate": 8.296536796536796e-06, "loss": 40.74, "step": 4575 }, { "epoch": 108.95522388059702, "grad_norm": 26.300838470458984, "learning_rate": 8.294372294372294e-06, "loss": 41.3107, "step": 4576 }, { "epoch": 108.97910447761194, "grad_norm": 31.482698440551758, "learning_rate": 8.292207792207792e-06, "loss": 42.54, "step": 4577 }, { "epoch": 109.0, "grad_norm": 21.353776931762695, "learning_rate": 8.29004329004329e-06, "loss": 36.6851, "step": 4578 }, { "epoch": 109.02388059701492, "grad_norm": 27.117504119873047, "learning_rate": 8.287878787878787e-06, "loss": 41.3884, "step": 4579 }, { "epoch": 109.04776119402985, "grad_norm": 23.497106552124023, "learning_rate": 8.285714285714287e-06, "loss": 41.0846, "step": 4580 }, { "epoch": 109.07164179104478, "grad_norm": 27.996051788330078, "learning_rate": 8.283549783549785e-06, "loss": 41.214, "step": 4581 }, { "epoch": 109.0955223880597, "grad_norm": 24.364675521850586, "learning_rate": 8.281385281385283e-06, "loss": 42.1255, "step": 4582 }, { "epoch": 109.11940298507463, "grad_norm": 31.155681610107422, "learning_rate": 8.27922077922078e-06, "loss": 42.1774, "step": 4583 }, { "epoch": 109.14328358208955, "grad_norm": 27.193376541137695, "learning_rate": 8.277056277056278e-06, "loss": 42.4321, "step": 4584 }, { "epoch": 109.16716417910447, "grad_norm": 30.398059844970703, "learning_rate": 8.274891774891776e-06, "loss": 42.0286, "step": 4585 }, { "epoch": 109.1910447761194, "grad_norm": 27.1219425201416, "learning_rate": 8.272727272727274e-06, "loss": 40.9508, "step": 4586 }, { "epoch": 109.21492537313434, "grad_norm": 29.481327056884766, "learning_rate": 8.270562770562772e-06, "loss": 42.1899, "step": 4587 }, { "epoch": 109.23880597014926, "grad_norm": 29.547292709350586, "learning_rate": 8.26839826839827e-06, "loss": 40.2919, "step": 4588 }, { "epoch": 109.26268656716418, "grad_norm": 26.99224281311035, "learning_rate": 8.266233766233767e-06, "loss": 41.1843, "step": 4589 }, { "epoch": 109.2865671641791, "grad_norm": 25.45054054260254, "learning_rate": 8.264069264069265e-06, "loss": 41.6843, "step": 4590 }, { "epoch": 109.31044776119403, "grad_norm": 27.529739379882812, "learning_rate": 8.261904761904763e-06, "loss": 39.7442, "step": 4591 }, { "epoch": 109.33432835820895, "grad_norm": 23.54625129699707, "learning_rate": 8.25974025974026e-06, "loss": 40.9662, "step": 4592 }, { "epoch": 109.35820895522389, "grad_norm": 26.74515151977539, "learning_rate": 8.257575757575758e-06, "loss": 42.1445, "step": 4593 }, { "epoch": 109.38208955223881, "grad_norm": 24.591623306274414, "learning_rate": 8.255411255411256e-06, "loss": 41.4631, "step": 4594 }, { "epoch": 109.40597014925373, "grad_norm": 27.32378578186035, "learning_rate": 8.253246753246754e-06, "loss": 43.223, "step": 4595 }, { "epoch": 109.42985074626866, "grad_norm": 24.74321937561035, "learning_rate": 8.251082251082252e-06, "loss": 42.1151, "step": 4596 }, { "epoch": 109.45373134328358, "grad_norm": 24.764156341552734, "learning_rate": 8.24891774891775e-06, "loss": 42.0353, "step": 4597 }, { "epoch": 109.4776119402985, "grad_norm": 22.001508712768555, "learning_rate": 8.246753246753247e-06, "loss": 41.5189, "step": 4598 }, { "epoch": 109.50149253731344, "grad_norm": 27.916759490966797, "learning_rate": 8.244588744588745e-06, "loss": 42.8372, "step": 4599 }, { "epoch": 109.52537313432836, "grad_norm": 23.65235137939453, "learning_rate": 8.242424242424243e-06, "loss": 40.9737, "step": 4600 }, { "epoch": 109.54925373134328, "grad_norm": 25.150957107543945, "learning_rate": 8.24025974025974e-06, "loss": 41.279, "step": 4601 }, { "epoch": 109.57313432835821, "grad_norm": 24.193187713623047, "learning_rate": 8.238095238095239e-06, "loss": 42.029, "step": 4602 }, { "epoch": 109.59701492537313, "grad_norm": 26.186813354492188, "learning_rate": 8.235930735930736e-06, "loss": 40.3791, "step": 4603 }, { "epoch": 109.62089552238805, "grad_norm": 24.97614097595215, "learning_rate": 8.233766233766236e-06, "loss": 40.8347, "step": 4604 }, { "epoch": 109.64477611940299, "grad_norm": 27.51297950744629, "learning_rate": 8.231601731601732e-06, "loss": 40.6329, "step": 4605 }, { "epoch": 109.66865671641791, "grad_norm": 24.866369247436523, "learning_rate": 8.22943722943723e-06, "loss": 41.5968, "step": 4606 }, { "epoch": 109.69253731343284, "grad_norm": 28.864290237426758, "learning_rate": 8.227272727272728e-06, "loss": 41.3671, "step": 4607 }, { "epoch": 109.71641791044776, "grad_norm": 29.503835678100586, "learning_rate": 8.225108225108225e-06, "loss": 41.2315, "step": 4608 }, { "epoch": 109.74029850746268, "grad_norm": 26.039966583251953, "learning_rate": 8.222943722943723e-06, "loss": 41.0179, "step": 4609 }, { "epoch": 109.7641791044776, "grad_norm": 22.42831039428711, "learning_rate": 8.220779220779221e-06, "loss": 41.3331, "step": 4610 }, { "epoch": 109.78805970149254, "grad_norm": 25.328296661376953, "learning_rate": 8.218614718614719e-06, "loss": 39.9773, "step": 4611 }, { "epoch": 109.81194029850747, "grad_norm": 18.524499893188477, "learning_rate": 8.216450216450216e-06, "loss": 42.5481, "step": 4612 }, { "epoch": 109.83582089552239, "grad_norm": 26.1571102142334, "learning_rate": 8.214285714285714e-06, "loss": 41.759, "step": 4613 }, { "epoch": 109.85970149253731, "grad_norm": 22.46668243408203, "learning_rate": 8.212121212121212e-06, "loss": 42.0987, "step": 4614 }, { "epoch": 109.88358208955223, "grad_norm": 29.418230056762695, "learning_rate": 8.20995670995671e-06, "loss": 42.8083, "step": 4615 }, { "epoch": 109.90746268656716, "grad_norm": 23.00196647644043, "learning_rate": 8.20779220779221e-06, "loss": 40.488, "step": 4616 }, { "epoch": 109.9313432835821, "grad_norm": 27.977956771850586, "learning_rate": 8.205627705627707e-06, "loss": 41.4731, "step": 4617 }, { "epoch": 109.95522388059702, "grad_norm": 24.776628494262695, "learning_rate": 8.203463203463205e-06, "loss": 42.6836, "step": 4618 }, { "epoch": 109.97910447761194, "grad_norm": 27.11109733581543, "learning_rate": 8.201298701298703e-06, "loss": 40.7662, "step": 4619 }, { "epoch": 110.0, "grad_norm": 20.246700286865234, "learning_rate": 8.1991341991342e-06, "loss": 36.1303, "step": 4620 }, { "epoch": 110.0, "step": 4620, "total_flos": 2.2713564637226506e+17, "train_loss": 7.6340307194433175, "train_runtime": 25635.1587, "train_samples_per_second": 22.965, "train_steps_per_second": 0.18 }, { "epoch": 110.02388059701492, "grad_norm": 21.282230377197266, "learning_rate": 1e-05, "loss": 42.4757, "step": 4621 }, { "epoch": 110.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998015873015874e-06, "loss": 48.912, "step": 4622 }, { "epoch": 110.07164179104478, "grad_norm": Infinity, "learning_rate": 9.998015873015874e-06, "loss": 49.0673, "step": 4623 }, { "epoch": 110.0955223880597, "grad_norm": 445.232177734375, "learning_rate": 9.998015873015874e-06, "loss": 48.7345, "step": 4624 }, { "epoch": 110.11940298507463, "grad_norm": 224.98858642578125, "learning_rate": 9.996031746031746e-06, "loss": 45.5376, "step": 4625 }, { "epoch": 110.14328358208955, "grad_norm": 92.86235046386719, "learning_rate": 9.99404761904762e-06, "loss": 44.0537, "step": 4626 }, { "epoch": 110.16716417910447, "grad_norm": 67.79006958007812, "learning_rate": 9.992063492063493e-06, "loss": 42.4137, "step": 4627 }, { "epoch": 110.1910447761194, "grad_norm": 52.0079345703125, "learning_rate": 9.990079365079366e-06, "loss": 42.7133, "step": 4628 }, { "epoch": 110.21492537313434, "grad_norm": 40.780120849609375, "learning_rate": 9.988095238095239e-06, "loss": 42.7036, "step": 4629 }, { "epoch": 110.23880597014926, "grad_norm": 45.80341339111328, "learning_rate": 9.986111111111111e-06, "loss": 42.5543, "step": 4630 }, { "epoch": 110.26268656716418, "grad_norm": NaN, "learning_rate": 9.984126984126986e-06, "loss": 67.9585, "step": 4631 }, { "epoch": 110.2865671641791, "grad_norm": 37.52305603027344, "learning_rate": 9.984126984126986e-06, "loss": 42.0859, "step": 4632 }, { "epoch": 110.31044776119403, "grad_norm": 37.10969543457031, "learning_rate": 9.982142857142858e-06, "loss": 42.8517, "step": 4633 }, { "epoch": 110.33432835820895, "grad_norm": 33.601905822753906, "learning_rate": 9.980158730158731e-06, "loss": 41.4451, "step": 4634 }, { "epoch": 110.35820895522389, "grad_norm": 25.348403930664062, "learning_rate": 9.978174603174604e-06, "loss": 41.7698, "step": 4635 }, { "epoch": 110.38208955223881, "grad_norm": 28.69048309326172, "learning_rate": 9.976190476190477e-06, "loss": 41.8287, "step": 4636 }, { "epoch": 110.40597014925373, "grad_norm": 30.578548431396484, "learning_rate": 9.97420634920635e-06, "loss": 42.4165, "step": 4637 }, { "epoch": 110.42985074626866, "grad_norm": 19.63727569580078, "learning_rate": 9.972222222222224e-06, "loss": 42.37, "step": 4638 }, { "epoch": 110.45373134328358, "grad_norm": 34.42063522338867, "learning_rate": 9.970238095238096e-06, "loss": 42.7996, "step": 4639 }, { "epoch": 110.4776119402985, "grad_norm": NaN, "learning_rate": 9.968253968253969e-06, "loss": 62.9577, "step": 4640 }, { "epoch": 110.50149253731344, "grad_norm": 23.131494522094727, "learning_rate": 9.968253968253969e-06, "loss": 40.8106, "step": 4641 }, { "epoch": 110.52537313432836, "grad_norm": 22.294376373291016, "learning_rate": 9.966269841269842e-06, "loss": 42.7557, "step": 4642 }, { "epoch": 110.54925373134328, "grad_norm": 30.476016998291016, "learning_rate": 9.964285714285714e-06, "loss": 42.1925, "step": 4643 }, { "epoch": 110.57313432835821, "grad_norm": 20.84010887145996, "learning_rate": 9.962301587301589e-06, "loss": 41.9241, "step": 4644 }, { "epoch": 110.59701492537313, "grad_norm": 23.944196701049805, "learning_rate": 9.960317460317462e-06, "loss": 41.8911, "step": 4645 }, { "epoch": 110.62089552238805, "grad_norm": 28.773279190063477, "learning_rate": 9.958333333333334e-06, "loss": 41.5711, "step": 4646 }, { "epoch": 110.64477611940299, "grad_norm": 22.82482147216797, "learning_rate": 9.956349206349207e-06, "loss": 42.1915, "step": 4647 }, { "epoch": 110.66865671641791, "grad_norm": 24.0530948638916, "learning_rate": 9.95436507936508e-06, "loss": 40.6648, "step": 4648 }, { "epoch": 110.69253731343284, "grad_norm": 21.640682220458984, "learning_rate": 9.952380952380954e-06, "loss": 41.5682, "step": 4649 }, { "epoch": 110.71641791044776, "grad_norm": 21.377979278564453, "learning_rate": 9.950396825396827e-06, "loss": 41.6034, "step": 4650 }, { "epoch": 110.74029850746268, "grad_norm": 19.04741668701172, "learning_rate": 9.9484126984127e-06, "loss": 41.8165, "step": 4651 }, { "epoch": 110.7641791044776, "grad_norm": 22.74652099609375, "learning_rate": 9.946428571428572e-06, "loss": 40.4093, "step": 4652 }, { "epoch": 110.78805970149254, "grad_norm": 16.828824996948242, "learning_rate": 9.944444444444445e-06, "loss": 42.181, "step": 4653 }, { "epoch": 110.81194029850747, "grad_norm": 20.226478576660156, "learning_rate": 9.94246031746032e-06, "loss": 41.774, "step": 4654 }, { "epoch": 110.83582089552239, "grad_norm": 19.935068130493164, "learning_rate": 9.940476190476192e-06, "loss": 41.6547, "step": 4655 }, { "epoch": 110.85970149253731, "grad_norm": 18.150102615356445, "learning_rate": 9.938492063492065e-06, "loss": 39.962, "step": 4656 }, { "epoch": 110.88358208955223, "grad_norm": 27.12464141845703, "learning_rate": 9.936507936507937e-06, "loss": 41.2807, "step": 4657 }, { "epoch": 110.90746268656716, "grad_norm": 18.194360733032227, "learning_rate": 9.93452380952381e-06, "loss": 40.8381, "step": 4658 }, { "epoch": 110.9313432835821, "grad_norm": 25.638107299804688, "learning_rate": 9.932539682539684e-06, "loss": 41.2385, "step": 4659 }, { "epoch": 110.95522388059702, "grad_norm": 21.1163387298584, "learning_rate": 9.930555555555557e-06, "loss": 41.0065, "step": 4660 }, { "epoch": 110.97910447761194, "grad_norm": 17.089710235595703, "learning_rate": 9.92857142857143e-06, "loss": 41.8835, "step": 4661 }, { "epoch": 111.0, "grad_norm": 19.484764099121094, "learning_rate": 9.926587301587303e-06, "loss": 37.1289, "step": 4662 }, { "epoch": 111.02388059701492, "grad_norm": 20.73271942138672, "learning_rate": 9.924603174603175e-06, "loss": 40.8035, "step": 4663 }, { "epoch": 111.04776119402985, "grad_norm": 14.759368896484375, "learning_rate": 9.922619047619048e-06, "loss": 42.164, "step": 4664 }, { "epoch": 111.07164179104478, "grad_norm": 20.654579162597656, "learning_rate": 9.920634920634922e-06, "loss": 41.7302, "step": 4665 }, { "epoch": 111.0955223880597, "grad_norm": 17.05702781677246, "learning_rate": 9.918650793650795e-06, "loss": 41.3278, "step": 4666 }, { "epoch": 111.11940298507463, "grad_norm": 15.701156616210938, "learning_rate": 9.916666666666668e-06, "loss": 40.7933, "step": 4667 }, { "epoch": 111.14328358208955, "grad_norm": 17.04022216796875, "learning_rate": 9.91468253968254e-06, "loss": 41.58, "step": 4668 }, { "epoch": 111.16716417910447, "grad_norm": 16.614116668701172, "learning_rate": 9.912698412698413e-06, "loss": 40.8149, "step": 4669 }, { "epoch": 111.1910447761194, "grad_norm": 17.664091110229492, "learning_rate": 9.910714285714288e-06, "loss": 40.459, "step": 4670 }, { "epoch": 111.21492537313434, "grad_norm": 17.730188369750977, "learning_rate": 9.90873015873016e-06, "loss": 42.3291, "step": 4671 }, { "epoch": 111.23880597014926, "grad_norm": 14.860199928283691, "learning_rate": 9.906746031746033e-06, "loss": 42.314, "step": 4672 }, { "epoch": 111.26268656716418, "grad_norm": 18.45416259765625, "learning_rate": 9.904761904761906e-06, "loss": 41.2486, "step": 4673 }, { "epoch": 111.2865671641791, "grad_norm": 15.178065299987793, "learning_rate": 9.902777777777778e-06, "loss": 42.0394, "step": 4674 }, { "epoch": 111.31044776119403, "grad_norm": 16.214420318603516, "learning_rate": 9.900793650793653e-06, "loss": 41.6074, "step": 4675 }, { "epoch": 111.33432835820895, "grad_norm": 19.241151809692383, "learning_rate": 9.898809523809525e-06, "loss": 42.0125, "step": 4676 }, { "epoch": 111.35820895522389, "grad_norm": 16.019407272338867, "learning_rate": 9.896825396825398e-06, "loss": 41.5867, "step": 4677 }, { "epoch": 111.38208955223881, "grad_norm": 18.017990112304688, "learning_rate": 9.89484126984127e-06, "loss": 40.4534, "step": 4678 }, { "epoch": 111.40597014925373, "grad_norm": 18.37062644958496, "learning_rate": 9.892857142857143e-06, "loss": 41.4307, "step": 4679 }, { "epoch": 111.42985074626866, "grad_norm": 18.12076187133789, "learning_rate": 9.890873015873018e-06, "loss": 42.1702, "step": 4680 }, { "epoch": 111.45373134328358, "grad_norm": 21.935218811035156, "learning_rate": 9.88888888888889e-06, "loss": 40.5205, "step": 4681 }, { "epoch": 111.4776119402985, "grad_norm": 19.747133255004883, "learning_rate": 9.886904761904763e-06, "loss": 41.7721, "step": 4682 }, { "epoch": 111.50149253731344, "grad_norm": 17.159732818603516, "learning_rate": 9.884920634920636e-06, "loss": 41.285, "step": 4683 }, { "epoch": 111.52537313432836, "grad_norm": 15.736952781677246, "learning_rate": 9.882936507936509e-06, "loss": 40.8216, "step": 4684 }, { "epoch": 111.54925373134328, "grad_norm": 17.591854095458984, "learning_rate": 9.880952380952381e-06, "loss": 40.0516, "step": 4685 }, { "epoch": 111.57313432835821, "grad_norm": 17.530582427978516, "learning_rate": 9.878968253968256e-06, "loss": 41.4235, "step": 4686 }, { "epoch": 111.59701492537313, "grad_norm": 18.394372940063477, "learning_rate": 9.876984126984128e-06, "loss": 41.7204, "step": 4687 }, { "epoch": 111.62089552238805, "grad_norm": 17.80558967590332, "learning_rate": 9.875000000000001e-06, "loss": 41.6861, "step": 4688 }, { "epoch": 111.64477611940299, "grad_norm": 19.939964294433594, "learning_rate": 9.873015873015874e-06, "loss": 41.3087, "step": 4689 }, { "epoch": 111.66865671641791, "grad_norm": 14.58205509185791, "learning_rate": 9.871031746031747e-06, "loss": 41.7955, "step": 4690 }, { "epoch": 111.69253731343284, "grad_norm": 19.98933982849121, "learning_rate": 9.869047619047621e-06, "loss": 42.3174, "step": 4691 }, { "epoch": 111.71641791044776, "grad_norm": 20.377466201782227, "learning_rate": 9.867063492063494e-06, "loss": 42.1654, "step": 4692 }, { "epoch": 111.74029850746268, "grad_norm": 19.26752471923828, "learning_rate": 9.865079365079366e-06, "loss": 41.0597, "step": 4693 }, { "epoch": 111.7641791044776, "grad_norm": 16.435440063476562, "learning_rate": 9.863095238095239e-06, "loss": 42.1122, "step": 4694 }, { "epoch": 111.78805970149254, "grad_norm": 17.955474853515625, "learning_rate": 9.861111111111112e-06, "loss": 41.0326, "step": 4695 }, { "epoch": 111.81194029850747, "grad_norm": 21.791505813598633, "learning_rate": 9.859126984126986e-06, "loss": 42.4256, "step": 4696 }, { "epoch": 111.83582089552239, "grad_norm": 17.081600189208984, "learning_rate": 9.857142857142859e-06, "loss": 41.7548, "step": 4697 }, { "epoch": 111.85970149253731, "grad_norm": 21.21491241455078, "learning_rate": 9.855158730158732e-06, "loss": 41.1434, "step": 4698 }, { "epoch": 111.88358208955223, "grad_norm": 25.082992553710938, "learning_rate": 9.853174603174604e-06, "loss": 41.2857, "step": 4699 }, { "epoch": 111.90746268656716, "grad_norm": 19.19919204711914, "learning_rate": 9.851190476190477e-06, "loss": 41.5529, "step": 4700 }, { "epoch": 111.9313432835821, "grad_norm": 32.29753494262695, "learning_rate": 9.849206349206351e-06, "loss": 42.4376, "step": 4701 }, { "epoch": 111.95522388059702, "grad_norm": 20.654430389404297, "learning_rate": 9.847222222222224e-06, "loss": 41.3052, "step": 4702 }, { "epoch": 111.97910447761194, "grad_norm": 32.98462677001953, "learning_rate": 9.845238095238097e-06, "loss": 41.1561, "step": 4703 }, { "epoch": 112.0, "grad_norm": 18.214174270629883, "learning_rate": 9.843253968253968e-06, "loss": 35.3902, "step": 4704 }, { "epoch": 112.02388059701492, "grad_norm": 25.639781951904297, "learning_rate": 9.841269841269842e-06, "loss": 40.7291, "step": 4705 }, { "epoch": 112.04776119402985, "grad_norm": 19.745450973510742, "learning_rate": 9.839285714285715e-06, "loss": 41.6564, "step": 4706 }, { "epoch": 112.07164179104478, "grad_norm": 24.907617568969727, "learning_rate": 9.837301587301588e-06, "loss": 41.4856, "step": 4707 }, { "epoch": 112.0955223880597, "grad_norm": 24.20347023010254, "learning_rate": 9.83531746031746e-06, "loss": 40.6423, "step": 4708 }, { "epoch": 112.11940298507463, "grad_norm": 16.246206283569336, "learning_rate": 9.833333333333333e-06, "loss": 40.5309, "step": 4709 }, { "epoch": 112.14328358208955, "grad_norm": 28.89447784423828, "learning_rate": 9.831349206349207e-06, "loss": 41.173, "step": 4710 }, { "epoch": 112.16716417910447, "grad_norm": 18.989233016967773, "learning_rate": 9.82936507936508e-06, "loss": 42.2629, "step": 4711 }, { "epoch": 112.1910447761194, "grad_norm": 22.261035919189453, "learning_rate": 9.827380952380953e-06, "loss": 41.9901, "step": 4712 }, { "epoch": 112.21492537313434, "grad_norm": 21.082855224609375, "learning_rate": 9.825396825396825e-06, "loss": 40.9817, "step": 4713 }, { "epoch": 112.23880597014926, "grad_norm": 15.739337921142578, "learning_rate": 9.823412698412698e-06, "loss": 42.0745, "step": 4714 }, { "epoch": 112.26268656716418, "grad_norm": 25.604066848754883, "learning_rate": 9.821428571428573e-06, "loss": 40.9371, "step": 4715 }, { "epoch": 112.2865671641791, "grad_norm": 17.916481018066406, "learning_rate": 9.819444444444445e-06, "loss": 40.9361, "step": 4716 }, { "epoch": 112.31044776119403, "grad_norm": 21.53338050842285, "learning_rate": 9.817460317460318e-06, "loss": 40.2245, "step": 4717 }, { "epoch": 112.33432835820895, "grad_norm": 21.370702743530273, "learning_rate": 9.81547619047619e-06, "loss": 40.7986, "step": 4718 }, { "epoch": 112.35820895522389, "grad_norm": 18.217588424682617, "learning_rate": 9.813492063492063e-06, "loss": 41.5072, "step": 4719 }, { "epoch": 112.38208955223881, "grad_norm": 18.874122619628906, "learning_rate": 9.811507936507938e-06, "loss": 39.7088, "step": 4720 }, { "epoch": 112.40597014925373, "grad_norm": 17.31776237487793, "learning_rate": 9.80952380952381e-06, "loss": 41.6839, "step": 4721 }, { "epoch": 112.42985074626866, "grad_norm": 23.88166046142578, "learning_rate": 9.807539682539683e-06, "loss": 41.7857, "step": 4722 }, { "epoch": 112.45373134328358, "grad_norm": 17.09743881225586, "learning_rate": 9.805555555555556e-06, "loss": 42.2407, "step": 4723 }, { "epoch": 112.4776119402985, "grad_norm": 20.519947052001953, "learning_rate": 9.803571428571428e-06, "loss": 41.8095, "step": 4724 }, { "epoch": 112.50149253731344, "grad_norm": 23.761943817138672, "learning_rate": 9.801587301587301e-06, "loss": 41.371, "step": 4725 }, { "epoch": 112.52537313432836, "grad_norm": 17.033470153808594, "learning_rate": 9.799603174603176e-06, "loss": 41.5687, "step": 4726 }, { "epoch": 112.54925373134328, "grad_norm": 18.175559997558594, "learning_rate": 9.797619047619048e-06, "loss": 42.2144, "step": 4727 }, { "epoch": 112.57313432835821, "grad_norm": 19.10957145690918, "learning_rate": 9.795634920634921e-06, "loss": 40.2305, "step": 4728 }, { "epoch": 112.59701492537313, "grad_norm": 20.52096176147461, "learning_rate": 9.793650793650794e-06, "loss": 42.5612, "step": 4729 }, { "epoch": 112.62089552238805, "grad_norm": 17.42753791809082, "learning_rate": 9.791666666666666e-06, "loss": 43.286, "step": 4730 }, { "epoch": 112.64477611940299, "grad_norm": 25.452363967895508, "learning_rate": 9.78968253968254e-06, "loss": 41.0071, "step": 4731 }, { "epoch": 112.66865671641791, "grad_norm": 21.480247497558594, "learning_rate": 9.787698412698413e-06, "loss": 41.7063, "step": 4732 }, { "epoch": 112.69253731343284, "grad_norm": 18.553220748901367, "learning_rate": 9.785714285714286e-06, "loss": 41.4099, "step": 4733 }, { "epoch": 112.71641791044776, "grad_norm": 25.513225555419922, "learning_rate": 9.783730158730159e-06, "loss": 41.5696, "step": 4734 }, { "epoch": 112.74029850746268, "grad_norm": 16.76629638671875, "learning_rate": 9.781746031746032e-06, "loss": 41.6305, "step": 4735 }, { "epoch": 112.7641791044776, "grad_norm": 19.330625534057617, "learning_rate": 9.779761904761906e-06, "loss": 40.7885, "step": 4736 }, { "epoch": 112.78805970149254, "grad_norm": 24.649667739868164, "learning_rate": 9.777777777777779e-06, "loss": 41.5939, "step": 4737 }, { "epoch": 112.81194029850747, "grad_norm": 15.628157615661621, "learning_rate": 9.775793650793651e-06, "loss": 40.9676, "step": 4738 }, { "epoch": 112.83582089552239, "grad_norm": 18.18578338623047, "learning_rate": 9.773809523809524e-06, "loss": 40.0681, "step": 4739 }, { "epoch": 112.85970149253731, "grad_norm": 16.768980026245117, "learning_rate": 9.771825396825397e-06, "loss": 42.2564, "step": 4740 }, { "epoch": 112.88358208955223, "grad_norm": 18.52190399169922, "learning_rate": 9.769841269841271e-06, "loss": 42.4806, "step": 4741 }, { "epoch": 112.90746268656716, "grad_norm": 20.884937286376953, "learning_rate": 9.767857142857144e-06, "loss": 41.2333, "step": 4742 }, { "epoch": 112.9313432835821, "grad_norm": 20.760377883911133, "learning_rate": 9.765873015873017e-06, "loss": 41.3071, "step": 4743 }, { "epoch": 112.95522388059702, "grad_norm": 19.27536392211914, "learning_rate": 9.76388888888889e-06, "loss": 42.3135, "step": 4744 }, { "epoch": 112.97910447761194, "grad_norm": 16.836727142333984, "learning_rate": 9.761904761904762e-06, "loss": 40.9553, "step": 4745 }, { "epoch": 113.0, "grad_norm": 15.910188674926758, "learning_rate": 9.759920634920635e-06, "loss": 35.1574, "step": 4746 }, { "epoch": 113.02388059701492, "grad_norm": 25.05491828918457, "learning_rate": 9.757936507936509e-06, "loss": 40.585, "step": 4747 }, { "epoch": 113.04776119402985, "grad_norm": NaN, "learning_rate": 9.755952380952382e-06, "loss": 62.2866, "step": 4748 }, { "epoch": 113.07164179104478, "grad_norm": 15.88016414642334, "learning_rate": 9.755952380952382e-06, "loss": 41.1309, "step": 4749 }, { "epoch": 113.0955223880597, "grad_norm": NaN, "learning_rate": 9.753968253968254e-06, "loss": 48.2293, "step": 4750 }, { "epoch": 113.11940298507463, "grad_norm": 24.244104385375977, "learning_rate": 9.753968253968254e-06, "loss": 42.1546, "step": 4751 }, { "epoch": 113.14328358208955, "grad_norm": 24.652694702148438, "learning_rate": 9.751984126984127e-06, "loss": 41.6784, "step": 4752 }, { "epoch": 113.16716417910447, "grad_norm": 17.30400276184082, "learning_rate": 9.75e-06, "loss": 41.3338, "step": 4753 }, { "epoch": 113.1910447761194, "grad_norm": 22.837020874023438, "learning_rate": 9.748015873015874e-06, "loss": 39.9112, "step": 4754 }, { "epoch": 113.21492537313434, "grad_norm": NaN, "learning_rate": 9.746031746031747e-06, "loss": 51.4889, "step": 4755 }, { "epoch": 113.23880597014926, "grad_norm": 19.977386474609375, "learning_rate": 9.746031746031747e-06, "loss": 40.8136, "step": 4756 }, { "epoch": 113.26268656716418, "grad_norm": 17.338441848754883, "learning_rate": 9.74404761904762e-06, "loss": 41.41, "step": 4757 }, { "epoch": 113.2865671641791, "grad_norm": 17.25606346130371, "learning_rate": 9.742063492063492e-06, "loss": 42.0376, "step": 4758 }, { "epoch": 113.31044776119403, "grad_norm": 18.690338134765625, "learning_rate": 9.740079365079365e-06, "loss": 39.8714, "step": 4759 }, { "epoch": 113.33432835820895, "grad_norm": 20.5388240814209, "learning_rate": 9.73809523809524e-06, "loss": 40.7767, "step": 4760 }, { "epoch": 113.35820895522389, "grad_norm": 20.36353302001953, "learning_rate": 9.736111111111112e-06, "loss": 42.7652, "step": 4761 }, { "epoch": 113.38208955223881, "grad_norm": 17.473264694213867, "learning_rate": 9.734126984126985e-06, "loss": 41.3501, "step": 4762 }, { "epoch": 113.40597014925373, "grad_norm": 16.665048599243164, "learning_rate": 9.732142857142858e-06, "loss": 40.8948, "step": 4763 }, { "epoch": 113.42985074626866, "grad_norm": 18.917985916137695, "learning_rate": 9.73015873015873e-06, "loss": 42.4226, "step": 4764 }, { "epoch": 113.45373134328358, "grad_norm": 15.448834419250488, "learning_rate": 9.728174603174605e-06, "loss": 41.5246, "step": 4765 }, { "epoch": 113.4776119402985, "grad_norm": 16.92607879638672, "learning_rate": 9.726190476190477e-06, "loss": 41.1058, "step": 4766 }, { "epoch": 113.50149253731344, "grad_norm": 16.17359161376953, "learning_rate": 9.72420634920635e-06, "loss": 41.4232, "step": 4767 }, { "epoch": 113.52537313432836, "grad_norm": 16.6822452545166, "learning_rate": 9.722222222222223e-06, "loss": 41.9703, "step": 4768 }, { "epoch": 113.54925373134328, "grad_norm": 16.724811553955078, "learning_rate": 9.720238095238095e-06, "loss": 41.4117, "step": 4769 }, { "epoch": 113.57313432835821, "grad_norm": 16.85785484313965, "learning_rate": 9.71825396825397e-06, "loss": 41.4467, "step": 4770 }, { "epoch": 113.59701492537313, "grad_norm": 19.173654556274414, "learning_rate": 9.716269841269842e-06, "loss": 40.871, "step": 4771 }, { "epoch": 113.62089552238805, "grad_norm": 16.131881713867188, "learning_rate": 9.714285714285715e-06, "loss": 42.595, "step": 4772 }, { "epoch": 113.64477611940299, "grad_norm": 15.41543960571289, "learning_rate": 9.712301587301588e-06, "loss": 41.7077, "step": 4773 }, { "epoch": 113.66865671641791, "grad_norm": 19.808330535888672, "learning_rate": 9.71031746031746e-06, "loss": 40.8761, "step": 4774 }, { "epoch": 113.69253731343284, "grad_norm": 16.406370162963867, "learning_rate": 9.708333333333333e-06, "loss": 41.1769, "step": 4775 }, { "epoch": 113.71641791044776, "grad_norm": 20.239530563354492, "learning_rate": 9.706349206349208e-06, "loss": 40.8274, "step": 4776 }, { "epoch": 113.74029850746268, "grad_norm": 18.771743774414062, "learning_rate": 9.70436507936508e-06, "loss": 41.4099, "step": 4777 }, { "epoch": 113.7641791044776, "grad_norm": 18.418540954589844, "learning_rate": 9.702380952380953e-06, "loss": 39.6443, "step": 4778 }, { "epoch": 113.78805970149254, "grad_norm": 21.50214958190918, "learning_rate": 9.700396825396826e-06, "loss": 41.6937, "step": 4779 }, { "epoch": 113.81194029850747, "grad_norm": 22.449935913085938, "learning_rate": 9.698412698412698e-06, "loss": 41.7069, "step": 4780 }, { "epoch": 113.83582089552239, "grad_norm": 15.33384895324707, "learning_rate": 9.696428571428573e-06, "loss": 40.6666, "step": 4781 }, { "epoch": 113.85970149253731, "grad_norm": 21.013437271118164, "learning_rate": 9.694444444444446e-06, "loss": 40.5768, "step": 4782 }, { "epoch": 113.88358208955223, "grad_norm": 19.128190994262695, "learning_rate": 9.692460317460318e-06, "loss": 41.4668, "step": 4783 }, { "epoch": 113.90746268656716, "grad_norm": 23.851394653320312, "learning_rate": 9.690476190476191e-06, "loss": 41.1051, "step": 4784 }, { "epoch": 113.9313432835821, "grad_norm": 21.990671157836914, "learning_rate": 9.688492063492064e-06, "loss": 41.6264, "step": 4785 }, { "epoch": 113.95522388059702, "grad_norm": 16.185327529907227, "learning_rate": 9.686507936507938e-06, "loss": 41.8408, "step": 4786 }, { "epoch": 113.97910447761194, "grad_norm": 30.063560485839844, "learning_rate": 9.68452380952381e-06, "loss": 41.2658, "step": 4787 }, { "epoch": 114.0, "grad_norm": 19.5380916595459, "learning_rate": 9.682539682539683e-06, "loss": 36.7106, "step": 4788 }, { "epoch": 114.02388059701492, "grad_norm": 26.1965389251709, "learning_rate": 9.680555555555556e-06, "loss": 42.3092, "step": 4789 }, { "epoch": 114.04776119402985, "grad_norm": 19.98543930053711, "learning_rate": 9.678571428571429e-06, "loss": 41.2309, "step": 4790 }, { "epoch": 114.07164179104478, "grad_norm": 26.361085891723633, "learning_rate": 9.676587301587303e-06, "loss": 41.9058, "step": 4791 }, { "epoch": 114.0955223880597, "grad_norm": 23.132400512695312, "learning_rate": 9.674603174603176e-06, "loss": 43.0372, "step": 4792 }, { "epoch": 114.11940298507463, "grad_norm": 25.199525833129883, "learning_rate": 9.672619047619049e-06, "loss": 41.5403, "step": 4793 }, { "epoch": 114.14328358208955, "grad_norm": 23.17612075805664, "learning_rate": 9.670634920634921e-06, "loss": 41.0863, "step": 4794 }, { "epoch": 114.16716417910447, "grad_norm": 23.930667877197266, "learning_rate": 9.668650793650794e-06, "loss": 40.8035, "step": 4795 }, { "epoch": 114.1910447761194, "grad_norm": 23.487939834594727, "learning_rate": 9.666666666666667e-06, "loss": 39.6217, "step": 4796 }, { "epoch": 114.21492537313434, "grad_norm": 23.342439651489258, "learning_rate": 9.664682539682541e-06, "loss": 42.0502, "step": 4797 }, { "epoch": 114.23880597014926, "grad_norm": 25.328317642211914, "learning_rate": 9.662698412698414e-06, "loss": 40.3101, "step": 4798 }, { "epoch": 114.26268656716418, "grad_norm": 18.363313674926758, "learning_rate": 9.660714285714287e-06, "loss": 40.5746, "step": 4799 }, { "epoch": 114.2865671641791, "grad_norm": 24.081649780273438, "learning_rate": 9.65873015873016e-06, "loss": 42.0376, "step": 4800 }, { "epoch": 114.31044776119403, "grad_norm": 20.24997329711914, "learning_rate": 9.656746031746032e-06, "loss": 40.5347, "step": 4801 }, { "epoch": 114.33432835820895, "grad_norm": 14.942011833190918, "learning_rate": 9.654761904761906e-06, "loss": 41.7814, "step": 4802 }, { "epoch": 114.35820895522389, "grad_norm": 22.662822723388672, "learning_rate": 9.652777777777779e-06, "loss": 41.767, "step": 4803 }, { "epoch": 114.38208955223881, "grad_norm": 19.27354621887207, "learning_rate": 9.650793650793652e-06, "loss": 40.7947, "step": 4804 }, { "epoch": 114.40597014925373, "grad_norm": 14.431193351745605, "learning_rate": 9.648809523809524e-06, "loss": 42.3785, "step": 4805 }, { "epoch": 114.42985074626866, "grad_norm": 15.706212043762207, "learning_rate": 9.646825396825397e-06, "loss": 42.0003, "step": 4806 }, { "epoch": 114.45373134328358, "grad_norm": 17.65169906616211, "learning_rate": 9.644841269841271e-06, "loss": 41.968, "step": 4807 }, { "epoch": 114.4776119402985, "grad_norm": 16.792739868164062, "learning_rate": 9.642857142857144e-06, "loss": 41.1987, "step": 4808 }, { "epoch": 114.50149253731344, "grad_norm": 20.06905746459961, "learning_rate": 9.640873015873017e-06, "loss": 41.0098, "step": 4809 }, { "epoch": 114.52537313432836, "grad_norm": 24.13865852355957, "learning_rate": 9.63888888888889e-06, "loss": 41.5633, "step": 4810 }, { "epoch": 114.54925373134328, "grad_norm": 16.85896873474121, "learning_rate": 9.636904761904762e-06, "loss": 41.7772, "step": 4811 }, { "epoch": 114.57313432835821, "grad_norm": 15.44628620147705, "learning_rate": 9.634920634920637e-06, "loss": 40.0732, "step": 4812 }, { "epoch": 114.59701492537313, "grad_norm": 18.970260620117188, "learning_rate": 9.63293650793651e-06, "loss": 42.318, "step": 4813 }, { "epoch": 114.62089552238805, "grad_norm": 16.574501037597656, "learning_rate": 9.630952380952382e-06, "loss": 40.0387, "step": 4814 }, { "epoch": 114.64477611940299, "grad_norm": 18.372955322265625, "learning_rate": 9.628968253968255e-06, "loss": 41.5759, "step": 4815 }, { "epoch": 114.66865671641791, "grad_norm": 21.253253936767578, "learning_rate": 9.626984126984127e-06, "loss": 40.2675, "step": 4816 }, { "epoch": 114.69253731343284, "grad_norm": 19.223817825317383, "learning_rate": 9.625e-06, "loss": 41.1779, "step": 4817 }, { "epoch": 114.71641791044776, "grad_norm": 17.391407012939453, "learning_rate": 9.623015873015875e-06, "loss": 40.9899, "step": 4818 }, { "epoch": 114.74029850746268, "grad_norm": 21.367889404296875, "learning_rate": 9.621031746031747e-06, "loss": 40.1854, "step": 4819 }, { "epoch": 114.7641791044776, "grad_norm": 21.202396392822266, "learning_rate": 9.61904761904762e-06, "loss": 41.5819, "step": 4820 }, { "epoch": 114.78805970149254, "grad_norm": 14.345793724060059, "learning_rate": 9.617063492063493e-06, "loss": 41.7843, "step": 4821 }, { "epoch": 114.81194029850747, "grad_norm": 16.483112335205078, "learning_rate": 9.615079365079365e-06, "loss": 40.9715, "step": 4822 }, { "epoch": 114.83582089552239, "grad_norm": 16.397315979003906, "learning_rate": 9.61309523809524e-06, "loss": 40.8702, "step": 4823 }, { "epoch": 114.85970149253731, "grad_norm": 14.784750938415527, "learning_rate": 9.611111111111112e-06, "loss": 40.5076, "step": 4824 }, { "epoch": 114.88358208955223, "grad_norm": 21.29036521911621, "learning_rate": 9.609126984126985e-06, "loss": 41.0657, "step": 4825 }, { "epoch": 114.90746268656716, "grad_norm": 19.237743377685547, "learning_rate": 9.607142857142858e-06, "loss": 40.7839, "step": 4826 }, { "epoch": 114.9313432835821, "grad_norm": 17.527833938598633, "learning_rate": 9.60515873015873e-06, "loss": 41.3853, "step": 4827 }, { "epoch": 114.95522388059702, "grad_norm": 16.477439880371094, "learning_rate": 9.603174603174605e-06, "loss": 41.3862, "step": 4828 }, { "epoch": 114.97910447761194, "grad_norm": 16.46197509765625, "learning_rate": 9.601190476190478e-06, "loss": 41.9143, "step": 4829 }, { "epoch": 115.0, "grad_norm": 18.8862361907959, "learning_rate": 9.59920634920635e-06, "loss": 36.444, "step": 4830 }, { "epoch": 115.02388059701492, "grad_norm": 22.985044479370117, "learning_rate": 9.597222222222223e-06, "loss": 41.3098, "step": 4831 }, { "epoch": 115.04776119402985, "grad_norm": 17.263700485229492, "learning_rate": 9.595238095238096e-06, "loss": 41.2013, "step": 4832 }, { "epoch": 115.07164179104478, "grad_norm": 21.497802734375, "learning_rate": 9.59325396825397e-06, "loss": 40.4798, "step": 4833 }, { "epoch": 115.0955223880597, "grad_norm": 20.014450073242188, "learning_rate": 9.591269841269843e-06, "loss": 41.2098, "step": 4834 }, { "epoch": 115.11940298507463, "grad_norm": 18.972618103027344, "learning_rate": 9.589285714285716e-06, "loss": 41.7606, "step": 4835 }, { "epoch": 115.14328358208955, "grad_norm": 14.9144287109375, "learning_rate": 9.587301587301588e-06, "loss": 40.7529, "step": 4836 }, { "epoch": 115.16716417910447, "grad_norm": 24.37519073486328, "learning_rate": 9.585317460317461e-06, "loss": 41.7598, "step": 4837 }, { "epoch": 115.1910447761194, "grad_norm": 23.033283233642578, "learning_rate": 9.583333333333335e-06, "loss": 41.4316, "step": 4838 }, { "epoch": 115.21492537313434, "grad_norm": 20.98251724243164, "learning_rate": 9.581349206349208e-06, "loss": 40.3066, "step": 4839 }, { "epoch": 115.23880597014926, "grad_norm": 21.950714111328125, "learning_rate": 9.57936507936508e-06, "loss": 40.1732, "step": 4840 }, { "epoch": 115.26268656716418, "grad_norm": 22.479713439941406, "learning_rate": 9.577380952380953e-06, "loss": 41.586, "step": 4841 }, { "epoch": 115.2865671641791, "grad_norm": 16.739639282226562, "learning_rate": 9.575396825396826e-06, "loss": 42.143, "step": 4842 }, { "epoch": 115.31044776119403, "grad_norm": 23.182594299316406, "learning_rate": 9.573412698412699e-06, "loss": 42.4852, "step": 4843 }, { "epoch": 115.33432835820895, "grad_norm": 23.18885040283203, "learning_rate": 9.571428571428573e-06, "loss": 40.3618, "step": 4844 }, { "epoch": 115.35820895522389, "grad_norm": 15.238030433654785, "learning_rate": 9.569444444444446e-06, "loss": 41.3859, "step": 4845 }, { "epoch": 115.38208955223881, "grad_norm": 28.07355308532715, "learning_rate": 9.567460317460319e-06, "loss": 41.1147, "step": 4846 }, { "epoch": 115.40597014925373, "grad_norm": 21.76200294494629, "learning_rate": 9.565476190476191e-06, "loss": 41.6603, "step": 4847 }, { "epoch": 115.42985074626866, "grad_norm": 32.459312438964844, "learning_rate": 9.563492063492064e-06, "loss": 40.7283, "step": 4848 }, { "epoch": 115.45373134328358, "grad_norm": 22.368288040161133, "learning_rate": 9.561507936507938e-06, "loss": 40.4951, "step": 4849 }, { "epoch": 115.4776119402985, "grad_norm": 22.91469955444336, "learning_rate": 9.559523809523811e-06, "loss": 41.117, "step": 4850 }, { "epoch": 115.50149253731344, "grad_norm": 20.357376098632812, "learning_rate": 9.557539682539684e-06, "loss": 41.753, "step": 4851 }, { "epoch": 115.52537313432836, "grad_norm": 21.377849578857422, "learning_rate": 9.555555555555556e-06, "loss": 41.8999, "step": 4852 }, { "epoch": 115.54925373134328, "grad_norm": 33.38006591796875, "learning_rate": 9.55357142857143e-06, "loss": 41.1317, "step": 4853 }, { "epoch": 115.57313432835821, "grad_norm": 21.435209274291992, "learning_rate": 9.551587301587304e-06, "loss": 40.1686, "step": 4854 }, { "epoch": 115.59701492537313, "grad_norm": 31.958423614501953, "learning_rate": 9.549603174603176e-06, "loss": 42.572, "step": 4855 }, { "epoch": 115.62089552238805, "grad_norm": 21.460599899291992, "learning_rate": 9.547619047619049e-06, "loss": 40.5071, "step": 4856 }, { "epoch": 115.64477611940299, "grad_norm": 33.65336227416992, "learning_rate": 9.545634920634922e-06, "loss": 41.7753, "step": 4857 }, { "epoch": 115.66865671641791, "grad_norm": 23.594022750854492, "learning_rate": 9.543650793650794e-06, "loss": 41.4436, "step": 4858 }, { "epoch": 115.69253731343284, "grad_norm": 23.563594818115234, "learning_rate": 9.541666666666669e-06, "loss": 39.9414, "step": 4859 }, { "epoch": 115.71641791044776, "grad_norm": 24.98297882080078, "learning_rate": 9.539682539682541e-06, "loss": 40.8619, "step": 4860 }, { "epoch": 115.74029850746268, "grad_norm": 22.393163681030273, "learning_rate": 9.537698412698414e-06, "loss": 42.8338, "step": 4861 }, { "epoch": 115.7641791044776, "grad_norm": 30.07286834716797, "learning_rate": 9.535714285714287e-06, "loss": 41.2226, "step": 4862 }, { "epoch": 115.78805970149254, "grad_norm": 22.388198852539062, "learning_rate": 9.53373015873016e-06, "loss": 41.1935, "step": 4863 }, { "epoch": 115.81194029850747, "grad_norm": 33.4913215637207, "learning_rate": 9.531746031746032e-06, "loss": 42.5784, "step": 4864 }, { "epoch": 115.83582089552239, "grad_norm": 25.117082595825195, "learning_rate": 9.529761904761905e-06, "loss": 39.364, "step": 4865 }, { "epoch": 115.85970149253731, "grad_norm": 37.31660079956055, "learning_rate": 9.527777777777778e-06, "loss": 41.5319, "step": 4866 }, { "epoch": 115.88358208955223, "grad_norm": 28.936159133911133, "learning_rate": 9.52579365079365e-06, "loss": 41.757, "step": 4867 }, { "epoch": 115.90746268656716, "grad_norm": 34.599647521972656, "learning_rate": 9.523809523809525e-06, "loss": 41.6518, "step": 4868 }, { "epoch": 115.9313432835821, "grad_norm": 27.539873123168945, "learning_rate": 9.521825396825397e-06, "loss": 40.9794, "step": 4869 }, { "epoch": 115.95522388059702, "grad_norm": 37.74484634399414, "learning_rate": 9.51984126984127e-06, "loss": 40.8585, "step": 4870 }, { "epoch": 115.97910447761194, "grad_norm": 32.444847106933594, "learning_rate": 9.517857142857143e-06, "loss": 41.7152, "step": 4871 }, { "epoch": 116.0, "grad_norm": 32.239253997802734, "learning_rate": 9.515873015873016e-06, "loss": 35.2825, "step": 4872 }, { "epoch": 116.02388059701492, "grad_norm": 35.12287521362305, "learning_rate": 9.51388888888889e-06, "loss": 41.7451, "step": 4873 }, { "epoch": 116.04776119402985, "grad_norm": 28.03133773803711, "learning_rate": 9.511904761904763e-06, "loss": 40.8461, "step": 4874 }, { "epoch": 116.07164179104478, "grad_norm": 25.59912872314453, "learning_rate": 9.509920634920635e-06, "loss": 41.5307, "step": 4875 }, { "epoch": 116.0955223880597, "grad_norm": 31.361936569213867, "learning_rate": 9.507936507936508e-06, "loss": 41.9054, "step": 4876 }, { "epoch": 116.11940298507463, "grad_norm": 21.869449615478516, "learning_rate": 9.50595238095238e-06, "loss": 40.38, "step": 4877 }, { "epoch": 116.14328358208955, "grad_norm": 38.86557388305664, "learning_rate": 9.503968253968255e-06, "loss": 42.0518, "step": 4878 }, { "epoch": 116.16716417910447, "grad_norm": 31.712495803833008, "learning_rate": 9.501984126984128e-06, "loss": 40.2141, "step": 4879 }, { "epoch": 116.1910447761194, "grad_norm": 34.77455520629883, "learning_rate": 9.5e-06, "loss": 41.5116, "step": 4880 }, { "epoch": 116.21492537313434, "grad_norm": 28.530269622802734, "learning_rate": 9.498015873015873e-06, "loss": 40.6907, "step": 4881 }, { "epoch": 116.23880597014926, "grad_norm": 28.550081253051758, "learning_rate": 9.496031746031746e-06, "loss": 41.0168, "step": 4882 }, { "epoch": 116.26268656716418, "grad_norm": 28.081035614013672, "learning_rate": 9.494047619047619e-06, "loss": 42.3482, "step": 4883 }, { "epoch": 116.2865671641791, "grad_norm": 39.402713775634766, "learning_rate": 9.492063492063493e-06, "loss": 41.3423, "step": 4884 }, { "epoch": 116.31044776119403, "grad_norm": 30.37664794921875, "learning_rate": 9.490079365079366e-06, "loss": 41.0571, "step": 4885 }, { "epoch": 116.33432835820895, "grad_norm": 33.314979553222656, "learning_rate": 9.488095238095238e-06, "loss": 41.7844, "step": 4886 }, { "epoch": 116.35820895522389, "grad_norm": 31.91356658935547, "learning_rate": 9.486111111111111e-06, "loss": 42.6115, "step": 4887 }, { "epoch": 116.38208955223881, "grad_norm": 33.23076629638672, "learning_rate": 9.484126984126984e-06, "loss": 42.9912, "step": 4888 }, { "epoch": 116.40597014925373, "grad_norm": 33.23727798461914, "learning_rate": 9.482142857142858e-06, "loss": 40.2839, "step": 4889 }, { "epoch": 116.42985074626866, "grad_norm": 34.349090576171875, "learning_rate": 9.480158730158731e-06, "loss": 41.3853, "step": 4890 }, { "epoch": 116.45373134328358, "grad_norm": 28.603391647338867, "learning_rate": 9.478174603174604e-06, "loss": 41.8607, "step": 4891 }, { "epoch": 116.4776119402985, "grad_norm": 30.6513671875, "learning_rate": 9.476190476190476e-06, "loss": 40.6123, "step": 4892 }, { "epoch": 116.50149253731344, "grad_norm": 26.542037963867188, "learning_rate": 9.474206349206349e-06, "loss": 40.7056, "step": 4893 }, { "epoch": 116.52537313432836, "grad_norm": 33.709774017333984, "learning_rate": 9.472222222222223e-06, "loss": 41.8717, "step": 4894 }, { "epoch": 116.54925373134328, "grad_norm": 29.847158432006836, "learning_rate": 9.470238095238096e-06, "loss": 39.7896, "step": 4895 }, { "epoch": 116.57313432835821, "grad_norm": 29.366252899169922, "learning_rate": 9.468253968253969e-06, "loss": 40.6317, "step": 4896 }, { "epoch": 116.59701492537313, "grad_norm": 27.17310905456543, "learning_rate": 9.466269841269841e-06, "loss": 41.57, "step": 4897 }, { "epoch": 116.62089552238805, "grad_norm": 29.52984619140625, "learning_rate": 9.464285714285714e-06, "loss": 41.313, "step": 4898 }, { "epoch": 116.64477611940299, "grad_norm": 25.72901725769043, "learning_rate": 9.462301587301589e-06, "loss": 39.4479, "step": 4899 }, { "epoch": 116.66865671641791, "grad_norm": 36.030372619628906, "learning_rate": 9.460317460317461e-06, "loss": 41.6829, "step": 4900 }, { "epoch": 116.69253731343284, "grad_norm": 30.29513168334961, "learning_rate": 9.458333333333334e-06, "loss": 41.8183, "step": 4901 }, { "epoch": 116.71641791044776, "grad_norm": 28.564956665039062, "learning_rate": 9.456349206349207e-06, "loss": 41.1474, "step": 4902 }, { "epoch": 116.74029850746268, "grad_norm": 24.22428321838379, "learning_rate": 9.45436507936508e-06, "loss": 41.2769, "step": 4903 }, { "epoch": 116.7641791044776, "grad_norm": 27.916051864624023, "learning_rate": 9.452380952380952e-06, "loss": 40.8082, "step": 4904 }, { "epoch": 116.78805970149254, "grad_norm": 20.302335739135742, "learning_rate": 9.450396825396826e-06, "loss": 41.0273, "step": 4905 }, { "epoch": 116.81194029850747, "grad_norm": 32.881134033203125, "learning_rate": 9.4484126984127e-06, "loss": 41.9168, "step": 4906 }, { "epoch": 116.83582089552239, "grad_norm": 26.058923721313477, "learning_rate": 9.446428571428572e-06, "loss": 41.0683, "step": 4907 }, { "epoch": 116.85970149253731, "grad_norm": 34.14630889892578, "learning_rate": 9.444444444444445e-06, "loss": 40.9509, "step": 4908 }, { "epoch": 116.88358208955223, "grad_norm": 31.35688018798828, "learning_rate": 9.442460317460317e-06, "loss": 40.551, "step": 4909 }, { "epoch": 116.90746268656716, "grad_norm": 24.473339080810547, "learning_rate": 9.440476190476192e-06, "loss": 39.3649, "step": 4910 }, { "epoch": 116.9313432835821, "grad_norm": 21.814205169677734, "learning_rate": 9.438492063492064e-06, "loss": 40.4577, "step": 4911 }, { "epoch": 116.95522388059702, "grad_norm": 29.724409103393555, "learning_rate": 9.436507936507937e-06, "loss": 40.6152, "step": 4912 }, { "epoch": 116.97910447761194, "grad_norm": 24.086170196533203, "learning_rate": 9.43452380952381e-06, "loss": 41.106, "step": 4913 }, { "epoch": 117.0, "grad_norm": 28.476037979125977, "learning_rate": 9.432539682539682e-06, "loss": 36.447, "step": 4914 }, { "epoch": 117.02388059701492, "grad_norm": 27.55150032043457, "learning_rate": 9.430555555555557e-06, "loss": 41.9582, "step": 4915 }, { "epoch": 117.04776119402985, "grad_norm": 28.565845489501953, "learning_rate": 9.42857142857143e-06, "loss": 40.9572, "step": 4916 }, { "epoch": 117.07164179104478, "grad_norm": 24.59885025024414, "learning_rate": 9.426587301587302e-06, "loss": 41.2797, "step": 4917 }, { "epoch": 117.0955223880597, "grad_norm": 21.83265495300293, "learning_rate": 9.424603174603175e-06, "loss": 41.1726, "step": 4918 }, { "epoch": 117.11940298507463, "grad_norm": 21.117053985595703, "learning_rate": 9.422619047619048e-06, "loss": 42.4423, "step": 4919 }, { "epoch": 117.14328358208955, "grad_norm": 26.478992462158203, "learning_rate": 9.420634920634922e-06, "loss": 40.2709, "step": 4920 }, { "epoch": 117.16716417910447, "grad_norm": 20.61237335205078, "learning_rate": 9.418650793650795e-06, "loss": 40.8788, "step": 4921 }, { "epoch": 117.1910447761194, "grad_norm": 32.1706657409668, "learning_rate": 9.416666666666667e-06, "loss": 41.6381, "step": 4922 }, { "epoch": 117.21492537313434, "grad_norm": 26.040164947509766, "learning_rate": 9.41468253968254e-06, "loss": 40.662, "step": 4923 }, { "epoch": 117.23880597014926, "grad_norm": 27.465307235717773, "learning_rate": 9.412698412698413e-06, "loss": 39.2348, "step": 4924 }, { "epoch": 117.26268656716418, "grad_norm": 28.407739639282227, "learning_rate": 9.410714285714286e-06, "loss": 40.981, "step": 4925 }, { "epoch": 117.2865671641791, "grad_norm": 26.080398559570312, "learning_rate": 9.40873015873016e-06, "loss": 39.726, "step": 4926 }, { "epoch": 117.31044776119403, "grad_norm": 23.23761749267578, "learning_rate": 9.406746031746033e-06, "loss": 41.9898, "step": 4927 }, { "epoch": 117.33432835820895, "grad_norm": 25.763086318969727, "learning_rate": 9.404761904761905e-06, "loss": 41.6503, "step": 4928 }, { "epoch": 117.35820895522389, "grad_norm": 25.27565574645996, "learning_rate": 9.402777777777778e-06, "loss": 41.5848, "step": 4929 }, { "epoch": 117.38208955223881, "grad_norm": 21.535991668701172, "learning_rate": 9.40079365079365e-06, "loss": 41.4816, "step": 4930 }, { "epoch": 117.40597014925373, "grad_norm": 20.212120056152344, "learning_rate": 9.398809523809525e-06, "loss": 40.8427, "step": 4931 }, { "epoch": 117.42985074626866, "grad_norm": 24.479822158813477, "learning_rate": 9.396825396825398e-06, "loss": 41.3141, "step": 4932 }, { "epoch": 117.45373134328358, "grad_norm": 14.332042694091797, "learning_rate": 9.39484126984127e-06, "loss": 41.4974, "step": 4933 }, { "epoch": 117.4776119402985, "grad_norm": 22.84208869934082, "learning_rate": 9.392857142857143e-06, "loss": 41.8713, "step": 4934 }, { "epoch": 117.50149253731344, "grad_norm": 18.916187286376953, "learning_rate": 9.390873015873016e-06, "loss": 41.2954, "step": 4935 }, { "epoch": 117.52537313432836, "grad_norm": 22.096107482910156, "learning_rate": 9.38888888888889e-06, "loss": 40.7045, "step": 4936 }, { "epoch": 117.54925373134328, "grad_norm": 20.42098045349121, "learning_rate": 9.386904761904763e-06, "loss": 42.4039, "step": 4937 }, { "epoch": 117.57313432835821, "grad_norm": 19.17930793762207, "learning_rate": 9.384920634920636e-06, "loss": 41.2849, "step": 4938 }, { "epoch": 117.59701492537313, "grad_norm": 18.003908157348633, "learning_rate": 9.382936507936508e-06, "loss": 41.2694, "step": 4939 }, { "epoch": 117.62089552238805, "grad_norm": 21.67378044128418, "learning_rate": 9.380952380952381e-06, "loss": 41.4086, "step": 4940 }, { "epoch": 117.64477611940299, "grad_norm": 14.220067024230957, "learning_rate": 9.378968253968255e-06, "loss": 40.5293, "step": 4941 }, { "epoch": 117.66865671641791, "grad_norm": 17.12972640991211, "learning_rate": 9.376984126984128e-06, "loss": 40.7469, "step": 4942 }, { "epoch": 117.69253731343284, "grad_norm": 21.055694580078125, "learning_rate": 9.375000000000001e-06, "loss": 39.6643, "step": 4943 }, { "epoch": 117.71641791044776, "grad_norm": 17.032026290893555, "learning_rate": 9.373015873015874e-06, "loss": 39.6835, "step": 4944 }, { "epoch": 117.74029850746268, "grad_norm": 22.909225463867188, "learning_rate": 9.371031746031746e-06, "loss": 41.411, "step": 4945 }, { "epoch": 117.7641791044776, "grad_norm": 15.6399564743042, "learning_rate": 9.36904761904762e-06, "loss": 41.0838, "step": 4946 }, { "epoch": 117.78805970149254, "grad_norm": 22.99868392944336, "learning_rate": 9.367063492063493e-06, "loss": 41.1988, "step": 4947 }, { "epoch": 117.81194029850747, "grad_norm": 19.78955841064453, "learning_rate": 9.365079365079366e-06, "loss": 41.5181, "step": 4948 }, { "epoch": 117.83582089552239, "grad_norm": 21.281328201293945, "learning_rate": 9.363095238095239e-06, "loss": 40.5115, "step": 4949 }, { "epoch": 117.85970149253731, "grad_norm": 19.100648880004883, "learning_rate": 9.361111111111111e-06, "loss": 40.3604, "step": 4950 }, { "epoch": 117.88358208955223, "grad_norm": 24.486183166503906, "learning_rate": 9.359126984126984e-06, "loss": 42.065, "step": 4951 }, { "epoch": 117.90746268656716, "grad_norm": 20.265453338623047, "learning_rate": 9.357142857142859e-06, "loss": 42.1137, "step": 4952 }, { "epoch": 117.9313432835821, "grad_norm": 21.281848907470703, "learning_rate": 9.355158730158731e-06, "loss": 42.0899, "step": 4953 }, { "epoch": 117.95522388059702, "grad_norm": 21.65452766418457, "learning_rate": 9.353174603174604e-06, "loss": 41.4076, "step": 4954 }, { "epoch": 117.97910447761194, "grad_norm": 19.85662841796875, "learning_rate": 9.351190476190477e-06, "loss": 40.9143, "step": 4955 }, { "epoch": 118.0, "grad_norm": 16.60548210144043, "learning_rate": 9.34920634920635e-06, "loss": 35.2268, "step": 4956 }, { "epoch": 118.02388059701492, "grad_norm": 19.02985382080078, "learning_rate": 9.347222222222224e-06, "loss": 41.6227, "step": 4957 }, { "epoch": 118.04776119402985, "grad_norm": 20.057069778442383, "learning_rate": 9.345238095238096e-06, "loss": 39.6729, "step": 4958 }, { "epoch": 118.07164179104478, "grad_norm": 16.330196380615234, "learning_rate": 9.343253968253969e-06, "loss": 41.2542, "step": 4959 }, { "epoch": 118.0955223880597, "grad_norm": 18.172393798828125, "learning_rate": 9.341269841269842e-06, "loss": 40.0607, "step": 4960 }, { "epoch": 118.11940298507463, "grad_norm": 20.96540069580078, "learning_rate": 9.339285714285715e-06, "loss": 40.585, "step": 4961 }, { "epoch": 118.14328358208955, "grad_norm": 14.967394828796387, "learning_rate": 9.337301587301589e-06, "loss": 40.2613, "step": 4962 }, { "epoch": 118.16716417910447, "grad_norm": 18.953601837158203, "learning_rate": 9.335317460317462e-06, "loss": 39.38, "step": 4963 }, { "epoch": 118.1910447761194, "grad_norm": 15.904739379882812, "learning_rate": 9.333333333333334e-06, "loss": 41.6314, "step": 4964 }, { "epoch": 118.21492537313434, "grad_norm": 24.293170928955078, "learning_rate": 9.331349206349207e-06, "loss": 40.5077, "step": 4965 }, { "epoch": 118.23880597014926, "grad_norm": 20.04494857788086, "learning_rate": 9.32936507936508e-06, "loss": 40.8951, "step": 4966 }, { "epoch": 118.26268656716418, "grad_norm": 23.613727569580078, "learning_rate": 9.327380952380954e-06, "loss": 42.0233, "step": 4967 }, { "epoch": 118.2865671641791, "grad_norm": 23.967741012573242, "learning_rate": 9.325396825396827e-06, "loss": 41.0547, "step": 4968 }, { "epoch": 118.31044776119403, "grad_norm": 19.54030418395996, "learning_rate": 9.3234126984127e-06, "loss": 41.2887, "step": 4969 }, { "epoch": 118.33432835820895, "grad_norm": 23.12442398071289, "learning_rate": 9.321428571428572e-06, "loss": 40.5083, "step": 4970 }, { "epoch": 118.35820895522389, "grad_norm": 21.34069061279297, "learning_rate": 9.319444444444445e-06, "loss": 41.3474, "step": 4971 }, { "epoch": 118.38208955223881, "grad_norm": 20.411256790161133, "learning_rate": 9.317460317460318e-06, "loss": 40.3927, "step": 4972 }, { "epoch": 118.40597014925373, "grad_norm": 21.702983856201172, "learning_rate": 9.315476190476192e-06, "loss": 41.2522, "step": 4973 }, { "epoch": 118.42985074626866, "grad_norm": 20.09593963623047, "learning_rate": 9.313492063492065e-06, "loss": 40.8607, "step": 4974 }, { "epoch": 118.45373134328358, "grad_norm": 16.693893432617188, "learning_rate": 9.311507936507937e-06, "loss": 41.9847, "step": 4975 }, { "epoch": 118.4776119402985, "grad_norm": 16.682085037231445, "learning_rate": 9.30952380952381e-06, "loss": 41.3428, "step": 4976 }, { "epoch": 118.50149253731344, "grad_norm": 16.73056983947754, "learning_rate": 9.307539682539683e-06, "loss": 40.8279, "step": 4977 }, { "epoch": 118.52537313432836, "grad_norm": 16.317480087280273, "learning_rate": 9.305555555555557e-06, "loss": 40.4602, "step": 4978 }, { "epoch": 118.54925373134328, "grad_norm": 15.660470008850098, "learning_rate": 9.30357142857143e-06, "loss": 40.7565, "step": 4979 }, { "epoch": 118.57313432835821, "grad_norm": 21.601036071777344, "learning_rate": 9.301587301587303e-06, "loss": 41.7317, "step": 4980 }, { "epoch": 118.59701492537313, "grad_norm": 16.545438766479492, "learning_rate": 9.299603174603175e-06, "loss": 42.1659, "step": 4981 }, { "epoch": 118.62089552238805, "grad_norm": 20.3563175201416, "learning_rate": 9.297619047619048e-06, "loss": 39.8948, "step": 4982 }, { "epoch": 118.64477611940299, "grad_norm": 19.03108024597168, "learning_rate": 9.295634920634922e-06, "loss": 40.6225, "step": 4983 }, { "epoch": 118.66865671641791, "grad_norm": 18.866544723510742, "learning_rate": 9.293650793650795e-06, "loss": 40.781, "step": 4984 }, { "epoch": 118.69253731343284, "grad_norm": 18.367883682250977, "learning_rate": 9.291666666666668e-06, "loss": 42.1775, "step": 4985 }, { "epoch": 118.71641791044776, "grad_norm": 17.574983596801758, "learning_rate": 9.28968253968254e-06, "loss": 40.7228, "step": 4986 }, { "epoch": 118.74029850746268, "grad_norm": 17.931612014770508, "learning_rate": 9.287698412698413e-06, "loss": 41.352, "step": 4987 }, { "epoch": 118.7641791044776, "grad_norm": NaN, "learning_rate": 9.285714285714288e-06, "loss": 37.2747, "step": 4988 }, { "epoch": 118.78805970149254, "grad_norm": 19.131587982177734, "learning_rate": 9.285714285714288e-06, "loss": 41.442, "step": 4989 }, { "epoch": 118.81194029850747, "grad_norm": 19.01002311706543, "learning_rate": 9.28373015873016e-06, "loss": 40.1583, "step": 4990 }, { "epoch": 118.83582089552239, "grad_norm": 20.718921661376953, "learning_rate": 9.281746031746033e-06, "loss": 42.1721, "step": 4991 }, { "epoch": 118.85970149253731, "grad_norm": 24.149545669555664, "learning_rate": 9.279761904761906e-06, "loss": 39.6434, "step": 4992 }, { "epoch": 118.88358208955223, "grad_norm": 19.575162887573242, "learning_rate": 9.277777777777778e-06, "loss": 41.7524, "step": 4993 }, { "epoch": 118.90746268656716, "grad_norm": 21.472047805786133, "learning_rate": 9.275793650793653e-06, "loss": 41.5381, "step": 4994 }, { "epoch": 118.9313432835821, "grad_norm": 18.96376609802246, "learning_rate": 9.273809523809525e-06, "loss": 41.8712, "step": 4995 }, { "epoch": 118.95522388059702, "grad_norm": 20.816585540771484, "learning_rate": 9.271825396825398e-06, "loss": 42.7263, "step": 4996 }, { "epoch": 118.97910447761194, "grad_norm": 18.856704711914062, "learning_rate": 9.26984126984127e-06, "loss": 42.2396, "step": 4997 }, { "epoch": 119.0, "grad_norm": 17.700910568237305, "learning_rate": 9.267857142857144e-06, "loss": 35.0377, "step": 4998 }, { "epoch": 119.02388059701492, "grad_norm": 18.852880477905273, "learning_rate": 9.265873015873016e-06, "loss": 40.1171, "step": 4999 }, { "epoch": 119.04776119402985, "grad_norm": 17.4823055267334, "learning_rate": 9.26388888888889e-06, "loss": 39.7783, "step": 5000 }, { "epoch": 119.07164179104478, "grad_norm": 22.45401954650879, "learning_rate": 9.261904761904763e-06, "loss": 41.6926, "step": 5001 }, { "epoch": 119.0955223880597, "grad_norm": 19.38802719116211, "learning_rate": 9.259920634920636e-06, "loss": 41.0149, "step": 5002 }, { "epoch": 119.11940298507463, "grad_norm": 18.921022415161133, "learning_rate": 9.257936507936509e-06, "loss": 41.2486, "step": 5003 }, { "epoch": 119.14328358208955, "grad_norm": 22.00980567932129, "learning_rate": 9.255952380952381e-06, "loss": 40.8794, "step": 5004 }, { "epoch": 119.16716417910447, "grad_norm": 13.831929206848145, "learning_rate": 9.253968253968256e-06, "loss": 40.3292, "step": 5005 }, { "epoch": 119.1910447761194, "grad_norm": 20.504989624023438, "learning_rate": 9.251984126984129e-06, "loss": 41.5119, "step": 5006 }, { "epoch": 119.21492537313434, "grad_norm": 15.127291679382324, "learning_rate": 9.250000000000001e-06, "loss": 40.214, "step": 5007 }, { "epoch": 119.23880597014926, "grad_norm": 18.562606811523438, "learning_rate": 9.248015873015874e-06, "loss": 41.0757, "step": 5008 }, { "epoch": 119.26268656716418, "grad_norm": 20.99079132080078, "learning_rate": 9.246031746031747e-06, "loss": 41.3658, "step": 5009 }, { "epoch": 119.2865671641791, "grad_norm": 17.714588165283203, "learning_rate": 9.244047619047621e-06, "loss": 41.8379, "step": 5010 }, { "epoch": 119.31044776119403, "grad_norm": 20.95669174194336, "learning_rate": 9.242063492063494e-06, "loss": 40.6619, "step": 5011 }, { "epoch": 119.33432835820895, "grad_norm": 18.291975021362305, "learning_rate": 9.240079365079366e-06, "loss": 38.9992, "step": 5012 }, { "epoch": 119.35820895522389, "grad_norm": 14.831878662109375, "learning_rate": 9.238095238095239e-06, "loss": 41.5072, "step": 5013 }, { "epoch": 119.38208955223881, "grad_norm": 17.76835823059082, "learning_rate": 9.236111111111112e-06, "loss": 41.0227, "step": 5014 }, { "epoch": 119.40597014925373, "grad_norm": 15.433774948120117, "learning_rate": 9.234126984126986e-06, "loss": 40.4539, "step": 5015 }, { "epoch": 119.42985074626866, "grad_norm": 23.18012237548828, "learning_rate": 9.232142857142859e-06, "loss": 41.8991, "step": 5016 }, { "epoch": 119.45373134328358, "grad_norm": 17.35015106201172, "learning_rate": 9.230158730158732e-06, "loss": 40.189, "step": 5017 }, { "epoch": 119.4776119402985, "grad_norm": 19.60420036315918, "learning_rate": 9.228174603174604e-06, "loss": 41.602, "step": 5018 }, { "epoch": 119.50149253731344, "grad_norm": 20.470211029052734, "learning_rate": 9.226190476190477e-06, "loss": 42.1062, "step": 5019 }, { "epoch": 119.52537313432836, "grad_norm": 16.949901580810547, "learning_rate": 9.22420634920635e-06, "loss": 41.6508, "step": 5020 }, { "epoch": 119.54925373134328, "grad_norm": 22.598966598510742, "learning_rate": 9.222222222222224e-06, "loss": 39.9819, "step": 5021 }, { "epoch": 119.57313432835821, "grad_norm": 16.502370834350586, "learning_rate": 9.220238095238097e-06, "loss": 40.1142, "step": 5022 }, { "epoch": 119.59701492537313, "grad_norm": 20.456647872924805, "learning_rate": 9.218253968253968e-06, "loss": 41.6525, "step": 5023 }, { "epoch": 119.62089552238805, "grad_norm": 18.311965942382812, "learning_rate": 9.216269841269842e-06, "loss": 41.1592, "step": 5024 }, { "epoch": 119.64477611940299, "grad_norm": 19.683259963989258, "learning_rate": 9.214285714285715e-06, "loss": 40.853, "step": 5025 }, { "epoch": 119.66865671641791, "grad_norm": 20.134082794189453, "learning_rate": 9.212301587301588e-06, "loss": 40.3045, "step": 5026 }, { "epoch": 119.69253731343284, "grad_norm": 28.281267166137695, "learning_rate": 9.21031746031746e-06, "loss": 41.6703, "step": 5027 }, { "epoch": 119.71641791044776, "grad_norm": 22.25422477722168, "learning_rate": 9.208333333333333e-06, "loss": 41.0012, "step": 5028 }, { "epoch": 119.74029850746268, "grad_norm": 15.698911666870117, "learning_rate": 9.206349206349207e-06, "loss": 39.3874, "step": 5029 }, { "epoch": 119.7641791044776, "grad_norm": 22.822614669799805, "learning_rate": 9.20436507936508e-06, "loss": 42.7782, "step": 5030 }, { "epoch": 119.78805970149254, "grad_norm": 18.489330291748047, "learning_rate": 9.202380952380953e-06, "loss": 42.2175, "step": 5031 }, { "epoch": 119.81194029850747, "grad_norm": 23.18742561340332, "learning_rate": 9.200396825396825e-06, "loss": 42.1583, "step": 5032 }, { "epoch": 119.83582089552239, "grad_norm": 24.11537742614746, "learning_rate": 9.198412698412698e-06, "loss": 40.783, "step": 5033 }, { "epoch": 119.85970149253731, "grad_norm": 16.897441864013672, "learning_rate": 9.196428571428571e-06, "loss": 40.3459, "step": 5034 }, { "epoch": 119.88358208955223, "grad_norm": 20.22298812866211, "learning_rate": 9.194444444444445e-06, "loss": 40.9984, "step": 5035 }, { "epoch": 119.90746268656716, "grad_norm": 19.373756408691406, "learning_rate": 9.192460317460318e-06, "loss": 41.8363, "step": 5036 }, { "epoch": 119.9313432835821, "grad_norm": 16.265701293945312, "learning_rate": 9.19047619047619e-06, "loss": 40.9217, "step": 5037 }, { "epoch": 119.95522388059702, "grad_norm": 28.902698516845703, "learning_rate": 9.188492063492063e-06, "loss": 41.7966, "step": 5038 }, { "epoch": 119.97910447761194, "grad_norm": 19.491430282592773, "learning_rate": 9.186507936507936e-06, "loss": 41.2973, "step": 5039 }, { "epoch": 120.0, "grad_norm": 25.749500274658203, "learning_rate": 9.18452380952381e-06, "loss": 35.3125, "step": 5040 }, { "epoch": 120.0, "step": 5040, "total_flos": 2.4776207925060864e+17, "train_loss": 3.4518184624021013, "train_runtime": 12809.9419, "train_samples_per_second": 50.136, "train_steps_per_second": 0.393 }, { "epoch": 120.02388059701492, "grad_norm": 24.0944766998291, "learning_rate": 1e-05, "loss": 41.0597, "step": 5041 }, { "epoch": 120.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998168498168499e-06, "loss": 46.3783, "step": 5042 }, { "epoch": 120.07164179104478, "grad_norm": 259.0445861816406, "learning_rate": 9.998168498168499e-06, "loss": 46.5108, "step": 5043 }, { "epoch": 120.0955223880597, "grad_norm": 128.19775390625, "learning_rate": 9.996336996336997e-06, "loss": 45.0948, "step": 5044 }, { "epoch": 120.11940298507463, "grad_norm": 58.83436584472656, "learning_rate": 9.994505494505496e-06, "loss": 43.1635, "step": 5045 }, { "epoch": 120.14328358208955, "grad_norm": 58.79975891113281, "learning_rate": 9.992673992673994e-06, "loss": 41.6829, "step": 5046 }, { "epoch": 120.16716417910447, "grad_norm": 50.534278869628906, "learning_rate": 9.990842490842492e-06, "loss": 42.3871, "step": 5047 }, { "epoch": 120.1910447761194, "grad_norm": 38.682125091552734, "learning_rate": 9.98901098901099e-06, "loss": 40.9709, "step": 5048 }, { "epoch": 120.21492537313434, "grad_norm": 35.06442642211914, "learning_rate": 9.987179487179488e-06, "loss": 41.0217, "step": 5049 }, { "epoch": 120.23880597014926, "grad_norm": 59.00712585449219, "learning_rate": 9.985347985347986e-06, "loss": 41.7985, "step": 5050 }, { "epoch": 120.26268656716418, "grad_norm": 36.52231216430664, "learning_rate": 9.983516483516485e-06, "loss": 41.6886, "step": 5051 }, { "epoch": 120.2865671641791, "grad_norm": 35.213436126708984, "learning_rate": 9.981684981684983e-06, "loss": 40.9909, "step": 5052 }, { "epoch": 120.31044776119403, "grad_norm": 40.0443000793457, "learning_rate": 9.97985347985348e-06, "loss": 41.1657, "step": 5053 }, { "epoch": 120.33432835820895, "grad_norm": 27.66771697998047, "learning_rate": 9.978021978021979e-06, "loss": 41.327, "step": 5054 }, { "epoch": 120.35820895522389, "grad_norm": 34.4952507019043, "learning_rate": 9.976190476190477e-06, "loss": 40.8086, "step": 5055 }, { "epoch": 120.38208955223881, "grad_norm": 26.404708862304688, "learning_rate": 9.974358974358974e-06, "loss": 41.0862, "step": 5056 }, { "epoch": 120.40597014925373, "grad_norm": 24.669050216674805, "learning_rate": 9.972527472527474e-06, "loss": 40.6639, "step": 5057 }, { "epoch": 120.42985074626866, "grad_norm": 29.60878562927246, "learning_rate": 9.970695970695972e-06, "loss": 40.8127, "step": 5058 }, { "epoch": 120.45373134328358, "grad_norm": 17.245283126831055, "learning_rate": 9.96886446886447e-06, "loss": 41.6983, "step": 5059 }, { "epoch": 120.4776119402985, "grad_norm": 26.338546752929688, "learning_rate": 9.967032967032968e-06, "loss": 40.5917, "step": 5060 }, { "epoch": 120.50149253731344, "grad_norm": 25.838808059692383, "learning_rate": 9.965201465201466e-06, "loss": 41.6386, "step": 5061 }, { "epoch": 120.52537313432836, "grad_norm": 17.583539962768555, "learning_rate": 9.963369963369965e-06, "loss": 39.5372, "step": 5062 }, { "epoch": 120.54925373134328, "grad_norm": 29.433382034301758, "learning_rate": 9.961538461538463e-06, "loss": 41.2372, "step": 5063 }, { "epoch": 120.57313432835821, "grad_norm": 19.41893768310547, "learning_rate": 9.959706959706961e-06, "loss": 41.2464, "step": 5064 }, { "epoch": 120.59701492537313, "grad_norm": 20.060937881469727, "learning_rate": 9.957875457875459e-06, "loss": 41.1316, "step": 5065 }, { "epoch": 120.62089552238805, "grad_norm": 21.93149185180664, "learning_rate": 9.956043956043957e-06, "loss": 40.6738, "step": 5066 }, { "epoch": 120.64477611940299, "grad_norm": 20.02782440185547, "learning_rate": 9.954212454212454e-06, "loss": 41.0332, "step": 5067 }, { "epoch": 120.66865671641791, "grad_norm": 16.836517333984375, "learning_rate": 9.952380952380954e-06, "loss": 41.8322, "step": 5068 }, { "epoch": 120.69253731343284, "grad_norm": 19.467927932739258, "learning_rate": 9.950549450549452e-06, "loss": 42.0419, "step": 5069 }, { "epoch": 120.71641791044776, "grad_norm": 20.398895263671875, "learning_rate": 9.94871794871795e-06, "loss": 40.1522, "step": 5070 }, { "epoch": 120.74029850746268, "grad_norm": 17.445634841918945, "learning_rate": 9.946886446886448e-06, "loss": 41.1946, "step": 5071 }, { "epoch": 120.7641791044776, "grad_norm": 17.94610595703125, "learning_rate": 9.945054945054946e-06, "loss": 41.4025, "step": 5072 }, { "epoch": 120.78805970149254, "grad_norm": 25.02172088623047, "learning_rate": 9.943223443223443e-06, "loss": 42.0855, "step": 5073 }, { "epoch": 120.81194029850747, "grad_norm": 16.557662963867188, "learning_rate": 9.941391941391943e-06, "loss": 39.8862, "step": 5074 }, { "epoch": 120.83582089552239, "grad_norm": 19.688400268554688, "learning_rate": 9.939560439560441e-06, "loss": 40.9361, "step": 5075 }, { "epoch": 120.85970149253731, "grad_norm": 29.196117401123047, "learning_rate": 9.937728937728939e-06, "loss": 42.8812, "step": 5076 }, { "epoch": 120.88358208955223, "grad_norm": 17.111480712890625, "learning_rate": 9.935897435897437e-06, "loss": 41.4032, "step": 5077 }, { "epoch": 120.90746268656716, "grad_norm": 29.072128295898438, "learning_rate": 9.934065934065935e-06, "loss": 42.2839, "step": 5078 }, { "epoch": 120.9313432835821, "grad_norm": 24.953367233276367, "learning_rate": 9.932234432234434e-06, "loss": 41.5165, "step": 5079 }, { "epoch": 120.95522388059702, "grad_norm": 19.515911102294922, "learning_rate": 9.930402930402932e-06, "loss": 40.4111, "step": 5080 }, { "epoch": 120.97910447761194, "grad_norm": 23.281414031982422, "learning_rate": 9.92857142857143e-06, "loss": 40.3576, "step": 5081 }, { "epoch": 121.0, "grad_norm": 16.75458335876465, "learning_rate": 9.926739926739928e-06, "loss": 36.3203, "step": 5082 }, { "epoch": 121.02388059701492, "grad_norm": 29.20741844177246, "learning_rate": 9.924908424908426e-06, "loss": 39.9303, "step": 5083 }, { "epoch": 121.04776119402985, "grad_norm": 21.79246711730957, "learning_rate": 9.923076923076923e-06, "loss": 41.4785, "step": 5084 }, { "epoch": 121.07164179104478, "grad_norm": 29.117504119873047, "learning_rate": 9.921245421245423e-06, "loss": 41.8695, "step": 5085 }, { "epoch": 121.0955223880597, "grad_norm": 17.819120407104492, "learning_rate": 9.919413919413921e-06, "loss": 39.2762, "step": 5086 }, { "epoch": 121.11940298507463, "grad_norm": 24.556377410888672, "learning_rate": 9.917582417582419e-06, "loss": 41.5134, "step": 5087 }, { "epoch": 121.14328358208955, "grad_norm": 19.049671173095703, "learning_rate": 9.915750915750917e-06, "loss": 40.8369, "step": 5088 }, { "epoch": 121.16716417910447, "grad_norm": 20.745899200439453, "learning_rate": 9.913919413919415e-06, "loss": 41.4137, "step": 5089 }, { "epoch": 121.1910447761194, "grad_norm": 21.53566551208496, "learning_rate": 9.912087912087912e-06, "loss": 40.3688, "step": 5090 }, { "epoch": 121.21492537313434, "grad_norm": 23.52694320678711, "learning_rate": 9.910256410256412e-06, "loss": 41.1741, "step": 5091 }, { "epoch": 121.23880597014926, "grad_norm": 19.23663330078125, "learning_rate": 9.90842490842491e-06, "loss": 41.2629, "step": 5092 }, { "epoch": 121.26268656716418, "grad_norm": 20.38791847229004, "learning_rate": 9.906593406593408e-06, "loss": 40.6994, "step": 5093 }, { "epoch": 121.2865671641791, "grad_norm": 29.10164451599121, "learning_rate": 9.904761904761906e-06, "loss": 41.7159, "step": 5094 }, { "epoch": 121.31044776119403, "grad_norm": 18.191295623779297, "learning_rate": 9.902930402930403e-06, "loss": 40.0695, "step": 5095 }, { "epoch": 121.33432835820895, "grad_norm": 34.14667510986328, "learning_rate": 9.901098901098903e-06, "loss": 40.7836, "step": 5096 }, { "epoch": 121.35820895522389, "grad_norm": 25.464981079101562, "learning_rate": 9.899267399267401e-06, "loss": 40.5731, "step": 5097 }, { "epoch": 121.38208955223881, "grad_norm": 34.738773345947266, "learning_rate": 9.897435897435899e-06, "loss": 42.5079, "step": 5098 }, { "epoch": 121.40597014925373, "grad_norm": 24.047697067260742, "learning_rate": 9.895604395604397e-06, "loss": 41.9274, "step": 5099 }, { "epoch": 121.42985074626866, "grad_norm": 36.788326263427734, "learning_rate": 9.893772893772895e-06, "loss": 41.1378, "step": 5100 }, { "epoch": 121.45373134328358, "grad_norm": 26.662019729614258, "learning_rate": 9.891941391941392e-06, "loss": 41.4065, "step": 5101 }, { "epoch": 121.4776119402985, "grad_norm": 35.20701217651367, "learning_rate": 9.890109890109892e-06, "loss": 39.1299, "step": 5102 }, { "epoch": 121.50149253731344, "grad_norm": 29.675378799438477, "learning_rate": 9.88827838827839e-06, "loss": 41.0234, "step": 5103 }, { "epoch": 121.52537313432836, "grad_norm": 34.06852722167969, "learning_rate": 9.886446886446888e-06, "loss": 41.8632, "step": 5104 }, { "epoch": 121.54925373134328, "grad_norm": 25.621753692626953, "learning_rate": 9.884615384615386e-06, "loss": 40.9295, "step": 5105 }, { "epoch": 121.57313432835821, "grad_norm": 27.804433822631836, "learning_rate": 9.882783882783884e-06, "loss": 40.0458, "step": 5106 }, { "epoch": 121.59701492537313, "grad_norm": 26.332223892211914, "learning_rate": 9.880952380952381e-06, "loss": 39.7798, "step": 5107 }, { "epoch": 121.62089552238805, "grad_norm": 29.49053192138672, "learning_rate": 9.879120879120881e-06, "loss": 42.0289, "step": 5108 }, { "epoch": 121.64477611940299, "grad_norm": 24.052976608276367, "learning_rate": 9.877289377289379e-06, "loss": 40.5861, "step": 5109 }, { "epoch": 121.66865671641791, "grad_norm": 23.03173828125, "learning_rate": 9.875457875457877e-06, "loss": 40.9261, "step": 5110 }, { "epoch": 121.69253731343284, "grad_norm": 24.134889602661133, "learning_rate": 9.873626373626375e-06, "loss": 41.0466, "step": 5111 }, { "epoch": 121.71641791044776, "grad_norm": 19.443124771118164, "learning_rate": 9.871794871794872e-06, "loss": 40.4331, "step": 5112 }, { "epoch": 121.74029850746268, "grad_norm": 31.88178825378418, "learning_rate": 9.869963369963372e-06, "loss": 40.6991, "step": 5113 }, { "epoch": 121.7641791044776, "grad_norm": 21.850631713867188, "learning_rate": 9.86813186813187e-06, "loss": 41.4331, "step": 5114 }, { "epoch": 121.78805970149254, "grad_norm": 37.39925765991211, "learning_rate": 9.866300366300368e-06, "loss": 40.9437, "step": 5115 }, { "epoch": 121.81194029850747, "grad_norm": 31.58283042907715, "learning_rate": 9.864468864468866e-06, "loss": 41.0558, "step": 5116 }, { "epoch": 121.83582089552239, "grad_norm": 29.965499877929688, "learning_rate": 9.862637362637364e-06, "loss": 39.5632, "step": 5117 }, { "epoch": 121.85970149253731, "grad_norm": 25.50206756591797, "learning_rate": 9.860805860805861e-06, "loss": 41.287, "step": 5118 }, { "epoch": 121.88358208955223, "grad_norm": 34.806034088134766, "learning_rate": 9.858974358974361e-06, "loss": 41.0144, "step": 5119 }, { "epoch": 121.90746268656716, "grad_norm": 21.66145133972168, "learning_rate": 9.857142857142859e-06, "loss": 41.2587, "step": 5120 }, { "epoch": 121.9313432835821, "grad_norm": 37.883094787597656, "learning_rate": 9.855311355311357e-06, "loss": 40.7321, "step": 5121 }, { "epoch": 121.95522388059702, "grad_norm": 28.472124099731445, "learning_rate": 9.853479853479855e-06, "loss": 41.5554, "step": 5122 }, { "epoch": 121.97910447761194, "grad_norm": 35.33477783203125, "learning_rate": 9.851648351648352e-06, "loss": 42.0246, "step": 5123 }, { "epoch": 122.0, "grad_norm": 27.911645889282227, "learning_rate": 9.84981684981685e-06, "loss": 35.3824, "step": 5124 }, { "epoch": 122.02388059701492, "grad_norm": 33.792213439941406, "learning_rate": 9.84798534798535e-06, "loss": 40.2451, "step": 5125 }, { "epoch": 122.04776119402985, "grad_norm": 33.73054885864258, "learning_rate": 9.846153846153848e-06, "loss": 41.5777, "step": 5126 }, { "epoch": 122.07164179104478, "grad_norm": 29.55936622619629, "learning_rate": 9.844322344322346e-06, "loss": 40.5313, "step": 5127 }, { "epoch": 122.0955223880597, "grad_norm": 21.786413192749023, "learning_rate": 9.842490842490844e-06, "loss": 41.795, "step": 5128 }, { "epoch": 122.11940298507463, "grad_norm": 38.503475189208984, "learning_rate": 9.840659340659341e-06, "loss": 40.2868, "step": 5129 }, { "epoch": 122.14328358208955, "grad_norm": 27.126779556274414, "learning_rate": 9.83882783882784e-06, "loss": 40.5464, "step": 5130 }, { "epoch": 122.16716417910447, "grad_norm": 34.76428985595703, "learning_rate": 9.836996336996337e-06, "loss": 40.2589, "step": 5131 }, { "epoch": 122.1910447761194, "grad_norm": 37.37604522705078, "learning_rate": 9.835164835164835e-06, "loss": 40.8401, "step": 5132 }, { "epoch": 122.21492537313434, "grad_norm": 29.67528533935547, "learning_rate": 9.833333333333333e-06, "loss": 40.9921, "step": 5133 }, { "epoch": 122.23880597014926, "grad_norm": 27.43715476989746, "learning_rate": 9.831501831501832e-06, "loss": 40.4038, "step": 5134 }, { "epoch": 122.26268656716418, "grad_norm": 30.960216522216797, "learning_rate": 9.82967032967033e-06, "loss": 39.8886, "step": 5135 }, { "epoch": 122.2865671641791, "grad_norm": 27.186513900756836, "learning_rate": 9.827838827838828e-06, "loss": 42.1122, "step": 5136 }, { "epoch": 122.31044776119403, "grad_norm": 32.01823806762695, "learning_rate": 9.826007326007326e-06, "loss": 40.7854, "step": 5137 }, { "epoch": 122.33432835820895, "grad_norm": 26.988773345947266, "learning_rate": 9.824175824175824e-06, "loss": 40.5902, "step": 5138 }, { "epoch": 122.35820895522389, "grad_norm": 29.70166778564453, "learning_rate": 9.822344322344322e-06, "loss": 41.4538, "step": 5139 }, { "epoch": 122.38208955223881, "grad_norm": 25.9971981048584, "learning_rate": 9.820512820512821e-06, "loss": 39.6575, "step": 5140 }, { "epoch": 122.40597014925373, "grad_norm": 33.1441535949707, "learning_rate": 9.81868131868132e-06, "loss": 40.0902, "step": 5141 }, { "epoch": 122.42985074626866, "grad_norm": 27.196630477905273, "learning_rate": 9.816849816849817e-06, "loss": 40.0376, "step": 5142 }, { "epoch": 122.45373134328358, "grad_norm": 34.561798095703125, "learning_rate": 9.815018315018315e-06, "loss": 41.6209, "step": 5143 }, { "epoch": 122.4776119402985, "grad_norm": 33.98078155517578, "learning_rate": 9.813186813186813e-06, "loss": 40.8931, "step": 5144 }, { "epoch": 122.50149253731344, "grad_norm": 29.115427017211914, "learning_rate": 9.811355311355313e-06, "loss": 41.4718, "step": 5145 }, { "epoch": 122.52537313432836, "grad_norm": 24.698219299316406, "learning_rate": 9.80952380952381e-06, "loss": 40.2337, "step": 5146 }, { "epoch": 122.54925373134328, "grad_norm": 32.09329605102539, "learning_rate": 9.807692307692308e-06, "loss": 40.1893, "step": 5147 }, { "epoch": 122.57313432835821, "grad_norm": 28.50708770751953, "learning_rate": 9.805860805860806e-06, "loss": 41.2457, "step": 5148 }, { "epoch": 122.59701492537313, "grad_norm": 34.65631103515625, "learning_rate": 9.804029304029304e-06, "loss": 40.8311, "step": 5149 }, { "epoch": 122.62089552238805, "grad_norm": 27.82625961303711, "learning_rate": 9.802197802197802e-06, "loss": 40.3574, "step": 5150 }, { "epoch": 122.64477611940299, "grad_norm": 31.24656105041504, "learning_rate": 9.800366300366301e-06, "loss": 40.999, "step": 5151 }, { "epoch": 122.66865671641791, "grad_norm": 26.075342178344727, "learning_rate": 9.7985347985348e-06, "loss": 41.6763, "step": 5152 }, { "epoch": 122.69253731343284, "grad_norm": 28.61420440673828, "learning_rate": 9.796703296703297e-06, "loss": 41.1096, "step": 5153 }, { "epoch": 122.71641791044776, "grad_norm": 24.201374053955078, "learning_rate": 9.794871794871795e-06, "loss": 41.7294, "step": 5154 }, { "epoch": 122.74029850746268, "grad_norm": 33.25908660888672, "learning_rate": 9.793040293040293e-06, "loss": 41.0633, "step": 5155 }, { "epoch": 122.7641791044776, "grad_norm": 28.24220848083496, "learning_rate": 9.79120879120879e-06, "loss": 42.0281, "step": 5156 }, { "epoch": 122.78805970149254, "grad_norm": 34.96881103515625, "learning_rate": 9.78937728937729e-06, "loss": 40.648, "step": 5157 }, { "epoch": 122.81194029850747, "grad_norm": 29.03910255432129, "learning_rate": 9.787545787545788e-06, "loss": 41.1215, "step": 5158 }, { "epoch": 122.83582089552239, "grad_norm": 30.120044708251953, "learning_rate": 9.785714285714286e-06, "loss": 41.7353, "step": 5159 }, { "epoch": 122.85970149253731, "grad_norm": 30.23310661315918, "learning_rate": 9.783882783882784e-06, "loss": 40.7885, "step": 5160 }, { "epoch": 122.88358208955223, "grad_norm": 29.74199104309082, "learning_rate": 9.782051282051282e-06, "loss": 41.4646, "step": 5161 }, { "epoch": 122.90746268656716, "grad_norm": 27.558090209960938, "learning_rate": 9.780219780219781e-06, "loss": 41.0687, "step": 5162 }, { "epoch": 122.9313432835821, "grad_norm": 29.82993507385254, "learning_rate": 9.77838827838828e-06, "loss": 41.5666, "step": 5163 }, { "epoch": 122.95522388059702, "grad_norm": 24.96250343322754, "learning_rate": 9.776556776556777e-06, "loss": 41.1099, "step": 5164 }, { "epoch": 122.97910447761194, "grad_norm": 34.85405731201172, "learning_rate": 9.774725274725275e-06, "loss": 38.6541, "step": 5165 }, { "epoch": 123.0, "grad_norm": 28.839818954467773, "learning_rate": 9.772893772893773e-06, "loss": 35.9493, "step": 5166 }, { "epoch": 123.02388059701492, "grad_norm": 32.29933547973633, "learning_rate": 9.771062271062271e-06, "loss": 40.9199, "step": 5167 }, { "epoch": 123.04776119402985, "grad_norm": 26.617511749267578, "learning_rate": 9.76923076923077e-06, "loss": 39.6813, "step": 5168 }, { "epoch": 123.07164179104478, "grad_norm": 29.118209838867188, "learning_rate": 9.767399267399268e-06, "loss": 40.971, "step": 5169 }, { "epoch": 123.0955223880597, "grad_norm": 26.295345306396484, "learning_rate": 9.765567765567766e-06, "loss": 40.8982, "step": 5170 }, { "epoch": 123.11940298507463, "grad_norm": 33.3271369934082, "learning_rate": 9.763736263736264e-06, "loss": 41.9198, "step": 5171 }, { "epoch": 123.14328358208955, "grad_norm": 25.857398986816406, "learning_rate": 9.761904761904762e-06, "loss": 39.5357, "step": 5172 }, { "epoch": 123.16716417910447, "grad_norm": 32.97218704223633, "learning_rate": 9.76007326007326e-06, "loss": 41.1038, "step": 5173 }, { "epoch": 123.1910447761194, "grad_norm": 28.88793182373047, "learning_rate": 9.75824175824176e-06, "loss": 40.8065, "step": 5174 }, { "epoch": 123.21492537313434, "grad_norm": 24.024185180664062, "learning_rate": 9.756410256410257e-06, "loss": 39.8969, "step": 5175 }, { "epoch": 123.23880597014926, "grad_norm": 23.380300521850586, "learning_rate": 9.754578754578755e-06, "loss": 40.962, "step": 5176 }, { "epoch": 123.26268656716418, "grad_norm": 28.82596778869629, "learning_rate": 9.752747252747253e-06, "loss": 40.4959, "step": 5177 }, { "epoch": 123.2865671641791, "grad_norm": 19.895410537719727, "learning_rate": 9.750915750915751e-06, "loss": 41.0015, "step": 5178 }, { "epoch": 123.31044776119403, "grad_norm": 28.44173812866211, "learning_rate": 9.74908424908425e-06, "loss": 40.7281, "step": 5179 }, { "epoch": 123.33432835820895, "grad_norm": 22.277742385864258, "learning_rate": 9.747252747252748e-06, "loss": 40.0391, "step": 5180 }, { "epoch": 123.35820895522389, "grad_norm": 27.770545959472656, "learning_rate": 9.745421245421246e-06, "loss": 41.0649, "step": 5181 }, { "epoch": 123.38208955223881, "grad_norm": 22.383668899536133, "learning_rate": 9.743589743589744e-06, "loss": 40.735, "step": 5182 }, { "epoch": 123.40597014925373, "grad_norm": 31.16164779663086, "learning_rate": 9.741758241758242e-06, "loss": 41.1004, "step": 5183 }, { "epoch": 123.42985074626866, "grad_norm": 25.458309173583984, "learning_rate": 9.73992673992674e-06, "loss": 40.4399, "step": 5184 }, { "epoch": 123.45373134328358, "grad_norm": 37.73893356323242, "learning_rate": 9.73809523809524e-06, "loss": 42.08, "step": 5185 }, { "epoch": 123.4776119402985, "grad_norm": 28.723541259765625, "learning_rate": 9.736263736263737e-06, "loss": 40.479, "step": 5186 }, { "epoch": 123.50149253731344, "grad_norm": 30.29216194152832, "learning_rate": 9.734432234432235e-06, "loss": 40.76, "step": 5187 }, { "epoch": 123.52537313432836, "grad_norm": 25.559480667114258, "learning_rate": 9.732600732600733e-06, "loss": 39.7645, "step": 5188 }, { "epoch": 123.54925373134328, "grad_norm": 30.328344345092773, "learning_rate": 9.730769230769231e-06, "loss": 42.2182, "step": 5189 }, { "epoch": 123.57313432835821, "grad_norm": 24.075218200683594, "learning_rate": 9.728937728937729e-06, "loss": 39.7574, "step": 5190 }, { "epoch": 123.59701492537313, "grad_norm": 29.823719024658203, "learning_rate": 9.727106227106228e-06, "loss": 41.1253, "step": 5191 }, { "epoch": 123.62089552238805, "grad_norm": 21.241701126098633, "learning_rate": 9.725274725274726e-06, "loss": 41.0588, "step": 5192 }, { "epoch": 123.64477611940299, "grad_norm": 34.10343933105469, "learning_rate": 9.723443223443224e-06, "loss": 40.7287, "step": 5193 }, { "epoch": 123.66865671641791, "grad_norm": 24.037466049194336, "learning_rate": 9.721611721611722e-06, "loss": 41.1033, "step": 5194 }, { "epoch": 123.69253731343284, "grad_norm": 26.837879180908203, "learning_rate": 9.71978021978022e-06, "loss": 41.1869, "step": 5195 }, { "epoch": 123.71641791044776, "grad_norm": 22.90353012084961, "learning_rate": 9.71794871794872e-06, "loss": 41.4571, "step": 5196 }, { "epoch": 123.74029850746268, "grad_norm": 31.232582092285156, "learning_rate": 9.716117216117217e-06, "loss": 40.6233, "step": 5197 }, { "epoch": 123.7641791044776, "grad_norm": 24.480405807495117, "learning_rate": 9.714285714285715e-06, "loss": 40.8121, "step": 5198 }, { "epoch": 123.78805970149254, "grad_norm": 33.86972427368164, "learning_rate": 9.712454212454213e-06, "loss": 41.596, "step": 5199 }, { "epoch": 123.81194029850747, "grad_norm": 23.727428436279297, "learning_rate": 9.710622710622711e-06, "loss": 41.1944, "step": 5200 }, { "epoch": 123.83582089552239, "grad_norm": 32.29154586791992, "learning_rate": 9.708791208791209e-06, "loss": 40.361, "step": 5201 }, { "epoch": 123.85970149253731, "grad_norm": 22.611989974975586, "learning_rate": 9.706959706959708e-06, "loss": 39.6001, "step": 5202 }, { "epoch": 123.88358208955223, "grad_norm": 33.92005157470703, "learning_rate": 9.705128205128206e-06, "loss": 42.1112, "step": 5203 }, { "epoch": 123.90746268656716, "grad_norm": 28.993995666503906, "learning_rate": 9.703296703296704e-06, "loss": 41.436, "step": 5204 }, { "epoch": 123.9313432835821, "grad_norm": 27.87895393371582, "learning_rate": 9.701465201465202e-06, "loss": 40.7799, "step": 5205 }, { "epoch": 123.95522388059702, "grad_norm": 29.898271560668945, "learning_rate": 9.6996336996337e-06, "loss": 41.2078, "step": 5206 }, { "epoch": 123.97910447761194, "grad_norm": 24.88825798034668, "learning_rate": 9.697802197802198e-06, "loss": 40.1142, "step": 5207 }, { "epoch": 124.0, "grad_norm": 20.064050674438477, "learning_rate": 9.695970695970697e-06, "loss": 35.74, "step": 5208 }, { "epoch": 124.02388059701492, "grad_norm": 30.132843017578125, "learning_rate": 9.694139194139195e-06, "loss": 41.1361, "step": 5209 }, { "epoch": 124.04776119402985, "grad_norm": 21.139568328857422, "learning_rate": 9.692307692307693e-06, "loss": 41.4147, "step": 5210 }, { "epoch": 124.07164179104478, "grad_norm": 29.36510467529297, "learning_rate": 9.690476190476191e-06, "loss": 40.6358, "step": 5211 }, { "epoch": 124.0955223880597, "grad_norm": 27.090465545654297, "learning_rate": 9.688644688644689e-06, "loss": 41.6791, "step": 5212 }, { "epoch": 124.11940298507463, "grad_norm": 24.170644760131836, "learning_rate": 9.686813186813188e-06, "loss": 40.7704, "step": 5213 }, { "epoch": 124.14328358208955, "grad_norm": 26.26068115234375, "learning_rate": 9.684981684981686e-06, "loss": 39.3922, "step": 5214 }, { "epoch": 124.16716417910447, "grad_norm": 22.155975341796875, "learning_rate": 9.683150183150184e-06, "loss": 40.9681, "step": 5215 }, { "epoch": 124.1910447761194, "grad_norm": 25.21603012084961, "learning_rate": 9.681318681318682e-06, "loss": 42.1241, "step": 5216 }, { "epoch": 124.21492537313434, "grad_norm": 16.18509292602539, "learning_rate": 9.67948717948718e-06, "loss": 40.9757, "step": 5217 }, { "epoch": 124.23880597014926, "grad_norm": 21.46571159362793, "learning_rate": 9.677655677655678e-06, "loss": 41.6829, "step": 5218 }, { "epoch": 124.26268656716418, "grad_norm": 18.195297241210938, "learning_rate": 9.675824175824177e-06, "loss": 40.194, "step": 5219 }, { "epoch": 124.2865671641791, "grad_norm": 18.00617790222168, "learning_rate": 9.673992673992675e-06, "loss": 39.3802, "step": 5220 }, { "epoch": 124.31044776119403, "grad_norm": 18.236934661865234, "learning_rate": 9.672161172161173e-06, "loss": 41.0138, "step": 5221 }, { "epoch": 124.33432835820895, "grad_norm": 16.526309967041016, "learning_rate": 9.670329670329671e-06, "loss": 40.2031, "step": 5222 }, { "epoch": 124.35820895522389, "grad_norm": 20.008708953857422, "learning_rate": 9.668498168498169e-06, "loss": 40.9772, "step": 5223 }, { "epoch": 124.38208955223881, "grad_norm": 14.738056182861328, "learning_rate": 9.666666666666667e-06, "loss": 40.5985, "step": 5224 }, { "epoch": 124.40597014925373, "grad_norm": 19.540645599365234, "learning_rate": 9.664835164835166e-06, "loss": 41.1823, "step": 5225 }, { "epoch": 124.42985074626866, "grad_norm": 17.26000213623047, "learning_rate": 9.663003663003664e-06, "loss": 40.2975, "step": 5226 }, { "epoch": 124.45373134328358, "grad_norm": 19.984989166259766, "learning_rate": 9.661172161172162e-06, "loss": 40.6366, "step": 5227 }, { "epoch": 124.4776119402985, "grad_norm": 24.717369079589844, "learning_rate": 9.65934065934066e-06, "loss": 40.665, "step": 5228 }, { "epoch": 124.50149253731344, "grad_norm": 16.406538009643555, "learning_rate": 9.657509157509158e-06, "loss": 40.4751, "step": 5229 }, { "epoch": 124.52537313432836, "grad_norm": 23.191200256347656, "learning_rate": 9.655677655677657e-06, "loss": 40.6781, "step": 5230 }, { "epoch": 124.54925373134328, "grad_norm": 18.91063690185547, "learning_rate": 9.653846153846155e-06, "loss": 40.9245, "step": 5231 }, { "epoch": 124.57313432835821, "grad_norm": 23.012889862060547, "learning_rate": 9.652014652014653e-06, "loss": 41.7688, "step": 5232 }, { "epoch": 124.59701492537313, "grad_norm": 20.35813331604004, "learning_rate": 9.650183150183151e-06, "loss": 40.9169, "step": 5233 }, { "epoch": 124.62089552238805, "grad_norm": 22.06452751159668, "learning_rate": 9.648351648351649e-06, "loss": 41.0061, "step": 5234 }, { "epoch": 124.64477611940299, "grad_norm": 23.17784309387207, "learning_rate": 9.646520146520147e-06, "loss": 40.7811, "step": 5235 }, { "epoch": 124.66865671641791, "grad_norm": 19.43151092529297, "learning_rate": 9.644688644688646e-06, "loss": 40.3725, "step": 5236 }, { "epoch": 124.69253731343284, "grad_norm": 23.144960403442383, "learning_rate": 9.642857142857144e-06, "loss": 42.382, "step": 5237 }, { "epoch": 124.71641791044776, "grad_norm": 18.223936080932617, "learning_rate": 9.641025641025642e-06, "loss": 40.3686, "step": 5238 }, { "epoch": 124.74029850746268, "grad_norm": 21.855030059814453, "learning_rate": 9.63919413919414e-06, "loss": 41.8939, "step": 5239 }, { "epoch": 124.7641791044776, "grad_norm": 21.252012252807617, "learning_rate": 9.637362637362638e-06, "loss": 40.5954, "step": 5240 }, { "epoch": 124.78805970149254, "grad_norm": 20.320215225219727, "learning_rate": 9.635531135531136e-06, "loss": 40.2941, "step": 5241 }, { "epoch": 124.81194029850747, "grad_norm": 20.874921798706055, "learning_rate": 9.633699633699635e-06, "loss": 39.8523, "step": 5242 }, { "epoch": 124.83582089552239, "grad_norm": 20.560138702392578, "learning_rate": 9.631868131868133e-06, "loss": 40.1141, "step": 5243 }, { "epoch": 124.85970149253731, "grad_norm": 16.412206649780273, "learning_rate": 9.630036630036631e-06, "loss": 41.1038, "step": 5244 }, { "epoch": 124.88358208955223, "grad_norm": 20.39592170715332, "learning_rate": 9.628205128205129e-06, "loss": 39.8884, "step": 5245 }, { "epoch": 124.90746268656716, "grad_norm": 16.07599639892578, "learning_rate": 9.626373626373627e-06, "loss": 40.7929, "step": 5246 }, { "epoch": 124.9313432835821, "grad_norm": 14.183424949645996, "learning_rate": 9.624542124542126e-06, "loss": 41.5261, "step": 5247 }, { "epoch": 124.95522388059702, "grad_norm": 17.80473518371582, "learning_rate": 9.622710622710624e-06, "loss": 39.6694, "step": 5248 }, { "epoch": 124.97910447761194, "grad_norm": 16.59119987487793, "learning_rate": 9.620879120879122e-06, "loss": 40.9024, "step": 5249 }, { "epoch": 125.0, "grad_norm": 15.37125301361084, "learning_rate": 9.61904761904762e-06, "loss": 35.3595, "step": 5250 }, { "epoch": 125.02388059701492, "grad_norm": 18.345430374145508, "learning_rate": 9.617216117216118e-06, "loss": 40.2401, "step": 5251 }, { "epoch": 125.04776119402985, "grad_norm": 17.491918563842773, "learning_rate": 9.615384615384616e-06, "loss": 39.8787, "step": 5252 }, { "epoch": 125.07164179104478, "grad_norm": 16.483713150024414, "learning_rate": 9.613553113553115e-06, "loss": 41.3826, "step": 5253 }, { "epoch": 125.0955223880597, "grad_norm": 15.222822189331055, "learning_rate": 9.611721611721613e-06, "loss": 41.1321, "step": 5254 }, { "epoch": 125.11940298507463, "grad_norm": 16.675804138183594, "learning_rate": 9.609890109890111e-06, "loss": 41.0334, "step": 5255 }, { "epoch": 125.14328358208955, "grad_norm": 17.025266647338867, "learning_rate": 9.608058608058609e-06, "loss": 40.6213, "step": 5256 }, { "epoch": 125.16716417910447, "grad_norm": 15.499921798706055, "learning_rate": 9.606227106227107e-06, "loss": 39.8817, "step": 5257 }, { "epoch": 125.1910447761194, "grad_norm": 14.926107406616211, "learning_rate": 9.604395604395605e-06, "loss": 42.1196, "step": 5258 }, { "epoch": 125.21492537313434, "grad_norm": 17.896583557128906, "learning_rate": 9.602564102564104e-06, "loss": 40.1941, "step": 5259 }, { "epoch": 125.23880597014926, "grad_norm": 21.413713455200195, "learning_rate": 9.600732600732602e-06, "loss": 39.9496, "step": 5260 }, { "epoch": 125.26268656716418, "grad_norm": 20.262035369873047, "learning_rate": 9.5989010989011e-06, "loss": 40.8554, "step": 5261 }, { "epoch": 125.2865671641791, "grad_norm": 17.94382095336914, "learning_rate": 9.597069597069598e-06, "loss": 40.618, "step": 5262 }, { "epoch": 125.31044776119403, "grad_norm": 14.720929145812988, "learning_rate": 9.595238095238096e-06, "loss": 41.6634, "step": 5263 }, { "epoch": 125.33432835820895, "grad_norm": 21.567907333374023, "learning_rate": 9.593406593406595e-06, "loss": 41.6142, "step": 5264 }, { "epoch": 125.35820895522389, "grad_norm": 23.717586517333984, "learning_rate": 9.591575091575093e-06, "loss": 41.458, "step": 5265 }, { "epoch": 125.38208955223881, "grad_norm": 13.948038101196289, "learning_rate": 9.589743589743591e-06, "loss": 41.2909, "step": 5266 }, { "epoch": 125.40597014925373, "grad_norm": 27.392465591430664, "learning_rate": 9.587912087912089e-06, "loss": 41.3308, "step": 5267 }, { "epoch": 125.42985074626866, "grad_norm": 20.557374954223633, "learning_rate": 9.586080586080587e-06, "loss": 41.7717, "step": 5268 }, { "epoch": 125.45373134328358, "grad_norm": 21.076601028442383, "learning_rate": 9.584249084249085e-06, "loss": 39.8928, "step": 5269 }, { "epoch": 125.4776119402985, "grad_norm": 22.425079345703125, "learning_rate": 9.582417582417584e-06, "loss": 41.5856, "step": 5270 }, { "epoch": 125.50149253731344, "grad_norm": 19.16175079345703, "learning_rate": 9.580586080586082e-06, "loss": 40.7785, "step": 5271 }, { "epoch": 125.52537313432836, "grad_norm": 18.763565063476562, "learning_rate": 9.57875457875458e-06, "loss": 39.8277, "step": 5272 }, { "epoch": 125.54925373134328, "grad_norm": 18.3720645904541, "learning_rate": 9.576923076923078e-06, "loss": 40.9783, "step": 5273 }, { "epoch": 125.57313432835821, "grad_norm": 18.62623405456543, "learning_rate": 9.575091575091576e-06, "loss": 40.5206, "step": 5274 }, { "epoch": 125.59701492537313, "grad_norm": 20.078596115112305, "learning_rate": 9.573260073260074e-06, "loss": 40.2231, "step": 5275 }, { "epoch": 125.62089552238805, "grad_norm": 28.77025032043457, "learning_rate": 9.571428571428573e-06, "loss": 40.4628, "step": 5276 }, { "epoch": 125.64477611940299, "grad_norm": 17.48457145690918, "learning_rate": 9.569597069597071e-06, "loss": 38.3776, "step": 5277 }, { "epoch": 125.66865671641791, "grad_norm": 29.077014923095703, "learning_rate": 9.567765567765569e-06, "loss": 40.7467, "step": 5278 }, { "epoch": 125.69253731343284, "grad_norm": 22.95465660095215, "learning_rate": 9.565934065934067e-06, "loss": 40.831, "step": 5279 }, { "epoch": 125.71641791044776, "grad_norm": 26.317485809326172, "learning_rate": 9.564102564102565e-06, "loss": 40.4036, "step": 5280 }, { "epoch": 125.74029850746268, "grad_norm": 20.771020889282227, "learning_rate": 9.562271062271064e-06, "loss": 40.7238, "step": 5281 }, { "epoch": 125.7641791044776, "grad_norm": 21.856155395507812, "learning_rate": 9.560439560439562e-06, "loss": 40.1953, "step": 5282 }, { "epoch": 125.78805970149254, "grad_norm": 24.748037338256836, "learning_rate": 9.55860805860806e-06, "loss": 39.5605, "step": 5283 }, { "epoch": 125.81194029850747, "grad_norm": NaN, "learning_rate": 9.556776556776558e-06, "loss": 60.4649, "step": 5284 }, { "epoch": 125.83582089552239, "grad_norm": 18.565261840820312, "learning_rate": 9.556776556776558e-06, "loss": 41.9694, "step": 5285 }, { "epoch": 125.85970149253731, "grad_norm": 29.6701717376709, "learning_rate": 9.554945054945056e-06, "loss": 41.4843, "step": 5286 }, { "epoch": 125.88358208955223, "grad_norm": 20.192317962646484, "learning_rate": 9.553113553113554e-06, "loss": 39.8961, "step": 5287 }, { "epoch": 125.90746268656716, "grad_norm": 25.22960662841797, "learning_rate": 9.551282051282053e-06, "loss": 39.7754, "step": 5288 }, { "epoch": 125.9313432835821, "grad_norm": 19.892139434814453, "learning_rate": 9.549450549450551e-06, "loss": 40.257, "step": 5289 }, { "epoch": 125.95522388059702, "grad_norm": 18.426124572753906, "learning_rate": 9.547619047619049e-06, "loss": 41.252, "step": 5290 }, { "epoch": 125.97910447761194, "grad_norm": 24.085840225219727, "learning_rate": 9.545787545787547e-06, "loss": 41.3266, "step": 5291 }, { "epoch": 126.0, "grad_norm": 14.462137222290039, "learning_rate": 9.543956043956045e-06, "loss": 36.235, "step": 5292 }, { "epoch": 126.02388059701492, "grad_norm": 21.527910232543945, "learning_rate": 9.542124542124543e-06, "loss": 41.1059, "step": 5293 }, { "epoch": 126.04776119402985, "grad_norm": 19.539413452148438, "learning_rate": 9.540293040293042e-06, "loss": 41.8102, "step": 5294 }, { "epoch": 126.07164179104478, "grad_norm": 16.535566329956055, "learning_rate": 9.53846153846154e-06, "loss": 40.4373, "step": 5295 }, { "epoch": 126.0955223880597, "grad_norm": 30.60129737854004, "learning_rate": 9.536630036630038e-06, "loss": 40.3107, "step": 5296 }, { "epoch": 126.11940298507463, "grad_norm": 19.504737854003906, "learning_rate": 9.534798534798536e-06, "loss": 39.7933, "step": 5297 }, { "epoch": 126.14328358208955, "grad_norm": 40.68082809448242, "learning_rate": 9.532967032967034e-06, "loss": 40.4788, "step": 5298 }, { "epoch": 126.16716417910447, "grad_norm": 29.288623809814453, "learning_rate": 9.531135531135532e-06, "loss": 40.3154, "step": 5299 }, { "epoch": 126.1910447761194, "grad_norm": 39.86507797241211, "learning_rate": 9.52930402930403e-06, "loss": 40.9565, "step": 5300 }, { "epoch": 126.21492537313434, "grad_norm": 37.94214630126953, "learning_rate": 9.527472527472527e-06, "loss": 39.7678, "step": 5301 }, { "epoch": 126.23880597014926, "grad_norm": 30.128881454467773, "learning_rate": 9.525641025641025e-06, "loss": 40.9812, "step": 5302 }, { "epoch": 126.26268656716418, "grad_norm": 32.11579895019531, "learning_rate": 9.523809523809525e-06, "loss": 40.7709, "step": 5303 }, { "epoch": 126.2865671641791, "grad_norm": 30.610383987426758, "learning_rate": 9.521978021978023e-06, "loss": 39.3623, "step": 5304 }, { "epoch": 126.31044776119403, "grad_norm": 25.186908721923828, "learning_rate": 9.52014652014652e-06, "loss": 40.5833, "step": 5305 }, { "epoch": 126.33432835820895, "grad_norm": 35.33464050292969, "learning_rate": 9.518315018315018e-06, "loss": 40.4571, "step": 5306 }, { "epoch": 126.35820895522389, "grad_norm": 30.900115966796875, "learning_rate": 9.516483516483516e-06, "loss": 41.5089, "step": 5307 }, { "epoch": 126.38208955223881, "grad_norm": 30.170385360717773, "learning_rate": 9.514652014652014e-06, "loss": 40.4776, "step": 5308 }, { "epoch": 126.40597014925373, "grad_norm": 25.576396942138672, "learning_rate": 9.512820512820514e-06, "loss": 40.5452, "step": 5309 }, { "epoch": 126.42985074626866, "grad_norm": 31.52381706237793, "learning_rate": 9.510989010989012e-06, "loss": 41.0569, "step": 5310 }, { "epoch": 126.45373134328358, "grad_norm": 28.613876342773438, "learning_rate": 9.50915750915751e-06, "loss": 40.4693, "step": 5311 }, { "epoch": 126.4776119402985, "grad_norm": 34.052391052246094, "learning_rate": 9.507326007326007e-06, "loss": 39.9473, "step": 5312 }, { "epoch": 126.50149253731344, "grad_norm": 28.65314292907715, "learning_rate": 9.505494505494505e-06, "loss": 39.012, "step": 5313 }, { "epoch": 126.52537313432836, "grad_norm": 28.400449752807617, "learning_rate": 9.503663003663005e-06, "loss": 40.3811, "step": 5314 }, { "epoch": 126.54925373134328, "grad_norm": 26.874284744262695, "learning_rate": 9.501831501831503e-06, "loss": 39.8546, "step": 5315 }, { "epoch": 126.57313432835821, "grad_norm": 29.638126373291016, "learning_rate": 9.5e-06, "loss": 41.6115, "step": 5316 }, { "epoch": 126.59701492537313, "grad_norm": 27.8295841217041, "learning_rate": 9.498168498168498e-06, "loss": 40.7197, "step": 5317 }, { "epoch": 126.62089552238805, "grad_norm": 34.33130645751953, "learning_rate": 9.496336996336996e-06, "loss": 41.2572, "step": 5318 }, { "epoch": 126.64477611940299, "grad_norm": 28.334978103637695, "learning_rate": 9.494505494505494e-06, "loss": 41.4411, "step": 5319 }, { "epoch": 126.66865671641791, "grad_norm": 28.492050170898438, "learning_rate": 9.492673992673994e-06, "loss": 39.6498, "step": 5320 }, { "epoch": 126.69253731343284, "grad_norm": 26.12206268310547, "learning_rate": 9.490842490842492e-06, "loss": 41.3662, "step": 5321 }, { "epoch": 126.71641791044776, "grad_norm": 31.722883224487305, "learning_rate": 9.48901098901099e-06, "loss": 41.3887, "step": 5322 }, { "epoch": 126.74029850746268, "grad_norm": 26.18402862548828, "learning_rate": 9.487179487179487e-06, "loss": 41.5819, "step": 5323 }, { "epoch": 126.7641791044776, "grad_norm": 29.256437301635742, "learning_rate": 9.485347985347985e-06, "loss": 41.0093, "step": 5324 }, { "epoch": 126.78805970149254, "grad_norm": 26.79650115966797, "learning_rate": 9.483516483516483e-06, "loss": 39.9968, "step": 5325 }, { "epoch": 126.81194029850747, "grad_norm": 27.056190490722656, "learning_rate": 9.481684981684983e-06, "loss": 39.1216, "step": 5326 }, { "epoch": 126.83582089552239, "grad_norm": 22.335859298706055, "learning_rate": 9.47985347985348e-06, "loss": 40.7778, "step": 5327 }, { "epoch": 126.85970149253731, "grad_norm": NaN, "learning_rate": 9.478021978021978e-06, "loss": 58.1092, "step": 5328 }, { "epoch": 126.88358208955223, "grad_norm": 24.419767379760742, "learning_rate": 9.478021978021978e-06, "loss": 40.4599, "step": 5329 }, { "epoch": 126.90746268656716, "grad_norm": 20.041467666625977, "learning_rate": 9.476190476190476e-06, "loss": 40.1727, "step": 5330 }, { "epoch": 126.9313432835821, "grad_norm": 26.40553855895996, "learning_rate": 9.474358974358974e-06, "loss": 41.0726, "step": 5331 }, { "epoch": 126.95522388059702, "grad_norm": 24.665653228759766, "learning_rate": 9.472527472527474e-06, "loss": 42.0728, "step": 5332 }, { "epoch": 126.97910447761194, "grad_norm": 21.605026245117188, "learning_rate": 9.470695970695972e-06, "loss": 40.6338, "step": 5333 }, { "epoch": 127.0, "grad_norm": 20.224733352661133, "learning_rate": 9.46886446886447e-06, "loss": 35.2993, "step": 5334 }, { "epoch": 127.02388059701492, "grad_norm": 21.891176223754883, "learning_rate": 9.467032967032967e-06, "loss": 40.7857, "step": 5335 }, { "epoch": 127.04776119402985, "grad_norm": 24.02487564086914, "learning_rate": 9.465201465201465e-06, "loss": 39.9996, "step": 5336 }, { "epoch": 127.07164179104478, "grad_norm": 26.67331314086914, "learning_rate": 9.463369963369963e-06, "loss": 41.6034, "step": 5337 }, { "epoch": 127.0955223880597, "grad_norm": 18.6497802734375, "learning_rate": 9.461538461538463e-06, "loss": 42.0516, "step": 5338 }, { "epoch": 127.11940298507463, "grad_norm": 31.833471298217773, "learning_rate": 9.45970695970696e-06, "loss": 40.597, "step": 5339 }, { "epoch": 127.14328358208955, "grad_norm": 27.305522918701172, "learning_rate": 9.457875457875458e-06, "loss": 40.7429, "step": 5340 }, { "epoch": 127.16716417910447, "grad_norm": 29.530677795410156, "learning_rate": 9.456043956043956e-06, "loss": 40.4873, "step": 5341 }, { "epoch": 127.1910447761194, "grad_norm": 29.030101776123047, "learning_rate": 9.454212454212454e-06, "loss": 39.0437, "step": 5342 }, { "epoch": 127.21492537313434, "grad_norm": NaN, "learning_rate": 9.452380952380952e-06, "loss": 60.0619, "step": 5343 }, { "epoch": 127.23880597014926, "grad_norm": 31.528074264526367, "learning_rate": 9.452380952380952e-06, "loss": 40.4362, "step": 5344 }, { "epoch": 127.26268656716418, "grad_norm": 28.54173469543457, "learning_rate": 9.450549450549452e-06, "loss": 41.0408, "step": 5345 }, { "epoch": 127.2865671641791, "grad_norm": 33.24728775024414, "learning_rate": 9.44871794871795e-06, "loss": 40.6743, "step": 5346 }, { "epoch": 127.31044776119403, "grad_norm": 29.59555435180664, "learning_rate": 9.446886446886447e-06, "loss": 40.3809, "step": 5347 }, { "epoch": 127.33432835820895, "grad_norm": 32.15523147583008, "learning_rate": 9.445054945054945e-06, "loss": 40.6554, "step": 5348 }, { "epoch": 127.35820895522389, "grad_norm": 31.1282901763916, "learning_rate": 9.443223443223443e-06, "loss": 39.8469, "step": 5349 }, { "epoch": 127.38208955223881, "grad_norm": 30.29323959350586, "learning_rate": 9.441391941391943e-06, "loss": 40.155, "step": 5350 }, { "epoch": 127.40597014925373, "grad_norm": 28.87554931640625, "learning_rate": 9.43956043956044e-06, "loss": 39.2159, "step": 5351 }, { "epoch": 127.42985074626866, "grad_norm": 28.77309226989746, "learning_rate": 9.437728937728938e-06, "loss": 40.7679, "step": 5352 }, { "epoch": 127.45373134328358, "grad_norm": 24.797941207885742, "learning_rate": 9.435897435897436e-06, "loss": 40.2341, "step": 5353 }, { "epoch": 127.4776119402985, "grad_norm": 30.226261138916016, "learning_rate": 9.434065934065934e-06, "loss": 41.5231, "step": 5354 }, { "epoch": 127.50149253731344, "grad_norm": 23.113122940063477, "learning_rate": 9.432234432234432e-06, "loss": 40.5916, "step": 5355 }, { "epoch": 127.52537313432836, "grad_norm": 36.03447341918945, "learning_rate": 9.430402930402932e-06, "loss": 40.8739, "step": 5356 }, { "epoch": 127.54925373134328, "grad_norm": 32.825225830078125, "learning_rate": 9.42857142857143e-06, "loss": 40.2842, "step": 5357 }, { "epoch": 127.57313432835821, "grad_norm": 26.586402893066406, "learning_rate": 9.426739926739927e-06, "loss": 40.3329, "step": 5358 }, { "epoch": 127.59701492537313, "grad_norm": 25.394254684448242, "learning_rate": 9.424908424908425e-06, "loss": 41.4049, "step": 5359 }, { "epoch": 127.62089552238805, "grad_norm": 28.440998077392578, "learning_rate": 9.423076923076923e-06, "loss": 39.6021, "step": 5360 }, { "epoch": 127.64477611940299, "grad_norm": 25.379180908203125, "learning_rate": 9.421245421245421e-06, "loss": 40.6451, "step": 5361 }, { "epoch": 127.66865671641791, "grad_norm": 33.607208251953125, "learning_rate": 9.41941391941392e-06, "loss": 41.685, "step": 5362 }, { "epoch": 127.69253731343284, "grad_norm": 24.925783157348633, "learning_rate": 9.417582417582418e-06, "loss": 41.2479, "step": 5363 }, { "epoch": 127.71641791044776, "grad_norm": 34.97409439086914, "learning_rate": 9.415750915750916e-06, "loss": 40.9911, "step": 5364 }, { "epoch": 127.74029850746268, "grad_norm": 25.85514259338379, "learning_rate": 9.413919413919414e-06, "loss": 39.0472, "step": 5365 }, { "epoch": 127.7641791044776, "grad_norm": 32.1847038269043, "learning_rate": 9.412087912087912e-06, "loss": 40.6535, "step": 5366 }, { "epoch": 127.78805970149254, "grad_norm": 29.844226837158203, "learning_rate": 9.410256410256412e-06, "loss": 39.3755, "step": 5367 }, { "epoch": 127.81194029850747, "grad_norm": 31.974084854125977, "learning_rate": 9.40842490842491e-06, "loss": 41.1137, "step": 5368 }, { "epoch": 127.83582089552239, "grad_norm": 30.38601303100586, "learning_rate": 9.406593406593407e-06, "loss": 40.9944, "step": 5369 }, { "epoch": 127.85970149253731, "grad_norm": 25.314817428588867, "learning_rate": 9.404761904761905e-06, "loss": 40.0512, "step": 5370 }, { "epoch": 127.88358208955223, "grad_norm": 26.56514549255371, "learning_rate": 9.402930402930403e-06, "loss": 40.3742, "step": 5371 }, { "epoch": 127.90746268656716, "grad_norm": 27.275182723999023, "learning_rate": 9.401098901098901e-06, "loss": 41.387, "step": 5372 }, { "epoch": 127.9313432835821, "grad_norm": 24.307111740112305, "learning_rate": 9.3992673992674e-06, "loss": 41.2045, "step": 5373 }, { "epoch": 127.95522388059702, "grad_norm": 34.821327209472656, "learning_rate": 9.397435897435899e-06, "loss": 40.7932, "step": 5374 }, { "epoch": 127.97910447761194, "grad_norm": 31.300153732299805, "learning_rate": 9.395604395604396e-06, "loss": 41.0882, "step": 5375 }, { "epoch": 128.0, "grad_norm": 25.360746383666992, "learning_rate": 9.393772893772894e-06, "loss": 35.6565, "step": 5376 }, { "epoch": 128.02388059701494, "grad_norm": 32.15913772583008, "learning_rate": 9.391941391941392e-06, "loss": 39.5816, "step": 5377 }, { "epoch": 128.04776119402985, "grad_norm": 22.382192611694336, "learning_rate": 9.39010989010989e-06, "loss": 41.6509, "step": 5378 }, { "epoch": 128.07164179104478, "grad_norm": 23.480571746826172, "learning_rate": 9.38827838827839e-06, "loss": 40.7536, "step": 5379 }, { "epoch": 128.0955223880597, "grad_norm": 19.44637107849121, "learning_rate": 9.386446886446887e-06, "loss": 40.499, "step": 5380 }, { "epoch": 128.11940298507463, "grad_norm": 20.136741638183594, "learning_rate": 9.384615384615385e-06, "loss": 41.3551, "step": 5381 }, { "epoch": 128.14328358208957, "grad_norm": 20.516332626342773, "learning_rate": 9.382783882783883e-06, "loss": 40.7676, "step": 5382 }, { "epoch": 128.16716417910447, "grad_norm": 18.942041397094727, "learning_rate": 9.380952380952381e-06, "loss": 40.5404, "step": 5383 }, { "epoch": 128.1910447761194, "grad_norm": 22.05898666381836, "learning_rate": 9.37912087912088e-06, "loss": 40.9921, "step": 5384 }, { "epoch": 128.21492537313432, "grad_norm": 15.969873428344727, "learning_rate": 9.377289377289379e-06, "loss": 40.1066, "step": 5385 }, { "epoch": 128.23880597014926, "grad_norm": 18.438854217529297, "learning_rate": 9.375457875457876e-06, "loss": 39.8564, "step": 5386 }, { "epoch": 128.26268656716417, "grad_norm": 18.526012420654297, "learning_rate": 9.373626373626374e-06, "loss": 40.388, "step": 5387 }, { "epoch": 128.2865671641791, "grad_norm": 13.87939167022705, "learning_rate": 9.371794871794872e-06, "loss": 41.2088, "step": 5388 }, { "epoch": 128.31044776119404, "grad_norm": 19.515592575073242, "learning_rate": 9.36996336996337e-06, "loss": 40.3321, "step": 5389 }, { "epoch": 128.33432835820895, "grad_norm": 17.547893524169922, "learning_rate": 9.36813186813187e-06, "loss": 40.0459, "step": 5390 }, { "epoch": 128.3582089552239, "grad_norm": 24.08388900756836, "learning_rate": 9.366300366300367e-06, "loss": 40.7233, "step": 5391 }, { "epoch": 128.3820895522388, "grad_norm": 25.02381134033203, "learning_rate": 9.364468864468865e-06, "loss": 41.4629, "step": 5392 }, { "epoch": 128.40597014925373, "grad_norm": 17.845233917236328, "learning_rate": 9.362637362637363e-06, "loss": 40.136, "step": 5393 }, { "epoch": 128.42985074626867, "grad_norm": 24.73293685913086, "learning_rate": 9.360805860805861e-06, "loss": 40.1744, "step": 5394 }, { "epoch": 128.45373134328358, "grad_norm": 18.738384246826172, "learning_rate": 9.358974358974359e-06, "loss": 40.9566, "step": 5395 }, { "epoch": 128.47761194029852, "grad_norm": 22.628456115722656, "learning_rate": 9.357142857142859e-06, "loss": 39.9645, "step": 5396 }, { "epoch": 128.50149253731342, "grad_norm": 19.057598114013672, "learning_rate": 9.355311355311356e-06, "loss": 38.6498, "step": 5397 }, { "epoch": 128.52537313432836, "grad_norm": 20.58139419555664, "learning_rate": 9.353479853479854e-06, "loss": 41.7546, "step": 5398 }, { "epoch": 128.54925373134327, "grad_norm": 23.596145629882812, "learning_rate": 9.351648351648352e-06, "loss": 39.7231, "step": 5399 }, { "epoch": 128.5731343283582, "grad_norm": 18.677183151245117, "learning_rate": 9.34981684981685e-06, "loss": 39.6687, "step": 5400 }, { "epoch": 128.59701492537314, "grad_norm": 22.48053550720215, "learning_rate": 9.34798534798535e-06, "loss": 41.1109, "step": 5401 }, { "epoch": 128.62089552238805, "grad_norm": 18.408390045166016, "learning_rate": 9.346153846153847e-06, "loss": 40.0313, "step": 5402 }, { "epoch": 128.644776119403, "grad_norm": 18.866302490234375, "learning_rate": 9.344322344322345e-06, "loss": 41.4068, "step": 5403 }, { "epoch": 128.6686567164179, "grad_norm": 18.15769386291504, "learning_rate": 9.342490842490843e-06, "loss": 40.0289, "step": 5404 }, { "epoch": 128.69253731343284, "grad_norm": 21.213743209838867, "learning_rate": 9.340659340659341e-06, "loss": 41.5406, "step": 5405 }, { "epoch": 128.71641791044777, "grad_norm": 14.050131797790527, "learning_rate": 9.338827838827839e-06, "loss": 40.9447, "step": 5406 }, { "epoch": 128.74029850746268, "grad_norm": 20.822832107543945, "learning_rate": 9.336996336996339e-06, "loss": 40.7616, "step": 5407 }, { "epoch": 128.76417910447762, "grad_norm": 16.915830612182617, "learning_rate": 9.335164835164836e-06, "loss": 39.8529, "step": 5408 }, { "epoch": 128.78805970149253, "grad_norm": 24.053998947143555, "learning_rate": 9.333333333333334e-06, "loss": 41.2986, "step": 5409 }, { "epoch": 128.81194029850747, "grad_norm": 21.357769012451172, "learning_rate": 9.331501831501832e-06, "loss": 41.6824, "step": 5410 }, { "epoch": 128.83582089552237, "grad_norm": 16.31240463256836, "learning_rate": 9.32967032967033e-06, "loss": 40.1068, "step": 5411 }, { "epoch": 128.8597014925373, "grad_norm": 18.027111053466797, "learning_rate": 9.327838827838828e-06, "loss": 39.9807, "step": 5412 }, { "epoch": 128.88358208955225, "grad_norm": 17.471216201782227, "learning_rate": 9.326007326007328e-06, "loss": 40.1997, "step": 5413 }, { "epoch": 128.90746268656716, "grad_norm": 14.707521438598633, "learning_rate": 9.324175824175825e-06, "loss": 40.2096, "step": 5414 }, { "epoch": 128.9313432835821, "grad_norm": 16.55643081665039, "learning_rate": 9.322344322344323e-06, "loss": 41.4804, "step": 5415 }, { "epoch": 128.955223880597, "grad_norm": 17.47356414794922, "learning_rate": 9.320512820512821e-06, "loss": 40.0436, "step": 5416 }, { "epoch": 128.97910447761194, "grad_norm": 18.92135238647461, "learning_rate": 9.318681318681319e-06, "loss": 40.1894, "step": 5417 }, { "epoch": 129.0, "grad_norm": 17.002300262451172, "learning_rate": 9.316849816849819e-06, "loss": 36.4327, "step": 5418 }, { "epoch": 129.02388059701494, "grad_norm": 18.210742950439453, "learning_rate": 9.315018315018316e-06, "loss": 40.8625, "step": 5419 }, { "epoch": 129.04776119402985, "grad_norm": 17.84212875366211, "learning_rate": 9.313186813186814e-06, "loss": 39.4351, "step": 5420 }, { "epoch": 129.07164179104478, "grad_norm": 16.789724349975586, "learning_rate": 9.311355311355312e-06, "loss": 40.7753, "step": 5421 }, { "epoch": 129.0955223880597, "grad_norm": 16.961986541748047, "learning_rate": 9.30952380952381e-06, "loss": 39.5132, "step": 5422 }, { "epoch": 129.11940298507463, "grad_norm": 19.639286041259766, "learning_rate": 9.307692307692308e-06, "loss": 41.1626, "step": 5423 }, { "epoch": 129.14328358208957, "grad_norm": 15.542900085449219, "learning_rate": 9.305860805860808e-06, "loss": 37.8755, "step": 5424 }, { "epoch": 129.16716417910447, "grad_norm": 21.57238006591797, "learning_rate": 9.304029304029305e-06, "loss": 40.1871, "step": 5425 }, { "epoch": 129.1910447761194, "grad_norm": 18.78668785095215, "learning_rate": 9.302197802197803e-06, "loss": 40.0839, "step": 5426 }, { "epoch": 129.21492537313432, "grad_norm": 19.953189849853516, "learning_rate": 9.300366300366301e-06, "loss": 39.1708, "step": 5427 }, { "epoch": 129.23880597014926, "grad_norm": 19.159618377685547, "learning_rate": 9.298534798534799e-06, "loss": 40.0572, "step": 5428 }, { "epoch": 129.26268656716417, "grad_norm": 20.108295440673828, "learning_rate": 9.296703296703297e-06, "loss": 40.5194, "step": 5429 }, { "epoch": 129.2865671641791, "grad_norm": 18.625139236450195, "learning_rate": 9.294871794871796e-06, "loss": 39.8611, "step": 5430 }, { "epoch": 129.31044776119404, "grad_norm": 23.48390007019043, "learning_rate": 9.293040293040294e-06, "loss": 39.9747, "step": 5431 }, { "epoch": 129.33432835820895, "grad_norm": 17.067564010620117, "learning_rate": 9.291208791208792e-06, "loss": 40.5828, "step": 5432 }, { "epoch": 129.3582089552239, "grad_norm": 24.928804397583008, "learning_rate": 9.28937728937729e-06, "loss": 41.1937, "step": 5433 }, { "epoch": 129.3820895522388, "grad_norm": 20.61871910095215, "learning_rate": 9.287545787545788e-06, "loss": 40.7314, "step": 5434 }, { "epoch": 129.40597014925373, "grad_norm": 28.40680694580078, "learning_rate": 9.285714285714288e-06, "loss": 40.6506, "step": 5435 }, { "epoch": 129.42985074626867, "grad_norm": 22.84246253967285, "learning_rate": 9.283882783882785e-06, "loss": 41.2816, "step": 5436 }, { "epoch": 129.45373134328358, "grad_norm": 29.882131576538086, "learning_rate": 9.282051282051283e-06, "loss": 40.7815, "step": 5437 }, { "epoch": 129.47761194029852, "grad_norm": 21.24380111694336, "learning_rate": 9.280219780219781e-06, "loss": 41.2894, "step": 5438 }, { "epoch": 129.50149253731342, "grad_norm": 23.89835548400879, "learning_rate": 9.278388278388279e-06, "loss": 40.1416, "step": 5439 }, { "epoch": 129.52537313432836, "grad_norm": 21.517475128173828, "learning_rate": 9.276556776556777e-06, "loss": 41.05, "step": 5440 }, { "epoch": 129.54925373134327, "grad_norm": 17.225387573242188, "learning_rate": 9.274725274725277e-06, "loss": 41.058, "step": 5441 }, { "epoch": 129.5731343283582, "grad_norm": 17.844186782836914, "learning_rate": 9.272893772893774e-06, "loss": 41.5639, "step": 5442 }, { "epoch": 129.59701492537314, "grad_norm": 18.40740394592285, "learning_rate": 9.271062271062272e-06, "loss": 40.1886, "step": 5443 }, { "epoch": 129.62089552238805, "grad_norm": 16.104562759399414, "learning_rate": 9.26923076923077e-06, "loss": 40.9611, "step": 5444 }, { "epoch": 129.644776119403, "grad_norm": 15.872597694396973, "learning_rate": 9.267399267399268e-06, "loss": 41.0908, "step": 5445 }, { "epoch": 129.6686567164179, "grad_norm": 16.474458694458008, "learning_rate": 9.265567765567766e-06, "loss": 39.5688, "step": 5446 }, { "epoch": 129.69253731343284, "grad_norm": 18.232454299926758, "learning_rate": 9.263736263736265e-06, "loss": 40.8028, "step": 5447 }, { "epoch": 129.71641791044777, "grad_norm": 16.598379135131836, "learning_rate": 9.261904761904763e-06, "loss": 40.5047, "step": 5448 }, { "epoch": 129.74029850746268, "grad_norm": 19.88945770263672, "learning_rate": 9.260073260073261e-06, "loss": 41.5815, "step": 5449 }, { "epoch": 129.76417910447762, "grad_norm": 18.88849449157715, "learning_rate": 9.258241758241759e-06, "loss": 41.1635, "step": 5450 }, { "epoch": 129.78805970149253, "grad_norm": 16.19620704650879, "learning_rate": 9.256410256410257e-06, "loss": 39.988, "step": 5451 }, { "epoch": 129.81194029850747, "grad_norm": 17.755510330200195, "learning_rate": 9.254578754578757e-06, "loss": 39.5852, "step": 5452 }, { "epoch": 129.83582089552237, "grad_norm": 18.566909790039062, "learning_rate": 9.252747252747254e-06, "loss": 40.5909, "step": 5453 }, { "epoch": 129.8597014925373, "grad_norm": NaN, "learning_rate": 9.250915750915752e-06, "loss": 36.0401, "step": 5454 }, { "epoch": 129.88358208955225, "grad_norm": 16.708894729614258, "learning_rate": 9.250915750915752e-06, "loss": 40.8427, "step": 5455 }, { "epoch": 129.90746268656716, "grad_norm": 18.091861724853516, "learning_rate": 9.24908424908425e-06, "loss": 40.9939, "step": 5456 }, { "epoch": 129.9313432835821, "grad_norm": 19.519044876098633, "learning_rate": 9.247252747252748e-06, "loss": 40.0013, "step": 5457 }, { "epoch": 129.955223880597, "grad_norm": 15.587594032287598, "learning_rate": 9.245421245421246e-06, "loss": 40.2778, "step": 5458 }, { "epoch": 129.97910447761194, "grad_norm": 19.010610580444336, "learning_rate": 9.243589743589745e-06, "loss": 40.7324, "step": 5459 }, { "epoch": 130.0, "grad_norm": 16.66925048828125, "learning_rate": 9.241758241758243e-06, "loss": 35.6576, "step": 5460 }, { "epoch": 130.0, "step": 5460, "total_flos": 2.6841554727339034e+17, "train_loss": 3.138686427441272, "train_runtime": 12817.2093, "train_samples_per_second": 54.283, "train_steps_per_second": 0.426 }, { "epoch": 130.02388059701494, "grad_norm": 18.575708389282227, "learning_rate": 1e-05, "loss": 40.4192, "step": 5461 }, { "epoch": 130.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998299319727893e-06, "loss": 45.3289, "step": 5462 }, { "epoch": 130.07164179104478, "grad_norm": 228.45680236816406, "learning_rate": 9.998299319727893e-06, "loss": 44.984, "step": 5463 }, { "epoch": 130.0955223880597, "grad_norm": 129.42657470703125, "learning_rate": 9.996598639455783e-06, "loss": 44.7713, "step": 5464 }, { "epoch": 130.11940298507463, "grad_norm": 55.86802291870117, "learning_rate": 9.994897959183675e-06, "loss": 43.4058, "step": 5465 }, { "epoch": 130.14328358208957, "grad_norm": 93.52910614013672, "learning_rate": 9.993197278911566e-06, "loss": 42.3555, "step": 5466 }, { "epoch": 130.16716417910447, "grad_norm": 49.01381301879883, "learning_rate": 9.991496598639456e-06, "loss": 41.0505, "step": 5467 }, { "epoch": 130.1910447761194, "grad_norm": 54.29282760620117, "learning_rate": 9.989795918367348e-06, "loss": 40.938, "step": 5468 }, { "epoch": 130.21492537313432, "grad_norm": 37.95635223388672, "learning_rate": 9.988095238095239e-06, "loss": 40.3188, "step": 5469 }, { "epoch": 130.23880597014926, "grad_norm": 36.652523040771484, "learning_rate": 9.98639455782313e-06, "loss": 41.5117, "step": 5470 }, { "epoch": 130.26268656716417, "grad_norm": 40.90021896362305, "learning_rate": 9.984693877551021e-06, "loss": 42.5635, "step": 5471 }, { "epoch": 130.2865671641791, "grad_norm": 28.823503494262695, "learning_rate": 9.982993197278913e-06, "loss": 41.404, "step": 5472 }, { "epoch": 130.31044776119404, "grad_norm": 24.62152862548828, "learning_rate": 9.981292517006804e-06, "loss": 40.9372, "step": 5473 }, { "epoch": 130.33432835820895, "grad_norm": 29.644268035888672, "learning_rate": 9.979591836734694e-06, "loss": 40.5455, "step": 5474 }, { "epoch": 130.3582089552239, "grad_norm": 21.859779357910156, "learning_rate": 9.977891156462586e-06, "loss": 41.4169, "step": 5475 }, { "epoch": 130.3820895522388, "grad_norm": 23.489789962768555, "learning_rate": 9.976190476190477e-06, "loss": 39.8592, "step": 5476 }, { "epoch": 130.40597014925373, "grad_norm": 18.39851951599121, "learning_rate": 9.974489795918369e-06, "loss": 39.969, "step": 5477 }, { "epoch": 130.42985074626867, "grad_norm": 25.369873046875, "learning_rate": 9.972789115646259e-06, "loss": 39.7739, "step": 5478 }, { "epoch": 130.45373134328358, "grad_norm": 22.13943862915039, "learning_rate": 9.97108843537415e-06, "loss": 40.0817, "step": 5479 }, { "epoch": 130.47761194029852, "grad_norm": 22.7308292388916, "learning_rate": 9.969387755102042e-06, "loss": 41.4501, "step": 5480 }, { "epoch": 130.50149253731342, "grad_norm": 16.09027671813965, "learning_rate": 9.967687074829932e-06, "loss": 41.1944, "step": 5481 }, { "epoch": 130.52537313432836, "grad_norm": 20.12171745300293, "learning_rate": 9.965986394557824e-06, "loss": 40.5477, "step": 5482 }, { "epoch": 130.54925373134327, "grad_norm": 18.88404655456543, "learning_rate": 9.964285714285714e-06, "loss": 39.402, "step": 5483 }, { "epoch": 130.5731343283582, "grad_norm": 20.32000732421875, "learning_rate": 9.962585034013607e-06, "loss": 40.6678, "step": 5484 }, { "epoch": 130.59701492537314, "grad_norm": 20.351774215698242, "learning_rate": 9.960884353741499e-06, "loss": 40.075, "step": 5485 }, { "epoch": 130.62089552238805, "grad_norm": 14.881600379943848, "learning_rate": 9.959183673469387e-06, "loss": 40.0113, "step": 5486 }, { "epoch": 130.644776119403, "grad_norm": 21.500431060791016, "learning_rate": 9.95748299319728e-06, "loss": 39.5136, "step": 5487 }, { "epoch": 130.6686567164179, "grad_norm": 20.59417152404785, "learning_rate": 9.955782312925172e-06, "loss": 40.3664, "step": 5488 }, { "epoch": 130.69253731343284, "grad_norm": 20.141138076782227, "learning_rate": 9.954081632653062e-06, "loss": 40.8991, "step": 5489 }, { "epoch": 130.71641791044777, "grad_norm": 16.41176986694336, "learning_rate": 9.952380952380954e-06, "loss": 40.3613, "step": 5490 }, { "epoch": 130.74029850746268, "grad_norm": 20.832176208496094, "learning_rate": 9.950680272108844e-06, "loss": 40.709, "step": 5491 }, { "epoch": 130.76417910447762, "grad_norm": 18.355520248413086, "learning_rate": 9.948979591836737e-06, "loss": 41.1332, "step": 5492 }, { "epoch": 130.78805970149253, "grad_norm": 21.0073299407959, "learning_rate": 9.947278911564627e-06, "loss": 40.496, "step": 5493 }, { "epoch": 130.81194029850747, "grad_norm": 18.184412002563477, "learning_rate": 9.945578231292517e-06, "loss": 40.9514, "step": 5494 }, { "epoch": 130.83582089552237, "grad_norm": 23.272981643676758, "learning_rate": 9.94387755102041e-06, "loss": 40.2932, "step": 5495 }, { "epoch": 130.8597014925373, "grad_norm": 16.066865921020508, "learning_rate": 9.9421768707483e-06, "loss": 41.2145, "step": 5496 }, { "epoch": 130.88358208955225, "grad_norm": 19.863813400268555, "learning_rate": 9.940476190476192e-06, "loss": 40.9969, "step": 5497 }, { "epoch": 130.90746268656716, "grad_norm": 20.84225082397461, "learning_rate": 9.938775510204082e-06, "loss": 40.1575, "step": 5498 }, { "epoch": 130.9313432835821, "grad_norm": 16.452865600585938, "learning_rate": 9.937074829931974e-06, "loss": 38.9115, "step": 5499 }, { "epoch": 130.955223880597, "grad_norm": 19.1783390045166, "learning_rate": 9.935374149659865e-06, "loss": 40.7441, "step": 5500 }, { "epoch": 130.97910447761194, "grad_norm": 21.94544219970703, "learning_rate": 9.933673469387755e-06, "loss": 41.8275, "step": 5501 }, { "epoch": 131.0, "grad_norm": 13.472136497497559, "learning_rate": 9.931972789115647e-06, "loss": 34.5508, "step": 5502 }, { "epoch": 131.02388059701494, "grad_norm": 18.82528305053711, "learning_rate": 9.930272108843538e-06, "loss": 41.3352, "step": 5503 }, { "epoch": 131.04776119402985, "grad_norm": 16.815523147583008, "learning_rate": 9.92857142857143e-06, "loss": 39.1606, "step": 5504 }, { "epoch": 131.07164179104478, "grad_norm": 18.014087677001953, "learning_rate": 9.92687074829932e-06, "loss": 41.389, "step": 5505 }, { "epoch": 131.0955223880597, "grad_norm": 18.153976440429688, "learning_rate": 9.92517006802721e-06, "loss": 41.0835, "step": 5506 }, { "epoch": 131.11940298507463, "grad_norm": 16.97452163696289, "learning_rate": 9.923469387755103e-06, "loss": 41.149, "step": 5507 }, { "epoch": 131.14328358208957, "grad_norm": 16.83989143371582, "learning_rate": 9.921768707482993e-06, "loss": 40.9826, "step": 5508 }, { "epoch": 131.16716417910447, "grad_norm": 15.62459659576416, "learning_rate": 9.920068027210885e-06, "loss": 41.0703, "step": 5509 }, { "epoch": 131.1910447761194, "grad_norm": 14.438183784484863, "learning_rate": 9.918367346938776e-06, "loss": 41.3628, "step": 5510 }, { "epoch": 131.21492537313432, "grad_norm": 23.413602828979492, "learning_rate": 9.916666666666668e-06, "loss": 40.0985, "step": 5511 }, { "epoch": 131.23880597014926, "grad_norm": 19.558998107910156, "learning_rate": 9.91496598639456e-06, "loss": 40.2111, "step": 5512 }, { "epoch": 131.26268656716417, "grad_norm": 12.165032386779785, "learning_rate": 9.913265306122449e-06, "loss": 40.6546, "step": 5513 }, { "epoch": 131.2865671641791, "grad_norm": 18.257869720458984, "learning_rate": 9.91156462585034e-06, "loss": 42.2008, "step": 5514 }, { "epoch": 131.31044776119404, "grad_norm": 21.828651428222656, "learning_rate": 9.909863945578233e-06, "loss": 39.9445, "step": 5515 }, { "epoch": 131.33432835820895, "grad_norm": 16.42722511291504, "learning_rate": 9.908163265306123e-06, "loss": 40.4844, "step": 5516 }, { "epoch": 131.3582089552239, "grad_norm": 15.241820335388184, "learning_rate": 9.906462585034015e-06, "loss": 39.5317, "step": 5517 }, { "epoch": 131.3820895522388, "grad_norm": 14.893516540527344, "learning_rate": 9.904761904761906e-06, "loss": 40.9785, "step": 5518 }, { "epoch": 131.40597014925373, "grad_norm": 15.520328521728516, "learning_rate": 9.903061224489798e-06, "loss": 40.2672, "step": 5519 }, { "epoch": 131.42985074626867, "grad_norm": 20.18976402282715, "learning_rate": 9.901360544217688e-06, "loss": 39.9149, "step": 5520 }, { "epoch": 131.45373134328358, "grad_norm": 17.364437103271484, "learning_rate": 9.899659863945579e-06, "loss": 39.2209, "step": 5521 }, { "epoch": 131.47761194029852, "grad_norm": 18.58165168762207, "learning_rate": 9.89795918367347e-06, "loss": 41.0448, "step": 5522 }, { "epoch": 131.50149253731342, "grad_norm": 14.185492515563965, "learning_rate": 9.896258503401361e-06, "loss": 40.0128, "step": 5523 }, { "epoch": 131.52537313432836, "grad_norm": 18.741228103637695, "learning_rate": 9.894557823129253e-06, "loss": 40.4496, "step": 5524 }, { "epoch": 131.54925373134327, "grad_norm": 16.92427635192871, "learning_rate": 9.892857142857143e-06, "loss": 39.8989, "step": 5525 }, { "epoch": 131.5731343283582, "grad_norm": 18.248445510864258, "learning_rate": 9.891156462585036e-06, "loss": 39.7473, "step": 5526 }, { "epoch": 131.59701492537314, "grad_norm": 18.88313865661621, "learning_rate": 9.889455782312926e-06, "loss": 40.1553, "step": 5527 }, { "epoch": 131.62089552238805, "grad_norm": 19.88404655456543, "learning_rate": 9.887755102040816e-06, "loss": 40.9204, "step": 5528 }, { "epoch": 131.644776119403, "grad_norm": 17.367191314697266, "learning_rate": 9.886054421768708e-06, "loss": 39.9127, "step": 5529 }, { "epoch": 131.6686567164179, "grad_norm": 18.826900482177734, "learning_rate": 9.884353741496599e-06, "loss": 40.3812, "step": 5530 }, { "epoch": 131.69253731343284, "grad_norm": 16.4368953704834, "learning_rate": 9.882653061224491e-06, "loss": 40.536, "step": 5531 }, { "epoch": 131.71641791044777, "grad_norm": 17.32594108581543, "learning_rate": 9.880952380952381e-06, "loss": 41.1266, "step": 5532 }, { "epoch": 131.74029850746268, "grad_norm": 25.614294052124023, "learning_rate": 9.879251700680272e-06, "loss": 40.5684, "step": 5533 }, { "epoch": 131.76417910447762, "grad_norm": 18.330366134643555, "learning_rate": 9.877551020408164e-06, "loss": 40.9372, "step": 5534 }, { "epoch": 131.78805970149253, "grad_norm": 17.743070602416992, "learning_rate": 9.875850340136054e-06, "loss": 40.3854, "step": 5535 }, { "epoch": 131.81194029850747, "grad_norm": 20.566457748413086, "learning_rate": 9.874149659863946e-06, "loss": 40.7082, "step": 5536 }, { "epoch": 131.83582089552237, "grad_norm": 24.459489822387695, "learning_rate": 9.872448979591838e-06, "loss": 41.2391, "step": 5537 }, { "epoch": 131.8597014925373, "grad_norm": 15.706886291503906, "learning_rate": 9.870748299319729e-06, "loss": 39.2229, "step": 5538 }, { "epoch": 131.88358208955225, "grad_norm": 18.37516975402832, "learning_rate": 9.869047619047621e-06, "loss": 39.3149, "step": 5539 }, { "epoch": 131.90746268656716, "grad_norm": 18.098176956176758, "learning_rate": 9.867346938775511e-06, "loss": 39.4227, "step": 5540 }, { "epoch": 131.9313432835821, "grad_norm": 14.898340225219727, "learning_rate": 9.865646258503402e-06, "loss": 39.4192, "step": 5541 }, { "epoch": 131.955223880597, "grad_norm": 21.42721176147461, "learning_rate": 9.863945578231294e-06, "loss": 40.879, "step": 5542 }, { "epoch": 131.97910447761194, "grad_norm": 19.07784652709961, "learning_rate": 9.862244897959184e-06, "loss": 40.1433, "step": 5543 }, { "epoch": 132.0, "grad_norm": 16.04290199279785, "learning_rate": 9.860544217687076e-06, "loss": 34.6443, "step": 5544 }, { "epoch": 132.02388059701494, "grad_norm": 17.641210556030273, "learning_rate": 9.858843537414967e-06, "loss": 40.6522, "step": 5545 }, { "epoch": 132.04776119402985, "grad_norm": 22.368637084960938, "learning_rate": 9.857142857142859e-06, "loss": 41.3016, "step": 5546 }, { "epoch": 132.07164179104478, "grad_norm": 19.166706085205078, "learning_rate": 9.85544217687075e-06, "loss": 40.4856, "step": 5547 }, { "epoch": 132.0955223880597, "grad_norm": 20.525333404541016, "learning_rate": 9.85374149659864e-06, "loss": 41.073, "step": 5548 }, { "epoch": 132.11940298507463, "grad_norm": 19.90342140197754, "learning_rate": 9.852040816326532e-06, "loss": 39.3624, "step": 5549 }, { "epoch": 132.14328358208957, "grad_norm": 15.503653526306152, "learning_rate": 9.850340136054422e-06, "loss": 41.8662, "step": 5550 }, { "epoch": 132.16716417910447, "grad_norm": 15.871750831604004, "learning_rate": 9.848639455782314e-06, "loss": 39.9852, "step": 5551 }, { "epoch": 132.1910447761194, "grad_norm": 25.539695739746094, "learning_rate": 9.846938775510205e-06, "loss": 38.6591, "step": 5552 }, { "epoch": 132.21492537313432, "grad_norm": 20.75281524658203, "learning_rate": 9.845238095238097e-06, "loss": 40.6497, "step": 5553 }, { "epoch": 132.23880597014926, "grad_norm": 17.729764938354492, "learning_rate": 9.843537414965987e-06, "loss": 39.8891, "step": 5554 }, { "epoch": 132.26268656716417, "grad_norm": 16.33675765991211, "learning_rate": 9.841836734693878e-06, "loss": 41.0467, "step": 5555 }, { "epoch": 132.2865671641791, "grad_norm": 16.76603126525879, "learning_rate": 9.84013605442177e-06, "loss": 41.4239, "step": 5556 }, { "epoch": 132.31044776119404, "grad_norm": 20.384475708007812, "learning_rate": 9.83843537414966e-06, "loss": 40.4335, "step": 5557 }, { "epoch": 132.33432835820895, "grad_norm": 16.604625701904297, "learning_rate": 9.836734693877552e-06, "loss": 40.5462, "step": 5558 }, { "epoch": 132.3582089552239, "grad_norm": 19.941377639770508, "learning_rate": 9.835034013605444e-06, "loss": 41.7404, "step": 5559 }, { "epoch": 132.3820895522388, "grad_norm": 15.349913597106934, "learning_rate": 9.833333333333333e-06, "loss": 38.5686, "step": 5560 }, { "epoch": 132.40597014925373, "grad_norm": 21.03326988220215, "learning_rate": 9.831632653061225e-06, "loss": 41.3306, "step": 5561 }, { "epoch": 132.42985074626867, "grad_norm": 18.98489761352539, "learning_rate": 9.829931972789115e-06, "loss": 39.5212, "step": 5562 }, { "epoch": 132.45373134328358, "grad_norm": 15.476447105407715, "learning_rate": 9.828231292517008e-06, "loss": 39.31, "step": 5563 }, { "epoch": 132.47761194029852, "grad_norm": 17.050857543945312, "learning_rate": 9.8265306122449e-06, "loss": 40.1559, "step": 5564 }, { "epoch": 132.50149253731342, "grad_norm": 17.630809783935547, "learning_rate": 9.82482993197279e-06, "loss": 39.8047, "step": 5565 }, { "epoch": 132.52537313432836, "grad_norm": NaN, "learning_rate": 9.823129251700682e-06, "loss": 54.2096, "step": 5566 }, { "epoch": 132.54925373134327, "grad_norm": 18.909269332885742, "learning_rate": 9.823129251700682e-06, "loss": 40.1987, "step": 5567 }, { "epoch": 132.5731343283582, "grad_norm": 20.534330368041992, "learning_rate": 9.821428571428573e-06, "loss": 40.7122, "step": 5568 }, { "epoch": 132.59701492537314, "grad_norm": 16.048946380615234, "learning_rate": 9.819727891156463e-06, "loss": 40.2584, "step": 5569 }, { "epoch": 132.62089552238805, "grad_norm": 14.615914344787598, "learning_rate": 9.818027210884355e-06, "loss": 40.1147, "step": 5570 }, { "epoch": 132.644776119403, "grad_norm": 16.347827911376953, "learning_rate": 9.816326530612245e-06, "loss": 41.0445, "step": 5571 }, { "epoch": 132.6686567164179, "grad_norm": 20.02432632446289, "learning_rate": 9.814625850340137e-06, "loss": 40.1337, "step": 5572 }, { "epoch": 132.69253731343284, "grad_norm": 18.087976455688477, "learning_rate": 9.812925170068028e-06, "loss": 39.9454, "step": 5573 }, { "epoch": 132.71641791044777, "grad_norm": 16.806800842285156, "learning_rate": 9.81122448979592e-06, "loss": 40.7469, "step": 5574 }, { "epoch": 132.74029850746268, "grad_norm": 14.957366943359375, "learning_rate": 9.80952380952381e-06, "loss": 41.9708, "step": 5575 }, { "epoch": 132.76417910447762, "grad_norm": 15.429438591003418, "learning_rate": 9.8078231292517e-06, "loss": 40.5727, "step": 5576 }, { "epoch": 132.78805970149253, "grad_norm": 18.437835693359375, "learning_rate": 9.806122448979593e-06, "loss": 39.3392, "step": 5577 }, { "epoch": 132.81194029850747, "grad_norm": 23.49526023864746, "learning_rate": 9.804421768707483e-06, "loss": 40.8007, "step": 5578 }, { "epoch": 132.83582089552237, "grad_norm": 15.580110549926758, "learning_rate": 9.802721088435375e-06, "loss": 40.2113, "step": 5579 }, { "epoch": 132.8597014925373, "grad_norm": 13.494383811950684, "learning_rate": 9.801020408163266e-06, "loss": 39.5501, "step": 5580 }, { "epoch": 132.88358208955225, "grad_norm": 14.148122787475586, "learning_rate": 9.799319727891158e-06, "loss": 39.5385, "step": 5581 }, { "epoch": 132.90746268656716, "grad_norm": 14.981057167053223, "learning_rate": 9.797619047619048e-06, "loss": 40.1832, "step": 5582 }, { "epoch": 132.9313432835821, "grad_norm": 17.651594161987305, "learning_rate": 9.795918367346939e-06, "loss": 40.8822, "step": 5583 }, { "epoch": 132.955223880597, "grad_norm": 23.53675079345703, "learning_rate": 9.79421768707483e-06, "loss": 40.4374, "step": 5584 }, { "epoch": 132.97910447761194, "grad_norm": 14.797532081604004, "learning_rate": 9.792517006802721e-06, "loss": 40.3035, "step": 5585 }, { "epoch": 133.0, "grad_norm": 19.286834716796875, "learning_rate": 9.790816326530613e-06, "loss": 35.0022, "step": 5586 }, { "epoch": 133.02388059701494, "grad_norm": 25.947200775146484, "learning_rate": 9.789115646258505e-06, "loss": 40.0884, "step": 5587 }, { "epoch": 133.04776119402985, "grad_norm": 17.286386489868164, "learning_rate": 9.787414965986394e-06, "loss": 40.6761, "step": 5588 }, { "epoch": 133.07164179104478, "grad_norm": 16.327795028686523, "learning_rate": 9.785714285714286e-06, "loss": 39.5775, "step": 5589 }, { "epoch": 133.0955223880597, "grad_norm": 25.301265716552734, "learning_rate": 9.784013605442178e-06, "loss": 39.6754, "step": 5590 }, { "epoch": 133.11940298507463, "grad_norm": 18.68819236755371, "learning_rate": 9.782312925170069e-06, "loss": 40.444, "step": 5591 }, { "epoch": 133.14328358208957, "grad_norm": 16.839736938476562, "learning_rate": 9.78061224489796e-06, "loss": 39.586, "step": 5592 }, { "epoch": 133.16716417910447, "grad_norm": 27.723005294799805, "learning_rate": 9.778911564625851e-06, "loss": 40.631, "step": 5593 }, { "epoch": 133.1910447761194, "grad_norm": 16.834030151367188, "learning_rate": 9.777210884353743e-06, "loss": 39.9121, "step": 5594 }, { "epoch": 133.21492537313432, "grad_norm": 16.289016723632812, "learning_rate": 9.775510204081634e-06, "loss": 39.8342, "step": 5595 }, { "epoch": 133.23880597014926, "grad_norm": 23.45367431640625, "learning_rate": 9.773809523809524e-06, "loss": 39.8418, "step": 5596 }, { "epoch": 133.26268656716417, "grad_norm": 18.50150489807129, "learning_rate": 9.772108843537416e-06, "loss": 40.8921, "step": 5597 }, { "epoch": 133.2865671641791, "grad_norm": 15.655564308166504, "learning_rate": 9.770408163265307e-06, "loss": 39.9306, "step": 5598 }, { "epoch": 133.31044776119404, "grad_norm": 23.770095825195312, "learning_rate": 9.768707482993199e-06, "loss": 40.686, "step": 5599 }, { "epoch": 133.33432835820895, "grad_norm": 21.083984375, "learning_rate": 9.767006802721089e-06, "loss": 40.5774, "step": 5600 }, { "epoch": 133.3582089552239, "grad_norm": 14.010787010192871, "learning_rate": 9.765306122448981e-06, "loss": 40.3888, "step": 5601 }, { "epoch": 133.3820895522388, "grad_norm": 29.777660369873047, "learning_rate": 9.763605442176872e-06, "loss": 41.4408, "step": 5602 }, { "epoch": 133.40597014925373, "grad_norm": 19.067794799804688, "learning_rate": 9.761904761904762e-06, "loss": 40.1208, "step": 5603 }, { "epoch": 133.42985074626867, "grad_norm": 30.848791122436523, "learning_rate": 9.760204081632654e-06, "loss": 40.7094, "step": 5604 }, { "epoch": 133.45373134328358, "grad_norm": 29.024898529052734, "learning_rate": 9.758503401360544e-06, "loss": 40.7004, "step": 5605 }, { "epoch": 133.47761194029852, "grad_norm": 22.88898468017578, "learning_rate": 9.756802721088437e-06, "loss": 40.257, "step": 5606 }, { "epoch": 133.50149253731342, "grad_norm": 39.7208137512207, "learning_rate": 9.755102040816327e-06, "loss": 38.6114, "step": 5607 }, { "epoch": 133.52537313432836, "grad_norm": 30.543888092041016, "learning_rate": 9.753401360544217e-06, "loss": 40.1867, "step": 5608 }, { "epoch": 133.54925373134327, "grad_norm": 36.19719314575195, "learning_rate": 9.75170068027211e-06, "loss": 40.5948, "step": 5609 }, { "epoch": 133.5731343283582, "grad_norm": 32.90020751953125, "learning_rate": 9.75e-06, "loss": 39.8139, "step": 5610 }, { "epoch": 133.59701492537314, "grad_norm": 34.50712585449219, "learning_rate": 9.748299319727892e-06, "loss": 40.1155, "step": 5611 }, { "epoch": 133.62089552238805, "grad_norm": 32.24649429321289, "learning_rate": 9.746598639455784e-06, "loss": 40.149, "step": 5612 }, { "epoch": 133.644776119403, "grad_norm": 35.7637939453125, "learning_rate": 9.744897959183674e-06, "loss": 39.7303, "step": 5613 }, { "epoch": 133.6686567164179, "grad_norm": 31.09421157836914, "learning_rate": 9.743197278911567e-06, "loss": 41.0925, "step": 5614 }, { "epoch": 133.69253731343284, "grad_norm": 37.82075881958008, "learning_rate": 9.741496598639457e-06, "loss": 39.9909, "step": 5615 }, { "epoch": 133.71641791044777, "grad_norm": 33.92351150512695, "learning_rate": 9.739795918367347e-06, "loss": 40.0986, "step": 5616 }, { "epoch": 133.74029850746268, "grad_norm": 29.645198822021484, "learning_rate": 9.73809523809524e-06, "loss": 41.5591, "step": 5617 }, { "epoch": 133.76417910447762, "grad_norm": 24.506332397460938, "learning_rate": 9.73639455782313e-06, "loss": 41.3366, "step": 5618 }, { "epoch": 133.78805970149253, "grad_norm": 38.3758544921875, "learning_rate": 9.734693877551022e-06, "loss": 41.016, "step": 5619 }, { "epoch": 133.81194029850747, "grad_norm": 33.210044860839844, "learning_rate": 9.732993197278912e-06, "loss": 40.9384, "step": 5620 }, { "epoch": 133.83582089552237, "grad_norm": 33.01791000366211, "learning_rate": 9.731292517006804e-06, "loss": 39.6658, "step": 5621 }, { "epoch": 133.8597014925373, "grad_norm": 34.2905158996582, "learning_rate": 9.729591836734695e-06, "loss": 40.4843, "step": 5622 }, { "epoch": 133.88358208955225, "grad_norm": 29.771053314208984, "learning_rate": 9.727891156462585e-06, "loss": 40.2978, "step": 5623 }, { "epoch": 133.90746268656716, "grad_norm": 30.07183837890625, "learning_rate": 9.726190476190477e-06, "loss": 40.2479, "step": 5624 }, { "epoch": 133.9313432835821, "grad_norm": 30.720661163330078, "learning_rate": 9.724489795918368e-06, "loss": 39.5252, "step": 5625 }, { "epoch": 133.955223880597, "grad_norm": 27.56161117553711, "learning_rate": 9.72278911564626e-06, "loss": 40.4758, "step": 5626 }, { "epoch": 133.97910447761194, "grad_norm": 32.74715805053711, "learning_rate": 9.72108843537415e-06, "loss": 40.6321, "step": 5627 }, { "epoch": 134.0, "grad_norm": 25.854846954345703, "learning_rate": 9.719387755102042e-06, "loss": 34.2593, "step": 5628 }, { "epoch": 134.02388059701494, "grad_norm": 33.82636642456055, "learning_rate": 9.717687074829933e-06, "loss": 40.2388, "step": 5629 }, { "epoch": 134.04776119402985, "grad_norm": 29.441238403320312, "learning_rate": 9.715986394557823e-06, "loss": 40.5805, "step": 5630 }, { "epoch": 134.07164179104478, "grad_norm": 29.590694427490234, "learning_rate": 9.714285714285715e-06, "loss": 38.7185, "step": 5631 }, { "epoch": 134.0955223880597, "grad_norm": 26.878095626831055, "learning_rate": 9.712585034013606e-06, "loss": 41.1294, "step": 5632 }, { "epoch": 134.11940298507463, "grad_norm": 31.240013122558594, "learning_rate": 9.710884353741498e-06, "loss": 40.0814, "step": 5633 }, { "epoch": 134.14328358208957, "grad_norm": 27.573955535888672, "learning_rate": 9.70918367346939e-06, "loss": 40.6451, "step": 5634 }, { "epoch": 134.16716417910447, "grad_norm": 35.54013442993164, "learning_rate": 9.707482993197278e-06, "loss": 41.3382, "step": 5635 }, { "epoch": 134.1910447761194, "grad_norm": 33.757408142089844, "learning_rate": 9.70578231292517e-06, "loss": 39.4768, "step": 5636 }, { "epoch": 134.21492537313432, "grad_norm": 29.37469482421875, "learning_rate": 9.704081632653061e-06, "loss": 39.8421, "step": 5637 }, { "epoch": 134.23880597014926, "grad_norm": 29.495834350585938, "learning_rate": 9.702380952380953e-06, "loss": 39.2846, "step": 5638 }, { "epoch": 134.26268656716417, "grad_norm": 28.723642349243164, "learning_rate": 9.700680272108845e-06, "loss": 39.4364, "step": 5639 }, { "epoch": 134.2865671641791, "grad_norm": 25.51703453063965, "learning_rate": 9.698979591836736e-06, "loss": 39.4578, "step": 5640 }, { "epoch": 134.31044776119404, "grad_norm": 34.16410446166992, "learning_rate": 9.697278911564628e-06, "loss": 40.5937, "step": 5641 }, { "epoch": 134.33432835820895, "grad_norm": 30.546810150146484, "learning_rate": 9.695578231292518e-06, "loss": 39.912, "step": 5642 }, { "epoch": 134.3582089552239, "grad_norm": 30.73379898071289, "learning_rate": 9.693877551020408e-06, "loss": 41.5471, "step": 5643 }, { "epoch": 134.3820895522388, "grad_norm": 30.759567260742188, "learning_rate": 9.6921768707483e-06, "loss": 40.3315, "step": 5644 }, { "epoch": 134.40597014925373, "grad_norm": 28.02313995361328, "learning_rate": 9.690476190476191e-06, "loss": 40.2851, "step": 5645 }, { "epoch": 134.42985074626867, "grad_norm": 24.580036163330078, "learning_rate": 9.688775510204083e-06, "loss": 40.9942, "step": 5646 }, { "epoch": 134.45373134328358, "grad_norm": 32.100738525390625, "learning_rate": 9.687074829931973e-06, "loss": 40.2184, "step": 5647 }, { "epoch": 134.47761194029852, "grad_norm": 30.24114418029785, "learning_rate": 9.685374149659866e-06, "loss": 40.3371, "step": 5648 }, { "epoch": 134.50149253731342, "grad_norm": 32.3997917175293, "learning_rate": 9.683673469387756e-06, "loss": 40.7586, "step": 5649 }, { "epoch": 134.52537313432836, "grad_norm": 25.58622169494629, "learning_rate": 9.681972789115646e-06, "loss": 40.1238, "step": 5650 }, { "epoch": 134.54925373134327, "grad_norm": 32.82097244262695, "learning_rate": 9.680272108843538e-06, "loss": 40.6563, "step": 5651 }, { "epoch": 134.5731343283582, "grad_norm": 27.216670989990234, "learning_rate": 9.678571428571429e-06, "loss": 38.6664, "step": 5652 }, { "epoch": 134.59701492537314, "grad_norm": 30.91448211669922, "learning_rate": 9.676870748299321e-06, "loss": 40.0405, "step": 5653 }, { "epoch": 134.62089552238805, "grad_norm": 27.467674255371094, "learning_rate": 9.675170068027211e-06, "loss": 40.8484, "step": 5654 }, { "epoch": 134.644776119403, "grad_norm": 33.313507080078125, "learning_rate": 9.673469387755103e-06, "loss": 40.5139, "step": 5655 }, { "epoch": 134.6686567164179, "grad_norm": 28.826663970947266, "learning_rate": 9.671768707482994e-06, "loss": 39.9436, "step": 5656 }, { "epoch": 134.69253731343284, "grad_norm": 31.69590950012207, "learning_rate": 9.670068027210884e-06, "loss": 40.458, "step": 5657 }, { "epoch": 134.71641791044777, "grad_norm": 24.371248245239258, "learning_rate": 9.668367346938776e-06, "loss": 40.4455, "step": 5658 }, { "epoch": 134.74029850746268, "grad_norm": 31.334495544433594, "learning_rate": 9.666666666666667e-06, "loss": 40.7902, "step": 5659 }, { "epoch": 134.76417910447762, "grad_norm": 27.586498260498047, "learning_rate": 9.664965986394559e-06, "loss": 40.5867, "step": 5660 }, { "epoch": 134.78805970149253, "grad_norm": 28.80315399169922, "learning_rate": 9.663265306122451e-06, "loss": 39.4688, "step": 5661 }, { "epoch": 134.81194029850747, "grad_norm": 24.875734329223633, "learning_rate": 9.66156462585034e-06, "loss": 39.2296, "step": 5662 }, { "epoch": 134.83582089552237, "grad_norm": 26.77202033996582, "learning_rate": 9.659863945578232e-06, "loss": 41.5271, "step": 5663 }, { "epoch": 134.8597014925373, "grad_norm": 21.632478713989258, "learning_rate": 9.658163265306124e-06, "loss": 39.7494, "step": 5664 }, { "epoch": 134.88358208955225, "grad_norm": 33.85261154174805, "learning_rate": 9.656462585034014e-06, "loss": 39.4471, "step": 5665 }, { "epoch": 134.90746268656716, "grad_norm": 27.42376708984375, "learning_rate": 9.654761904761906e-06, "loss": 40.2511, "step": 5666 }, { "epoch": 134.9313432835821, "grad_norm": 29.52701187133789, "learning_rate": 9.653061224489797e-06, "loss": 39.9535, "step": 5667 }, { "epoch": 134.955223880597, "grad_norm": 25.98667335510254, "learning_rate": 9.651360544217689e-06, "loss": 40.6712, "step": 5668 }, { "epoch": 134.97910447761194, "grad_norm": 26.950590133666992, "learning_rate": 9.64965986394558e-06, "loss": 40.4322, "step": 5669 }, { "epoch": 135.0, "grad_norm": 18.281841278076172, "learning_rate": 9.64795918367347e-06, "loss": 34.9402, "step": 5670 }, { "epoch": 135.02388059701494, "grad_norm": 30.714963912963867, "learning_rate": 9.646258503401362e-06, "loss": 40.2777, "step": 5671 }, { "epoch": 135.04776119402985, "grad_norm": 21.559858322143555, "learning_rate": 9.644557823129252e-06, "loss": 39.8327, "step": 5672 }, { "epoch": 135.07164179104478, "grad_norm": 27.76194953918457, "learning_rate": 9.642857142857144e-06, "loss": 40.0835, "step": 5673 }, { "epoch": 135.0955223880597, "grad_norm": 22.750877380371094, "learning_rate": 9.641156462585035e-06, "loss": 40.3429, "step": 5674 }, { "epoch": 135.11940298507463, "grad_norm": 28.511995315551758, "learning_rate": 9.639455782312927e-06, "loss": 39.3794, "step": 5675 }, { "epoch": 135.14328358208957, "grad_norm": 21.04129409790039, "learning_rate": 9.637755102040817e-06, "loss": 41.34, "step": 5676 }, { "epoch": 135.16716417910447, "grad_norm": 27.8126277923584, "learning_rate": 9.636054421768707e-06, "loss": 40.3671, "step": 5677 }, { "epoch": 135.1910447761194, "grad_norm": 23.500349044799805, "learning_rate": 9.6343537414966e-06, "loss": 40.235, "step": 5678 }, { "epoch": 135.21492537313432, "grad_norm": 25.186744689941406, "learning_rate": 9.63265306122449e-06, "loss": 40.7387, "step": 5679 }, { "epoch": 135.23880597014926, "grad_norm": 21.36899185180664, "learning_rate": 9.630952380952382e-06, "loss": 40.0717, "step": 5680 }, { "epoch": 135.26268656716417, "grad_norm": 23.584760665893555, "learning_rate": 9.629251700680272e-06, "loss": 40.1511, "step": 5681 }, { "epoch": 135.2865671641791, "grad_norm": 22.20633316040039, "learning_rate": 9.627551020408165e-06, "loss": 40.0655, "step": 5682 }, { "epoch": 135.31044776119404, "grad_norm": 19.99517822265625, "learning_rate": 9.625850340136055e-06, "loss": 39.8154, "step": 5683 }, { "epoch": 135.33432835820895, "grad_norm": 22.59499168395996, "learning_rate": 9.624149659863945e-06, "loss": 40.6277, "step": 5684 }, { "epoch": 135.3582089552239, "grad_norm": 17.33830451965332, "learning_rate": 9.622448979591837e-06, "loss": 39.4982, "step": 5685 }, { "epoch": 135.3820895522388, "grad_norm": 22.377470016479492, "learning_rate": 9.62074829931973e-06, "loss": 39.4303, "step": 5686 }, { "epoch": 135.40597014925373, "grad_norm": 22.994571685791016, "learning_rate": 9.61904761904762e-06, "loss": 38.643, "step": 5687 }, { "epoch": 135.42985074626867, "grad_norm": 17.39454460144043, "learning_rate": 9.617346938775512e-06, "loss": 41.7366, "step": 5688 }, { "epoch": 135.45373134328358, "grad_norm": 24.441268920898438, "learning_rate": 9.6156462585034e-06, "loss": 41.1602, "step": 5689 }, { "epoch": 135.47761194029852, "grad_norm": 16.182247161865234, "learning_rate": 9.613945578231293e-06, "loss": 40.4013, "step": 5690 }, { "epoch": 135.50149253731342, "grad_norm": 23.803049087524414, "learning_rate": 9.612244897959185e-06, "loss": 41.3252, "step": 5691 }, { "epoch": 135.52537313432836, "grad_norm": 20.565837860107422, "learning_rate": 9.610544217687075e-06, "loss": 40.3434, "step": 5692 }, { "epoch": 135.54925373134327, "grad_norm": 26.256967544555664, "learning_rate": 9.608843537414967e-06, "loss": 40.2281, "step": 5693 }, { "epoch": 135.5731343283582, "grad_norm": 18.350553512573242, "learning_rate": 9.607142857142858e-06, "loss": 39.1361, "step": 5694 }, { "epoch": 135.59701492537314, "grad_norm": 25.684616088867188, "learning_rate": 9.60544217687075e-06, "loss": 39.7602, "step": 5695 }, { "epoch": 135.62089552238805, "grad_norm": 22.026763916015625, "learning_rate": 9.60374149659864e-06, "loss": 40.2298, "step": 5696 }, { "epoch": 135.644776119403, "grad_norm": 15.483604431152344, "learning_rate": 9.60204081632653e-06, "loss": 39.8388, "step": 5697 }, { "epoch": 135.6686567164179, "grad_norm": 21.13356590270996, "learning_rate": 9.600340136054423e-06, "loss": 39.5239, "step": 5698 }, { "epoch": 135.69253731343284, "grad_norm": 17.695802688598633, "learning_rate": 9.598639455782313e-06, "loss": 40.336, "step": 5699 }, { "epoch": 135.71641791044777, "grad_norm": 16.947023391723633, "learning_rate": 9.596938775510205e-06, "loss": 39.7942, "step": 5700 }, { "epoch": 135.74029850746268, "grad_norm": 18.580827713012695, "learning_rate": 9.595238095238096e-06, "loss": 41.1081, "step": 5701 }, { "epoch": 135.76417910447762, "grad_norm": 19.310028076171875, "learning_rate": 9.593537414965988e-06, "loss": 38.773, "step": 5702 }, { "epoch": 135.78805970149253, "grad_norm": 17.71697235107422, "learning_rate": 9.591836734693878e-06, "loss": 41.1084, "step": 5703 }, { "epoch": 135.81194029850747, "grad_norm": 19.53215217590332, "learning_rate": 9.590136054421769e-06, "loss": 40.7152, "step": 5704 }, { "epoch": 135.83582089552237, "grad_norm": 26.050701141357422, "learning_rate": 9.58843537414966e-06, "loss": 41.2326, "step": 5705 }, { "epoch": 135.8597014925373, "grad_norm": 21.59418296813965, "learning_rate": 9.586734693877551e-06, "loss": 39.7008, "step": 5706 }, { "epoch": 135.88358208955225, "grad_norm": 17.44019889831543, "learning_rate": 9.585034013605443e-06, "loss": 40.0913, "step": 5707 }, { "epoch": 135.90746268656716, "grad_norm": 22.628219604492188, "learning_rate": 9.583333333333335e-06, "loss": 41.0258, "step": 5708 }, { "epoch": 135.9313432835821, "grad_norm": 18.4293155670166, "learning_rate": 9.581632653061226e-06, "loss": 39.4461, "step": 5709 }, { "epoch": 135.955223880597, "grad_norm": 17.186227798461914, "learning_rate": 9.579931972789116e-06, "loss": 38.9269, "step": 5710 }, { "epoch": 135.97910447761194, "grad_norm": 20.301193237304688, "learning_rate": 9.578231292517007e-06, "loss": 39.9266, "step": 5711 }, { "epoch": 136.0, "grad_norm": 18.15862464904785, "learning_rate": 9.576530612244899e-06, "loss": 35.705, "step": 5712 }, { "epoch": 136.02388059701494, "grad_norm": 17.27276611328125, "learning_rate": 9.57482993197279e-06, "loss": 39.9615, "step": 5713 }, { "epoch": 136.04776119402985, "grad_norm": 16.116933822631836, "learning_rate": 9.573129251700681e-06, "loss": 40.3388, "step": 5714 }, { "epoch": 136.07164179104478, "grad_norm": 14.106700897216797, "learning_rate": 9.571428571428573e-06, "loss": 39.8286, "step": 5715 }, { "epoch": 136.0955223880597, "grad_norm": 18.357019424438477, "learning_rate": 9.569727891156464e-06, "loss": 40.6918, "step": 5716 }, { "epoch": 136.11940298507463, "grad_norm": 16.41695213317871, "learning_rate": 9.568027210884354e-06, "loss": 40.1238, "step": 5717 }, { "epoch": 136.14328358208957, "grad_norm": 15.24857234954834, "learning_rate": 9.566326530612246e-06, "loss": 39.5314, "step": 5718 }, { "epoch": 136.16716417910447, "grad_norm": 21.097612380981445, "learning_rate": 9.564625850340137e-06, "loss": 39.4418, "step": 5719 }, { "epoch": 136.1910447761194, "grad_norm": 15.658564567565918, "learning_rate": 9.562925170068029e-06, "loss": 40.4354, "step": 5720 }, { "epoch": 136.21492537313432, "grad_norm": 18.364137649536133, "learning_rate": 9.561224489795919e-06, "loss": 39.4063, "step": 5721 }, { "epoch": 136.23880597014926, "grad_norm": 16.437915802001953, "learning_rate": 9.559523809523811e-06, "loss": 39.2412, "step": 5722 }, { "epoch": 136.26268656716417, "grad_norm": 18.161527633666992, "learning_rate": 9.557823129251701e-06, "loss": 40.1167, "step": 5723 }, { "epoch": 136.2865671641791, "grad_norm": 19.824352264404297, "learning_rate": 9.556122448979592e-06, "loss": 39.5653, "step": 5724 }, { "epoch": 136.31044776119404, "grad_norm": 16.736989974975586, "learning_rate": 9.554421768707484e-06, "loss": 39.4445, "step": 5725 }, { "epoch": 136.33432835820895, "grad_norm": NaN, "learning_rate": 9.552721088435374e-06, "loss": 40.8717, "step": 5726 }, { "epoch": 136.3582089552239, "grad_norm": 16.963516235351562, "learning_rate": 9.552721088435374e-06, "loss": 40.3213, "step": 5727 }, { "epoch": 136.3820895522388, "grad_norm": 18.735271453857422, "learning_rate": 9.551020408163266e-06, "loss": 40.8078, "step": 5728 }, { "epoch": 136.40597014925373, "grad_norm": 19.308032989501953, "learning_rate": 9.549319727891157e-06, "loss": 39.9691, "step": 5729 }, { "epoch": 136.42985074626867, "grad_norm": 14.293987274169922, "learning_rate": 9.547619047619049e-06, "loss": 39.0772, "step": 5730 }, { "epoch": 136.45373134328358, "grad_norm": 21.123519897460938, "learning_rate": 9.54591836734694e-06, "loss": 40.995, "step": 5731 }, { "epoch": 136.47761194029852, "grad_norm": 16.979511260986328, "learning_rate": 9.54421768707483e-06, "loss": 41.6872, "step": 5732 }, { "epoch": 136.50149253731342, "grad_norm": 20.042757034301758, "learning_rate": 9.542517006802722e-06, "loss": 40.1547, "step": 5733 }, { "epoch": 136.52537313432836, "grad_norm": 19.689138412475586, "learning_rate": 9.540816326530612e-06, "loss": 40.4422, "step": 5734 }, { "epoch": 136.54925373134327, "grad_norm": 19.830251693725586, "learning_rate": 9.539115646258504e-06, "loss": 38.6685, "step": 5735 }, { "epoch": 136.5731343283582, "grad_norm": 19.68994903564453, "learning_rate": 9.537414965986396e-06, "loss": 40.3769, "step": 5736 }, { "epoch": 136.59701492537314, "grad_norm": 19.520610809326172, "learning_rate": 9.535714285714287e-06, "loss": 39.8802, "step": 5737 }, { "epoch": 136.62089552238805, "grad_norm": 20.209075927734375, "learning_rate": 9.534013605442177e-06, "loss": 40.5337, "step": 5738 }, { "epoch": 136.644776119403, "grad_norm": 18.009183883666992, "learning_rate": 9.53231292517007e-06, "loss": 40.5237, "step": 5739 }, { "epoch": 136.6686567164179, "grad_norm": 17.618444442749023, "learning_rate": 9.53061224489796e-06, "loss": 39.4263, "step": 5740 }, { "epoch": 136.69253731343284, "grad_norm": 17.066255569458008, "learning_rate": 9.528911564625852e-06, "loss": 39.0451, "step": 5741 }, { "epoch": 136.71641791044777, "grad_norm": 16.11752700805664, "learning_rate": 9.527210884353742e-06, "loss": 40.5889, "step": 5742 }, { "epoch": 136.74029850746268, "grad_norm": 24.23548126220703, "learning_rate": 9.525510204081634e-06, "loss": 40.1915, "step": 5743 }, { "epoch": 136.76417910447762, "grad_norm": 17.77320671081543, "learning_rate": 9.523809523809525e-06, "loss": 40.4259, "step": 5744 }, { "epoch": 136.78805970149253, "grad_norm": 18.783700942993164, "learning_rate": 9.522108843537415e-06, "loss": 40.112, "step": 5745 }, { "epoch": 136.81194029850747, "grad_norm": 19.52975845336914, "learning_rate": 9.520408163265307e-06, "loss": 40.2694, "step": 5746 }, { "epoch": 136.83582089552237, "grad_norm": 22.467615127563477, "learning_rate": 9.518707482993198e-06, "loss": 41.4122, "step": 5747 }, { "epoch": 136.8597014925373, "grad_norm": 16.6851806640625, "learning_rate": 9.51700680272109e-06, "loss": 40.2696, "step": 5748 }, { "epoch": 136.88358208955225, "grad_norm": 19.272367477416992, "learning_rate": 9.51530612244898e-06, "loss": 40.4809, "step": 5749 }, { "epoch": 136.90746268656716, "grad_norm": 25.64748764038086, "learning_rate": 9.513605442176872e-06, "loss": 40.3818, "step": 5750 }, { "epoch": 136.9313432835821, "grad_norm": 17.339828491210938, "learning_rate": 9.511904761904763e-06, "loss": 40.3506, "step": 5751 }, { "epoch": 136.955223880597, "grad_norm": 23.17976188659668, "learning_rate": 9.510204081632653e-06, "loss": 39.2061, "step": 5752 }, { "epoch": 136.97910447761194, "grad_norm": 23.762033462524414, "learning_rate": 9.508503401360545e-06, "loss": 40.4341, "step": 5753 }, { "epoch": 137.0, "grad_norm": 14.065231323242188, "learning_rate": 9.506802721088436e-06, "loss": 35.9299, "step": 5754 }, { "epoch": 137.02388059701494, "grad_norm": 22.709367752075195, "learning_rate": 9.505102040816328e-06, "loss": 39.375, "step": 5755 }, { "epoch": 137.04776119402985, "grad_norm": 20.099899291992188, "learning_rate": 9.503401360544218e-06, "loss": 39.1875, "step": 5756 }, { "epoch": 137.07164179104478, "grad_norm": 16.251981735229492, "learning_rate": 9.50170068027211e-06, "loss": 39.7174, "step": 5757 }, { "epoch": 137.0955223880597, "grad_norm": 17.096813201904297, "learning_rate": 9.5e-06, "loss": 39.1641, "step": 5758 }, { "epoch": 137.11940298507463, "grad_norm": 21.969449996948242, "learning_rate": 9.498299319727891e-06, "loss": 39.6796, "step": 5759 }, { "epoch": 137.14328358208957, "grad_norm": 13.48315715789795, "learning_rate": 9.496598639455783e-06, "loss": 40.1312, "step": 5760 }, { "epoch": 137.16716417910447, "grad_norm": 15.142317771911621, "learning_rate": 9.494897959183675e-06, "loss": 39.0918, "step": 5761 }, { "epoch": 137.1910447761194, "grad_norm": 15.083260536193848, "learning_rate": 9.493197278911566e-06, "loss": 40.3378, "step": 5762 }, { "epoch": 137.21492537313432, "grad_norm": 16.5947208404541, "learning_rate": 9.491496598639458e-06, "loss": 40.0254, "step": 5763 }, { "epoch": 137.23880597014926, "grad_norm": 17.31525421142578, "learning_rate": 9.489795918367348e-06, "loss": 39.7925, "step": 5764 }, { "epoch": 137.26268656716417, "grad_norm": 13.33224105834961, "learning_rate": 9.488095238095238e-06, "loss": 39.1608, "step": 5765 }, { "epoch": 137.2865671641791, "grad_norm": 18.62505340576172, "learning_rate": 9.48639455782313e-06, "loss": 39.6955, "step": 5766 }, { "epoch": 137.31044776119404, "grad_norm": 20.526426315307617, "learning_rate": 9.484693877551021e-06, "loss": 40.8692, "step": 5767 }, { "epoch": 137.33432835820895, "grad_norm": 17.54509162902832, "learning_rate": 9.482993197278913e-06, "loss": 38.9902, "step": 5768 }, { "epoch": 137.3582089552239, "grad_norm": 13.5675048828125, "learning_rate": 9.481292517006803e-06, "loss": 40.2917, "step": 5769 }, { "epoch": 137.3820895522388, "grad_norm": 17.16435432434082, "learning_rate": 9.479591836734695e-06, "loss": 39.8777, "step": 5770 }, { "epoch": 137.40597014925373, "grad_norm": NaN, "learning_rate": 9.477891156462586e-06, "loss": 40.3914, "step": 5771 }, { "epoch": 137.42985074626867, "grad_norm": 18.361515045166016, "learning_rate": 9.477891156462586e-06, "loss": 42.1308, "step": 5772 }, { "epoch": 137.45373134328358, "grad_norm": 15.623734474182129, "learning_rate": 9.476190476190476e-06, "loss": 41.3761, "step": 5773 }, { "epoch": 137.47761194029852, "grad_norm": 16.020898818969727, "learning_rate": 9.474489795918368e-06, "loss": 40.9852, "step": 5774 }, { "epoch": 137.50149253731342, "grad_norm": 24.223079681396484, "learning_rate": 9.472789115646259e-06, "loss": 40.3601, "step": 5775 }, { "epoch": 137.52537313432836, "grad_norm": 16.226585388183594, "learning_rate": 9.471088435374151e-06, "loss": 39.25, "step": 5776 }, { "epoch": 137.54925373134327, "grad_norm": 14.546438217163086, "learning_rate": 9.469387755102041e-06, "loss": 41.5317, "step": 5777 }, { "epoch": 137.5731343283582, "grad_norm": 25.475976943969727, "learning_rate": 9.467687074829933e-06, "loss": 40.7448, "step": 5778 }, { "epoch": 137.59701492537314, "grad_norm": 21.050052642822266, "learning_rate": 9.465986394557824e-06, "loss": 39.494, "step": 5779 }, { "epoch": 137.62089552238805, "grad_norm": 14.88813591003418, "learning_rate": 9.464285714285714e-06, "loss": 40.073, "step": 5780 }, { "epoch": 137.644776119403, "grad_norm": 21.426273345947266, "learning_rate": 9.462585034013606e-06, "loss": 39.7685, "step": 5781 }, { "epoch": 137.6686567164179, "grad_norm": 21.00870704650879, "learning_rate": 9.460884353741497e-06, "loss": 39.4509, "step": 5782 }, { "epoch": 137.69253731343284, "grad_norm": 14.929703712463379, "learning_rate": 9.459183673469389e-06, "loss": 41.4336, "step": 5783 }, { "epoch": 137.71641791044777, "grad_norm": 13.802526473999023, "learning_rate": 9.457482993197281e-06, "loss": 40.6038, "step": 5784 }, { "epoch": 137.74029850746268, "grad_norm": 25.661685943603516, "learning_rate": 9.455782312925171e-06, "loss": 39.2058, "step": 5785 }, { "epoch": 137.76417910447762, "grad_norm": 17.845937728881836, "learning_rate": 9.454081632653062e-06, "loss": 41.1643, "step": 5786 }, { "epoch": 137.78805970149253, "grad_norm": 25.97015953063965, "learning_rate": 9.452380952380952e-06, "loss": 39.7219, "step": 5787 }, { "epoch": 137.81194029850747, "grad_norm": 17.875333786010742, "learning_rate": 9.450680272108844e-06, "loss": 39.7798, "step": 5788 }, { "epoch": 137.83582089552237, "grad_norm": 18.28219223022461, "learning_rate": 9.448979591836736e-06, "loss": 39.3965, "step": 5789 }, { "epoch": 137.8597014925373, "grad_norm": 19.815677642822266, "learning_rate": 9.447278911564627e-06, "loss": 40.5489, "step": 5790 }, { "epoch": 137.88358208955225, "grad_norm": 20.447330474853516, "learning_rate": 9.445578231292519e-06, "loss": 39.633, "step": 5791 }, { "epoch": 137.90746268656716, "grad_norm": 16.50349998474121, "learning_rate": 9.44387755102041e-06, "loss": 39.9416, "step": 5792 }, { "epoch": 137.9313432835821, "grad_norm": 22.41202735900879, "learning_rate": 9.4421768707483e-06, "loss": 41.0672, "step": 5793 }, { "epoch": 137.955223880597, "grad_norm": 17.746328353881836, "learning_rate": 9.440476190476192e-06, "loss": 39.8027, "step": 5794 }, { "epoch": 137.97910447761194, "grad_norm": 18.95381736755371, "learning_rate": 9.438775510204082e-06, "loss": 40.2487, "step": 5795 }, { "epoch": 138.0, "grad_norm": 14.501996994018555, "learning_rate": 9.437074829931974e-06, "loss": 35.2047, "step": 5796 }, { "epoch": 138.02388059701494, "grad_norm": 23.209070205688477, "learning_rate": 9.435374149659865e-06, "loss": 40.6912, "step": 5797 }, { "epoch": 138.04776119402985, "grad_norm": 19.782623291015625, "learning_rate": 9.433673469387757e-06, "loss": 40.41, "step": 5798 }, { "epoch": 138.07164179104478, "grad_norm": 18.50634002685547, "learning_rate": 9.431972789115647e-06, "loss": 40.1066, "step": 5799 }, { "epoch": 138.0955223880597, "grad_norm": 19.37914276123047, "learning_rate": 9.430272108843537e-06, "loss": 39.8479, "step": 5800 }, { "epoch": 138.11940298507463, "grad_norm": NaN, "learning_rate": 9.42857142857143e-06, "loss": 69.6561, "step": 5801 }, { "epoch": 138.14328358208957, "grad_norm": 20.234079360961914, "learning_rate": 9.42857142857143e-06, "loss": 40.7243, "step": 5802 }, { "epoch": 138.16716417910447, "grad_norm": 19.689483642578125, "learning_rate": 9.42687074829932e-06, "loss": 40.2885, "step": 5803 }, { "epoch": 138.1910447761194, "grad_norm": 15.311651229858398, "learning_rate": 9.425170068027212e-06, "loss": 40.0564, "step": 5804 }, { "epoch": 138.21492537313432, "grad_norm": 22.144147872924805, "learning_rate": 9.423469387755102e-06, "loss": 40.0064, "step": 5805 }, { "epoch": 138.23880597014926, "grad_norm": 19.106332778930664, "learning_rate": 9.421768707482995e-06, "loss": 38.9603, "step": 5806 }, { "epoch": 138.26268656716417, "grad_norm": 17.888164520263672, "learning_rate": 9.420068027210885e-06, "loss": 38.8051, "step": 5807 }, { "epoch": 138.2865671641791, "grad_norm": NaN, "learning_rate": 9.418367346938775e-06, "loss": 34.5894, "step": 5808 }, { "epoch": 138.31044776119404, "grad_norm": 16.295089721679688, "learning_rate": 9.418367346938775e-06, "loss": 39.8359, "step": 5809 }, { "epoch": 138.33432835820895, "grad_norm": 17.898618698120117, "learning_rate": 9.416666666666667e-06, "loss": 40.9419, "step": 5810 }, { "epoch": 138.3582089552239, "grad_norm": 15.549861907958984, "learning_rate": 9.414965986394558e-06, "loss": 38.8705, "step": 5811 }, { "epoch": 138.3820895522388, "grad_norm": 21.414033889770508, "learning_rate": 9.41326530612245e-06, "loss": 40.366, "step": 5812 }, { "epoch": 138.40597014925373, "grad_norm": 18.34477996826172, "learning_rate": 9.411564625850342e-06, "loss": 41.358, "step": 5813 }, { "epoch": 138.42985074626867, "grad_norm": 18.43037223815918, "learning_rate": 9.409863945578232e-06, "loss": 41.0623, "step": 5814 }, { "epoch": 138.45373134328358, "grad_norm": 22.278278350830078, "learning_rate": 9.408163265306123e-06, "loss": 39.3361, "step": 5815 }, { "epoch": 138.47761194029852, "grad_norm": NaN, "learning_rate": 9.406462585034015e-06, "loss": 39.6796, "step": 5816 }, { "epoch": 138.50149253731342, "grad_norm": 18.300764083862305, "learning_rate": 9.406462585034015e-06, "loss": 40.625, "step": 5817 }, { "epoch": 138.52537313432836, "grad_norm": 16.382335662841797, "learning_rate": 9.404761904761905e-06, "loss": 40.4843, "step": 5818 }, { "epoch": 138.54925373134327, "grad_norm": 20.629667282104492, "learning_rate": 9.403061224489797e-06, "loss": 39.1476, "step": 5819 }, { "epoch": 138.5731343283582, "grad_norm": 25.73557472229004, "learning_rate": 9.401360544217688e-06, "loss": 41.4348, "step": 5820 }, { "epoch": 138.59701492537314, "grad_norm": 15.648715019226074, "learning_rate": 9.39965986394558e-06, "loss": 40.4888, "step": 5821 }, { "epoch": 138.62089552238805, "grad_norm": 16.803377151489258, "learning_rate": 9.39795918367347e-06, "loss": 40.5578, "step": 5822 }, { "epoch": 138.644776119403, "grad_norm": 29.228322982788086, "learning_rate": 9.39625850340136e-06, "loss": 40.6632, "step": 5823 }, { "epoch": 138.6686567164179, "grad_norm": 15.427154541015625, "learning_rate": 9.394557823129253e-06, "loss": 39.1214, "step": 5824 }, { "epoch": 138.69253731343284, "grad_norm": 28.359830856323242, "learning_rate": 9.392857142857143e-06, "loss": 40.3437, "step": 5825 }, { "epoch": 138.71641791044777, "grad_norm": 18.356201171875, "learning_rate": 9.391156462585035e-06, "loss": 40.6458, "step": 5826 }, { "epoch": 138.74029850746268, "grad_norm": 19.94028091430664, "learning_rate": 9.389455782312926e-06, "loss": 39.7405, "step": 5827 }, { "epoch": 138.76417910447762, "grad_norm": 27.602651596069336, "learning_rate": 9.387755102040818e-06, "loss": 40.1602, "step": 5828 }, { "epoch": 138.78805970149253, "grad_norm": 16.400421142578125, "learning_rate": 9.386054421768708e-06, "loss": 38.7246, "step": 5829 }, { "epoch": 138.81194029850747, "grad_norm": 26.579187393188477, "learning_rate": 9.384353741496599e-06, "loss": 38.5539, "step": 5830 }, { "epoch": 138.83582089552237, "grad_norm": 21.284912109375, "learning_rate": 9.38265306122449e-06, "loss": 40.3989, "step": 5831 }, { "epoch": 138.8597014925373, "grad_norm": 23.772157669067383, "learning_rate": 9.380952380952381e-06, "loss": 39.4185, "step": 5832 }, { "epoch": 138.88358208955225, "grad_norm": 29.053791046142578, "learning_rate": 9.379251700680273e-06, "loss": 41.1253, "step": 5833 }, { "epoch": 138.90746268656716, "grad_norm": 16.733402252197266, "learning_rate": 9.377551020408164e-06, "loss": 40.1286, "step": 5834 }, { "epoch": 138.9313432835821, "grad_norm": 36.72946548461914, "learning_rate": 9.375850340136056e-06, "loss": 38.9963, "step": 5835 }, { "epoch": 138.955223880597, "grad_norm": 26.51390838623047, "learning_rate": 9.374149659863946e-06, "loss": 40.031, "step": 5836 }, { "epoch": 138.97910447761194, "grad_norm": 31.663070678710938, "learning_rate": 9.372448979591836e-06, "loss": 40.099, "step": 5837 }, { "epoch": 139.0, "grad_norm": 21.624252319335938, "learning_rate": 9.370748299319729e-06, "loss": 35.3932, "step": 5838 }, { "epoch": 139.02388059701494, "grad_norm": 33.74135971069336, "learning_rate": 9.36904761904762e-06, "loss": 40.1791, "step": 5839 }, { "epoch": 139.04776119402985, "grad_norm": 21.488868713378906, "learning_rate": 9.367346938775511e-06, "loss": 38.745, "step": 5840 }, { "epoch": 139.07164179104478, "grad_norm": 35.68408203125, "learning_rate": 9.365646258503403e-06, "loss": 40.4261, "step": 5841 }, { "epoch": 139.0955223880597, "grad_norm": 27.531938552856445, "learning_rate": 9.363945578231294e-06, "loss": 39.8502, "step": 5842 }, { "epoch": 139.11940298507463, "grad_norm": 41.84492874145508, "learning_rate": 9.362244897959184e-06, "loss": 40.0804, "step": 5843 }, { "epoch": 139.14328358208957, "grad_norm": 34.03583526611328, "learning_rate": 9.360544217687076e-06, "loss": 40.4201, "step": 5844 }, { "epoch": 139.16716417910447, "grad_norm": 27.924837112426758, "learning_rate": 9.358843537414966e-06, "loss": 40.006, "step": 5845 }, { "epoch": 139.1910447761194, "grad_norm": 29.39901351928711, "learning_rate": 9.357142857142859e-06, "loss": 39.8178, "step": 5846 }, { "epoch": 139.21492537313432, "grad_norm": 31.170534133911133, "learning_rate": 9.355442176870749e-06, "loss": 39.8539, "step": 5847 }, { "epoch": 139.23880597014926, "grad_norm": 24.115842819213867, "learning_rate": 9.353741496598641e-06, "loss": 39.5716, "step": 5848 }, { "epoch": 139.26268656716417, "grad_norm": 30.86311149597168, "learning_rate": 9.352040816326531e-06, "loss": 39.3648, "step": 5849 }, { "epoch": 139.2865671641791, "grad_norm": 23.781522750854492, "learning_rate": 9.350340136054422e-06, "loss": 39.8786, "step": 5850 }, { "epoch": 139.31044776119404, "grad_norm": 38.31922149658203, "learning_rate": 9.348639455782314e-06, "loss": 39.9693, "step": 5851 }, { "epoch": 139.33432835820895, "grad_norm": 26.093704223632812, "learning_rate": 9.346938775510204e-06, "loss": 41.1107, "step": 5852 }, { "epoch": 139.3582089552239, "grad_norm": 35.27671813964844, "learning_rate": 9.345238095238096e-06, "loss": 39.3532, "step": 5853 }, { "epoch": 139.3820895522388, "grad_norm": 28.302349090576172, "learning_rate": 9.343537414965987e-06, "loss": 40.0611, "step": 5854 }, { "epoch": 139.40597014925373, "grad_norm": 34.33140182495117, "learning_rate": 9.341836734693879e-06, "loss": 39.3752, "step": 5855 }, { "epoch": 139.42985074626867, "grad_norm": 30.547216415405273, "learning_rate": 9.34013605442177e-06, "loss": 41.2377, "step": 5856 }, { "epoch": 139.45373134328358, "grad_norm": 30.49541664123535, "learning_rate": 9.33843537414966e-06, "loss": 39.2959, "step": 5857 }, { "epoch": 139.47761194029852, "grad_norm": 25.64105796813965, "learning_rate": 9.336734693877552e-06, "loss": 41.0909, "step": 5858 }, { "epoch": 139.50149253731342, "grad_norm": NaN, "learning_rate": 9.335034013605442e-06, "loss": 60.6154, "step": 5859 }, { "epoch": 139.52537313432836, "grad_norm": 35.060829162597656, "learning_rate": 9.335034013605442e-06, "loss": 39.9802, "step": 5860 }, { "epoch": 139.54925373134327, "grad_norm": 27.54969024658203, "learning_rate": 9.333333333333334e-06, "loss": 39.3133, "step": 5861 }, { "epoch": 139.5731343283582, "grad_norm": 38.9669075012207, "learning_rate": 9.331632653061225e-06, "loss": 41.312, "step": 5862 }, { "epoch": 139.59701492537314, "grad_norm": 34.280067443847656, "learning_rate": 9.329931972789117e-06, "loss": 39.4449, "step": 5863 }, { "epoch": 139.62089552238805, "grad_norm": 29.73419189453125, "learning_rate": 9.328231292517007e-06, "loss": 40.2213, "step": 5864 }, { "epoch": 139.644776119403, "grad_norm": 24.6690673828125, "learning_rate": 9.326530612244898e-06, "loss": 40.6231, "step": 5865 }, { "epoch": 139.6686567164179, "grad_norm": 35.420101165771484, "learning_rate": 9.32482993197279e-06, "loss": 39.6659, "step": 5866 }, { "epoch": 139.69253731343284, "grad_norm": 32.64005661010742, "learning_rate": 9.323129251700682e-06, "loss": 39.3794, "step": 5867 }, { "epoch": 139.71641791044777, "grad_norm": 30.59541893005371, "learning_rate": 9.321428571428572e-06, "loss": 40.0863, "step": 5868 }, { "epoch": 139.74029850746268, "grad_norm": 28.199207305908203, "learning_rate": 9.319727891156464e-06, "loss": 39.7205, "step": 5869 }, { "epoch": 139.76417910447762, "grad_norm": 28.04796600341797, "learning_rate": 9.318027210884355e-06, "loss": 39.0113, "step": 5870 }, { "epoch": 139.78805970149253, "grad_norm": 24.012332916259766, "learning_rate": 9.316326530612245e-06, "loss": 40.5084, "step": 5871 }, { "epoch": 139.81194029850747, "grad_norm": 34.23363494873047, "learning_rate": 9.314625850340137e-06, "loss": 39.5083, "step": 5872 }, { "epoch": 139.83582089552237, "grad_norm": 29.06350326538086, "learning_rate": 9.312925170068028e-06, "loss": 39.3723, "step": 5873 }, { "epoch": 139.8597014925373, "grad_norm": 28.527681350708008, "learning_rate": 9.31122448979592e-06, "loss": 40.6419, "step": 5874 }, { "epoch": 139.88358208955225, "grad_norm": 29.12566375732422, "learning_rate": 9.30952380952381e-06, "loss": 40.6302, "step": 5875 }, { "epoch": 139.90746268656716, "grad_norm": 31.133377075195312, "learning_rate": 9.307823129251702e-06, "loss": 40.5931, "step": 5876 }, { "epoch": 139.9313432835821, "grad_norm": 24.83881378173828, "learning_rate": 9.306122448979593e-06, "loss": 39.411, "step": 5877 }, { "epoch": 139.955223880597, "grad_norm": 31.597652435302734, "learning_rate": 9.304421768707483e-06, "loss": 39.0108, "step": 5878 }, { "epoch": 139.97910447761194, "grad_norm": 29.325529098510742, "learning_rate": 9.302721088435375e-06, "loss": 39.4128, "step": 5879 }, { "epoch": 140.0, "grad_norm": 25.26357078552246, "learning_rate": 9.301020408163265e-06, "loss": 35.0399, "step": 5880 }, { "epoch": 140.0, "step": 5880, "total_flos": 2.890707963725509e+17, "train_loss": 2.877911633861308, "train_runtime": 12847.333, "train_samples_per_second": 58.322, "train_steps_per_second": 0.458 }, { "epoch": 140.02388059701494, "grad_norm": 27.523469924926758, "learning_rate": 1e-05, "loss": 39.3141, "step": 5881 }, { "epoch": 140.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998412698412699e-06, "loss": 48.2819, "step": 5882 }, { "epoch": 140.07164179104478, "grad_norm": Infinity, "learning_rate": 9.998412698412699e-06, "loss": 48.6322, "step": 5883 }, { "epoch": 140.0955223880597, "grad_norm": 486.48309326171875, "learning_rate": 9.998412698412699e-06, "loss": 47.6507, "step": 5884 }, { "epoch": 140.11940298507463, "grad_norm": 283.4151306152344, "learning_rate": 9.996825396825399e-06, "loss": 43.9795, "step": 5885 }, { "epoch": 140.14328358208957, "grad_norm": 100.155517578125, "learning_rate": 9.995238095238095e-06, "loss": 42.0815, "step": 5886 }, { "epoch": 140.16716417910447, "grad_norm": 87.53604888916016, "learning_rate": 9.993650793650793e-06, "loss": 41.1747, "step": 5887 }, { "epoch": 140.1910447761194, "grad_norm": 57.2898063659668, "learning_rate": 9.992063492063493e-06, "loss": 40.5261, "step": 5888 }, { "epoch": 140.21492537313432, "grad_norm": 67.39340209960938, "learning_rate": 9.990476190476191e-06, "loss": 41.0557, "step": 5889 }, { "epoch": 140.23880597014926, "grad_norm": 52.16965103149414, "learning_rate": 9.98888888888889e-06, "loss": 42.013, "step": 5890 }, { "epoch": 140.26268656716417, "grad_norm": 59.99985885620117, "learning_rate": 9.987301587301588e-06, "loss": 40.2188, "step": 5891 }, { "epoch": 140.2865671641791, "grad_norm": 44.25408935546875, "learning_rate": 9.985714285714286e-06, "loss": 40.8221, "step": 5892 }, { "epoch": 140.31044776119404, "grad_norm": 55.65086364746094, "learning_rate": 9.984126984126986e-06, "loss": 40.2852, "step": 5893 }, { "epoch": 140.33432835820895, "grad_norm": 36.05537033081055, "learning_rate": 9.982539682539684e-06, "loss": 40.1435, "step": 5894 }, { "epoch": 140.3582089552239, "grad_norm": 48.20842361450195, "learning_rate": 9.980952380952382e-06, "loss": 40.599, "step": 5895 }, { "epoch": 140.3820895522388, "grad_norm": 32.273136138916016, "learning_rate": 9.97936507936508e-06, "loss": 41.1943, "step": 5896 }, { "epoch": 140.40597014925373, "grad_norm": 45.55663299560547, "learning_rate": 9.977777777777778e-06, "loss": 39.6306, "step": 5897 }, { "epoch": 140.42985074626867, "grad_norm": NaN, "learning_rate": 9.976190476190477e-06, "loss": 39.3811, "step": 5898 }, { "epoch": 140.45373134328358, "grad_norm": 45.397613525390625, "learning_rate": 9.976190476190477e-06, "loss": 41.2507, "step": 5899 }, { "epoch": 140.47761194029852, "grad_norm": 29.530902862548828, "learning_rate": 9.974603174603176e-06, "loss": 41.1053, "step": 5900 }, { "epoch": 140.50149253731342, "grad_norm": 27.80730628967285, "learning_rate": 9.973015873015875e-06, "loss": 40.5605, "step": 5901 }, { "epoch": 140.52537313432836, "grad_norm": 30.194534301757812, "learning_rate": 9.971428571428571e-06, "loss": 40.2929, "step": 5902 }, { "epoch": 140.54925373134327, "grad_norm": 25.78581428527832, "learning_rate": 9.969841269841271e-06, "loss": 40.735, "step": 5903 }, { "epoch": 140.5731343283582, "grad_norm": 33.138694763183594, "learning_rate": 9.968253968253969e-06, "loss": 40.1703, "step": 5904 }, { "epoch": 140.59701492537314, "grad_norm": 28.775943756103516, "learning_rate": 9.966666666666667e-06, "loss": 40.9193, "step": 5905 }, { "epoch": 140.62089552238805, "grad_norm": 32.31502914428711, "learning_rate": 9.965079365079365e-06, "loss": 40.8351, "step": 5906 }, { "epoch": 140.644776119403, "grad_norm": 26.532331466674805, "learning_rate": 9.963492063492064e-06, "loss": 40.9574, "step": 5907 }, { "epoch": 140.6686567164179, "grad_norm": 29.57513427734375, "learning_rate": 9.961904761904763e-06, "loss": 40.6701, "step": 5908 }, { "epoch": 140.69253731343284, "grad_norm": 24.19226837158203, "learning_rate": 9.960317460317462e-06, "loss": 40.8407, "step": 5909 }, { "epoch": 140.71641791044777, "grad_norm": 31.81574058532715, "learning_rate": 9.95873015873016e-06, "loss": 39.5199, "step": 5910 }, { "epoch": 140.74029850746268, "grad_norm": 22.69314956665039, "learning_rate": 9.957142857142858e-06, "loss": 39.5871, "step": 5911 }, { "epoch": 140.76417910447762, "grad_norm": 32.679744720458984, "learning_rate": 9.955555555555556e-06, "loss": 40.8722, "step": 5912 }, { "epoch": 140.78805970149253, "grad_norm": 29.58128547668457, "learning_rate": 9.953968253968254e-06, "loss": 40.1016, "step": 5913 }, { "epoch": 140.81194029850747, "grad_norm": 28.29144287109375, "learning_rate": 9.952380952380954e-06, "loss": 40.1457, "step": 5914 }, { "epoch": 140.83582089552237, "grad_norm": 29.59137725830078, "learning_rate": 9.950793650793652e-06, "loss": 39.9583, "step": 5915 }, { "epoch": 140.8597014925373, "grad_norm": 26.7771053314209, "learning_rate": 9.94920634920635e-06, "loss": 39.75, "step": 5916 }, { "epoch": 140.88358208955225, "grad_norm": 22.15979766845703, "learning_rate": 9.947619047619049e-06, "loss": 40.9388, "step": 5917 }, { "epoch": 140.90746268656716, "grad_norm": 27.87674903869629, "learning_rate": 9.946031746031747e-06, "loss": 39.6195, "step": 5918 }, { "epoch": 140.9313432835821, "grad_norm": 20.211345672607422, "learning_rate": 9.944444444444445e-06, "loss": 38.7449, "step": 5919 }, { "epoch": 140.955223880597, "grad_norm": 18.372730255126953, "learning_rate": 9.942857142857145e-06, "loss": 39.9378, "step": 5920 }, { "epoch": 140.97910447761194, "grad_norm": NaN, "learning_rate": 9.941269841269841e-06, "loss": 34.089, "step": 5921 }, { "epoch": 141.0, "grad_norm": 19.061168670654297, "learning_rate": 9.941269841269841e-06, "loss": 34.6002, "step": 5922 }, { "epoch": 141.02388059701494, "grad_norm": 15.835738182067871, "learning_rate": 9.939682539682541e-06, "loss": 39.6239, "step": 5923 }, { "epoch": 141.04776119402985, "grad_norm": 17.03848648071289, "learning_rate": 9.93809523809524e-06, "loss": 39.4062, "step": 5924 }, { "epoch": 141.07164179104478, "grad_norm": 20.666255950927734, "learning_rate": 9.936507936507937e-06, "loss": 39.2874, "step": 5925 }, { "epoch": 141.0955223880597, "grad_norm": 18.830081939697266, "learning_rate": 9.934920634920636e-06, "loss": 38.9847, "step": 5926 }, { "epoch": 141.11940298507463, "grad_norm": 18.74386215209961, "learning_rate": 9.933333333333334e-06, "loss": 39.0351, "step": 5927 }, { "epoch": 141.14328358208957, "grad_norm": 20.883689880371094, "learning_rate": 9.931746031746032e-06, "loss": 39.7748, "step": 5928 }, { "epoch": 141.16716417910447, "grad_norm": 20.63790512084961, "learning_rate": 9.930158730158732e-06, "loss": 40.9277, "step": 5929 }, { "epoch": 141.1910447761194, "grad_norm": 16.963199615478516, "learning_rate": 9.92857142857143e-06, "loss": 40.1546, "step": 5930 }, { "epoch": 141.21492537313432, "grad_norm": 21.610614776611328, "learning_rate": 9.926984126984128e-06, "loss": 40.1432, "step": 5931 }, { "epoch": 141.23880597014926, "grad_norm": 15.654236793518066, "learning_rate": 9.925396825396826e-06, "loss": 40.1835, "step": 5932 }, { "epoch": 141.26268656716417, "grad_norm": 22.736265182495117, "learning_rate": 9.923809523809524e-06, "loss": 40.5883, "step": 5933 }, { "epoch": 141.2865671641791, "grad_norm": 17.99093246459961, "learning_rate": 9.922222222222222e-06, "loss": 41.2193, "step": 5934 }, { "epoch": 141.31044776119404, "grad_norm": 19.520702362060547, "learning_rate": 9.920634920634922e-06, "loss": 40.4416, "step": 5935 }, { "epoch": 141.33432835820895, "grad_norm": 15.596070289611816, "learning_rate": 9.91904761904762e-06, "loss": 40.2213, "step": 5936 }, { "epoch": 141.3582089552239, "grad_norm": 20.863988876342773, "learning_rate": 9.917460317460319e-06, "loss": 40.0126, "step": 5937 }, { "epoch": 141.3820895522388, "grad_norm": NaN, "learning_rate": 9.915873015873017e-06, "loss": 59.6905, "step": 5938 }, { "epoch": 141.40597014925373, "grad_norm": 16.92725372314453, "learning_rate": 9.915873015873017e-06, "loss": 39.2636, "step": 5939 }, { "epoch": 141.42985074626867, "grad_norm": 20.527759552001953, "learning_rate": 9.914285714285715e-06, "loss": 40.1856, "step": 5940 }, { "epoch": 141.45373134328358, "grad_norm": 18.0676212310791, "learning_rate": 9.912698412698413e-06, "loss": 39.8544, "step": 5941 }, { "epoch": 141.47761194029852, "grad_norm": 18.4247989654541, "learning_rate": 9.911111111111113e-06, "loss": 39.4032, "step": 5942 }, { "epoch": 141.50149253731342, "grad_norm": 16.13834571838379, "learning_rate": 9.90952380952381e-06, "loss": 39.2789, "step": 5943 }, { "epoch": 141.52537313432836, "grad_norm": 20.949169158935547, "learning_rate": 9.90793650793651e-06, "loss": 40.7231, "step": 5944 }, { "epoch": 141.54925373134327, "grad_norm": 20.491546630859375, "learning_rate": 9.906349206349207e-06, "loss": 39.8461, "step": 5945 }, { "epoch": 141.5731343283582, "grad_norm": 15.535492897033691, "learning_rate": 9.904761904761906e-06, "loss": 40.4749, "step": 5946 }, { "epoch": 141.59701492537314, "grad_norm": 20.440784454345703, "learning_rate": 9.903174603174604e-06, "loss": 39.4721, "step": 5947 }, { "epoch": 141.62089552238805, "grad_norm": 15.877060890197754, "learning_rate": 9.901587301587302e-06, "loss": 39.6905, "step": 5948 }, { "epoch": 141.644776119403, "grad_norm": 17.53740692138672, "learning_rate": 9.9e-06, "loss": 40.3163, "step": 5949 }, { "epoch": 141.6686567164179, "grad_norm": 15.804143905639648, "learning_rate": 9.8984126984127e-06, "loss": 39.4714, "step": 5950 }, { "epoch": 141.69253731343284, "grad_norm": 16.576641082763672, "learning_rate": 9.896825396825398e-06, "loss": 39.2348, "step": 5951 }, { "epoch": 141.71641791044777, "grad_norm": 17.831815719604492, "learning_rate": 9.895238095238096e-06, "loss": 39.6573, "step": 5952 }, { "epoch": 141.74029850746268, "grad_norm": 18.616697311401367, "learning_rate": 9.893650793650794e-06, "loss": 38.9384, "step": 5953 }, { "epoch": 141.76417910447762, "grad_norm": 19.397676467895508, "learning_rate": 9.892063492063493e-06, "loss": 39.7287, "step": 5954 }, { "epoch": 141.78805970149253, "grad_norm": 16.5959415435791, "learning_rate": 9.89047619047619e-06, "loss": 40.8669, "step": 5955 }, { "epoch": 141.81194029850747, "grad_norm": 16.951446533203125, "learning_rate": 9.88888888888889e-06, "loss": 39.6911, "step": 5956 }, { "epoch": 141.83582089552237, "grad_norm": 30.49028778076172, "learning_rate": 9.887301587301587e-06, "loss": 40.0326, "step": 5957 }, { "epoch": 141.8597014925373, "grad_norm": 17.2624568939209, "learning_rate": 9.885714285714287e-06, "loss": 39.2385, "step": 5958 }, { "epoch": 141.88358208955225, "grad_norm": 31.927696228027344, "learning_rate": 9.884126984126985e-06, "loss": 40.8017, "step": 5959 }, { "epoch": 141.90746268656716, "grad_norm": 21.58512306213379, "learning_rate": 9.882539682539683e-06, "loss": 39.3581, "step": 5960 }, { "epoch": 141.9313432835821, "grad_norm": 31.855587005615234, "learning_rate": 9.880952380952381e-06, "loss": 40.7001, "step": 5961 }, { "epoch": 141.955223880597, "grad_norm": 20.689455032348633, "learning_rate": 9.87936507936508e-06, "loss": 40.1663, "step": 5962 }, { "epoch": 141.97910447761194, "grad_norm": 30.823978424072266, "learning_rate": 9.877777777777778e-06, "loss": 40.2097, "step": 5963 }, { "epoch": 142.0, "grad_norm": 21.405845642089844, "learning_rate": 9.876190476190478e-06, "loss": 34.6216, "step": 5964 }, { "epoch": 142.02388059701494, "grad_norm": 26.935768127441406, "learning_rate": 9.874603174603176e-06, "loss": 40.7151, "step": 5965 }, { "epoch": 142.04776119402985, "grad_norm": 25.882448196411133, "learning_rate": 9.873015873015874e-06, "loss": 39.7308, "step": 5966 }, { "epoch": 142.07164179104478, "grad_norm": 26.444034576416016, "learning_rate": 9.871428571428572e-06, "loss": 39.9286, "step": 5967 }, { "epoch": 142.0955223880597, "grad_norm": 31.098644256591797, "learning_rate": 9.86984126984127e-06, "loss": 39.8352, "step": 5968 }, { "epoch": 142.11940298507463, "grad_norm": 22.535404205322266, "learning_rate": 9.868253968253968e-06, "loss": 39.7821, "step": 5969 }, { "epoch": 142.14328358208957, "grad_norm": 35.592140197753906, "learning_rate": 9.866666666666668e-06, "loss": 39.095, "step": 5970 }, { "epoch": 142.16716417910447, "grad_norm": 31.111549377441406, "learning_rate": 9.865079365079366e-06, "loss": 39.7111, "step": 5971 }, { "epoch": 142.1910447761194, "grad_norm": 29.2420654296875, "learning_rate": 9.863492063492065e-06, "loss": 39.5971, "step": 5972 }, { "epoch": 142.21492537313432, "grad_norm": 24.482894897460938, "learning_rate": 9.861904761904763e-06, "loss": 40.3702, "step": 5973 }, { "epoch": 142.23880597014926, "grad_norm": 33.55892562866211, "learning_rate": 9.86031746031746e-06, "loss": 40.1212, "step": 5974 }, { "epoch": 142.26268656716417, "grad_norm": 26.067710876464844, "learning_rate": 9.858730158730159e-06, "loss": 38.6022, "step": 5975 }, { "epoch": 142.2865671641791, "grad_norm": 37.217899322509766, "learning_rate": 9.857142857142859e-06, "loss": 39.6364, "step": 5976 }, { "epoch": 142.31044776119404, "grad_norm": 28.208200454711914, "learning_rate": 9.855555555555555e-06, "loss": 39.2563, "step": 5977 }, { "epoch": 142.33432835820895, "grad_norm": 34.90814971923828, "learning_rate": 9.853968253968255e-06, "loss": 41.1621, "step": 5978 }, { "epoch": 142.3582089552239, "grad_norm": 30.80927848815918, "learning_rate": 9.852380952380953e-06, "loss": 40.3969, "step": 5979 }, { "epoch": 142.3820895522388, "grad_norm": 27.33124351501465, "learning_rate": 9.850793650793651e-06, "loss": 38.5036, "step": 5980 }, { "epoch": 142.40597014925373, "grad_norm": 26.730077743530273, "learning_rate": 9.849206349206351e-06, "loss": 40.0659, "step": 5981 }, { "epoch": 142.42985074626867, "grad_norm": 28.404932022094727, "learning_rate": 9.847619047619048e-06, "loss": 40.1738, "step": 5982 }, { "epoch": 142.45373134328358, "grad_norm": 21.64544677734375, "learning_rate": 9.846031746031746e-06, "loss": 39.3644, "step": 5983 }, { "epoch": 142.47761194029852, "grad_norm": 31.69153594970703, "learning_rate": 9.844444444444446e-06, "loss": 40.0543, "step": 5984 }, { "epoch": 142.50149253731342, "grad_norm": 24.971776962280273, "learning_rate": 9.842857142857144e-06, "loss": 40.4007, "step": 5985 }, { "epoch": 142.52537313432836, "grad_norm": 32.081085205078125, "learning_rate": 9.841269841269842e-06, "loss": 39.7853, "step": 5986 }, { "epoch": 142.54925373134327, "grad_norm": 25.004484176635742, "learning_rate": 9.83968253968254e-06, "loss": 41.2327, "step": 5987 }, { "epoch": 142.5731343283582, "grad_norm": 28.96761703491211, "learning_rate": 9.838095238095238e-06, "loss": 39.2866, "step": 5988 }, { "epoch": 142.59701492537314, "grad_norm": 24.388214111328125, "learning_rate": 9.836507936507937e-06, "loss": 39.2277, "step": 5989 }, { "epoch": 142.62089552238805, "grad_norm": 30.253482818603516, "learning_rate": 9.834920634920636e-06, "loss": 39.7101, "step": 5990 }, { "epoch": 142.644776119403, "grad_norm": 26.706071853637695, "learning_rate": 9.833333333333333e-06, "loss": 39.947, "step": 5991 }, { "epoch": 142.6686567164179, "grad_norm": 29.053794860839844, "learning_rate": 9.831746031746033e-06, "loss": 40.5273, "step": 5992 }, { "epoch": 142.69253731343284, "grad_norm": 23.27960968017578, "learning_rate": 9.830158730158731e-06, "loss": 39.6124, "step": 5993 }, { "epoch": 142.71641791044777, "grad_norm": 24.988405227661133, "learning_rate": 9.828571428571429e-06, "loss": 38.7563, "step": 5994 }, { "epoch": 142.74029850746268, "grad_norm": 22.26626205444336, "learning_rate": 9.826984126984129e-06, "loss": 39.4067, "step": 5995 }, { "epoch": 142.76417910447762, "grad_norm": 21.31068992614746, "learning_rate": 9.825396825396825e-06, "loss": 40.6265, "step": 5996 }, { "epoch": 142.78805970149253, "grad_norm": 16.70918083190918, "learning_rate": 9.823809523809524e-06, "loss": 41.1972, "step": 5997 }, { "epoch": 142.81194029850747, "grad_norm": 25.48511505126953, "learning_rate": 9.822222222222223e-06, "loss": 40.3997, "step": 5998 }, { "epoch": 142.83582089552237, "grad_norm": 16.19297218322754, "learning_rate": 9.820634920634922e-06, "loss": 39.2985, "step": 5999 }, { "epoch": 142.8597014925373, "grad_norm": 30.387081146240234, "learning_rate": 9.81904761904762e-06, "loss": 39.6372, "step": 6000 }, { "epoch": 142.88358208955225, "grad_norm": 26.224910736083984, "learning_rate": 9.817460317460318e-06, "loss": 40.123, "step": 6001 }, { "epoch": 142.90746268656716, "grad_norm": 26.33165168762207, "learning_rate": 9.815873015873016e-06, "loss": 38.9846, "step": 6002 }, { "epoch": 142.9313432835821, "grad_norm": 27.266569137573242, "learning_rate": 9.814285714285716e-06, "loss": 39.3959, "step": 6003 }, { "epoch": 142.955223880597, "grad_norm": 22.750720977783203, "learning_rate": 9.812698412698414e-06, "loss": 39.962, "step": 6004 }, { "epoch": 142.97910447761194, "grad_norm": 28.63392448425293, "learning_rate": 9.811111111111112e-06, "loss": 38.9944, "step": 6005 }, { "epoch": 143.0, "grad_norm": 18.615793228149414, "learning_rate": 9.80952380952381e-06, "loss": 35.8858, "step": 6006 }, { "epoch": 143.02388059701494, "grad_norm": 27.762741088867188, "learning_rate": 9.807936507936509e-06, "loss": 40.7509, "step": 6007 }, { "epoch": 143.04776119402985, "grad_norm": 20.943986892700195, "learning_rate": 9.806349206349207e-06, "loss": 39.5899, "step": 6008 }, { "epoch": 143.07164179104478, "grad_norm": 26.191465377807617, "learning_rate": 9.804761904761907e-06, "loss": 40.6444, "step": 6009 }, { "epoch": 143.0955223880597, "grad_norm": 19.90812110900879, "learning_rate": 9.803174603174605e-06, "loss": 40.5871, "step": 6010 }, { "epoch": 143.11940298507463, "grad_norm": 18.703001022338867, "learning_rate": 9.801587301587301e-06, "loss": 39.6964, "step": 6011 }, { "epoch": 143.14328358208957, "grad_norm": 24.919872283935547, "learning_rate": 9.800000000000001e-06, "loss": 39.6181, "step": 6012 }, { "epoch": 143.16716417910447, "grad_norm": 17.14714813232422, "learning_rate": 9.7984126984127e-06, "loss": 38.8468, "step": 6013 }, { "epoch": 143.1910447761194, "grad_norm": 17.806344985961914, "learning_rate": 9.796825396825397e-06, "loss": 37.7955, "step": 6014 }, { "epoch": 143.21492537313432, "grad_norm": 20.45462989807129, "learning_rate": 9.795238095238097e-06, "loss": 39.7501, "step": 6015 }, { "epoch": 143.23880597014926, "grad_norm": 15.431315422058105, "learning_rate": 9.793650793650794e-06, "loss": 40.5709, "step": 6016 }, { "epoch": 143.26268656716417, "grad_norm": 17.685319900512695, "learning_rate": 9.792063492063494e-06, "loss": 39.8803, "step": 6017 }, { "epoch": 143.2865671641791, "grad_norm": 17.555204391479492, "learning_rate": 9.790476190476192e-06, "loss": 40.2518, "step": 6018 }, { "epoch": 143.31044776119404, "grad_norm": 21.020915985107422, "learning_rate": 9.78888888888889e-06, "loss": 39.6208, "step": 6019 }, { "epoch": 143.33432835820895, "grad_norm": 18.242265701293945, "learning_rate": 9.787301587301588e-06, "loss": 39.7219, "step": 6020 }, { "epoch": 143.3582089552239, "grad_norm": 15.243906021118164, "learning_rate": 9.785714285714286e-06, "loss": 41.2384, "step": 6021 }, { "epoch": 143.3820895522388, "grad_norm": 17.748985290527344, "learning_rate": 9.784126984126984e-06, "loss": 40.7005, "step": 6022 }, { "epoch": 143.40597014925373, "grad_norm": 16.003299713134766, "learning_rate": 9.782539682539684e-06, "loss": 39.4961, "step": 6023 }, { "epoch": 143.42985074626867, "grad_norm": NaN, "learning_rate": 9.780952380952382e-06, "loss": 49.4703, "step": 6024 }, { "epoch": 143.45373134328358, "grad_norm": 18.312435150146484, "learning_rate": 9.780952380952382e-06, "loss": 38.8234, "step": 6025 }, { "epoch": 143.47761194029852, "grad_norm": 16.544918060302734, "learning_rate": 9.779365079365079e-06, "loss": 40.889, "step": 6026 }, { "epoch": 143.50149253731342, "grad_norm": 22.112247467041016, "learning_rate": 9.777777777777779e-06, "loss": 40.1041, "step": 6027 }, { "epoch": 143.52537313432836, "grad_norm": 17.069169998168945, "learning_rate": 9.776190476190477e-06, "loss": 39.9842, "step": 6028 }, { "epoch": 143.54925373134327, "grad_norm": 20.36438751220703, "learning_rate": 9.774603174603175e-06, "loss": 39.4701, "step": 6029 }, { "epoch": 143.5731343283582, "grad_norm": 17.346471786499023, "learning_rate": 9.773015873015875e-06, "loss": 39.0406, "step": 6030 }, { "epoch": 143.59701492537314, "grad_norm": 24.81892967224121, "learning_rate": 9.771428571428571e-06, "loss": 39.2244, "step": 6031 }, { "epoch": 143.62089552238805, "grad_norm": 16.587474822998047, "learning_rate": 9.769841269841271e-06, "loss": 40.7776, "step": 6032 }, { "epoch": 143.644776119403, "grad_norm": 26.76886749267578, "learning_rate": 9.76825396825397e-06, "loss": 39.578, "step": 6033 }, { "epoch": 143.6686567164179, "grad_norm": 23.013551712036133, "learning_rate": 9.766666666666667e-06, "loss": 40.1283, "step": 6034 }, { "epoch": 143.69253731343284, "grad_norm": 21.895034790039062, "learning_rate": 9.765079365079366e-06, "loss": 40.0862, "step": 6035 }, { "epoch": 143.71641791044777, "grad_norm": 20.68297004699707, "learning_rate": 9.763492063492064e-06, "loss": 40.8754, "step": 6036 }, { "epoch": 143.74029850746268, "grad_norm": 25.544919967651367, "learning_rate": 9.761904761904762e-06, "loss": 40.3303, "step": 6037 }, { "epoch": 143.76417910447762, "grad_norm": 18.004247665405273, "learning_rate": 9.760317460317462e-06, "loss": 40.8136, "step": 6038 }, { "epoch": 143.78805970149253, "grad_norm": 34.06446838378906, "learning_rate": 9.75873015873016e-06, "loss": 39.3411, "step": 6039 }, { "epoch": 143.81194029850747, "grad_norm": 26.361446380615234, "learning_rate": 9.757142857142858e-06, "loss": 39.6386, "step": 6040 }, { "epoch": 143.83582089552237, "grad_norm": 34.16010284423828, "learning_rate": 9.755555555555556e-06, "loss": 38.8731, "step": 6041 }, { "epoch": 143.8597014925373, "grad_norm": 25.978195190429688, "learning_rate": 9.753968253968254e-06, "loss": 38.3753, "step": 6042 }, { "epoch": 143.88358208955225, "grad_norm": 30.363014221191406, "learning_rate": 9.752380952380953e-06, "loss": 40.2135, "step": 6043 }, { "epoch": 143.90746268656716, "grad_norm": 21.55478858947754, "learning_rate": 9.750793650793652e-06, "loss": 38.6819, "step": 6044 }, { "epoch": 143.9313432835821, "grad_norm": 24.41329574584961, "learning_rate": 9.74920634920635e-06, "loss": 40.9191, "step": 6045 }, { "epoch": 143.955223880597, "grad_norm": 20.223283767700195, "learning_rate": 9.747619047619049e-06, "loss": 37.8833, "step": 6046 }, { "epoch": 143.97910447761194, "grad_norm": 18.49294662475586, "learning_rate": 9.746031746031747e-06, "loss": 39.6541, "step": 6047 }, { "epoch": 144.0, "grad_norm": 21.152721405029297, "learning_rate": 9.744444444444445e-06, "loss": 35.0013, "step": 6048 }, { "epoch": 144.02388059701494, "grad_norm": 18.513105392456055, "learning_rate": 9.742857142857143e-06, "loss": 39.0689, "step": 6049 }, { "epoch": 144.04776119402985, "grad_norm": 19.236661911010742, "learning_rate": 9.741269841269843e-06, "loss": 40.3136, "step": 6050 }, { "epoch": 144.07164179104478, "grad_norm": 20.3817138671875, "learning_rate": 9.73968253968254e-06, "loss": 39.5201, "step": 6051 }, { "epoch": 144.0955223880597, "grad_norm": 16.75079917907715, "learning_rate": 9.73809523809524e-06, "loss": 40.7713, "step": 6052 }, { "epoch": 144.11940298507463, "grad_norm": 18.25192642211914, "learning_rate": 9.736507936507938e-06, "loss": 37.5623, "step": 6053 }, { "epoch": 144.14328358208957, "grad_norm": 20.650714874267578, "learning_rate": 9.734920634920636e-06, "loss": 40.4165, "step": 6054 }, { "epoch": 144.16716417910447, "grad_norm": 17.22085952758789, "learning_rate": 9.733333333333334e-06, "loss": 39.791, "step": 6055 }, { "epoch": 144.1910447761194, "grad_norm": 17.359233856201172, "learning_rate": 9.731746031746032e-06, "loss": 40.0045, "step": 6056 }, { "epoch": 144.21492537313432, "grad_norm": 18.853940963745117, "learning_rate": 9.73015873015873e-06, "loss": 39.8853, "step": 6057 }, { "epoch": 144.23880597014926, "grad_norm": 17.358612060546875, "learning_rate": 9.72857142857143e-06, "loss": 40.4062, "step": 6058 }, { "epoch": 144.26268656716417, "grad_norm": 17.910709381103516, "learning_rate": 9.726984126984128e-06, "loss": 39.096, "step": 6059 }, { "epoch": 144.2865671641791, "grad_norm": 15.399354934692383, "learning_rate": 9.725396825396826e-06, "loss": 40.6485, "step": 6060 }, { "epoch": 144.31044776119404, "grad_norm": 17.468482971191406, "learning_rate": 9.723809523809525e-06, "loss": 40.331, "step": 6061 }, { "epoch": 144.33432835820895, "grad_norm": 13.7400541305542, "learning_rate": 9.722222222222223e-06, "loss": 38.4288, "step": 6062 }, { "epoch": 144.3582089552239, "grad_norm": 17.06818962097168, "learning_rate": 9.720634920634921e-06, "loss": 40.0401, "step": 6063 }, { "epoch": 144.3820895522388, "grad_norm": 20.590816497802734, "learning_rate": 9.71904761904762e-06, "loss": 38.3167, "step": 6064 }, { "epoch": 144.40597014925373, "grad_norm": 17.27370262145996, "learning_rate": 9.717460317460317e-06, "loss": 39.3935, "step": 6065 }, { "epoch": 144.42985074626867, "grad_norm": 18.074583053588867, "learning_rate": 9.715873015873017e-06, "loss": 40.6159, "step": 6066 }, { "epoch": 144.45373134328358, "grad_norm": 20.387073516845703, "learning_rate": 9.714285714285715e-06, "loss": 40.2223, "step": 6067 }, { "epoch": 144.47761194029852, "grad_norm": 24.666194915771484, "learning_rate": 9.712698412698413e-06, "loss": 39.9985, "step": 6068 }, { "epoch": 144.50149253731342, "grad_norm": 17.105199813842773, "learning_rate": 9.711111111111111e-06, "loss": 39.8627, "step": 6069 }, { "epoch": 144.52537313432836, "grad_norm": 21.538379669189453, "learning_rate": 9.70952380952381e-06, "loss": 38.4146, "step": 6070 }, { "epoch": 144.54925373134327, "grad_norm": 20.19131851196289, "learning_rate": 9.707936507936508e-06, "loss": 39.3941, "step": 6071 }, { "epoch": 144.5731343283582, "grad_norm": NaN, "learning_rate": 9.706349206349208e-06, "loss": 60.62, "step": 6072 }, { "epoch": 144.59701492537314, "grad_norm": 22.036714553833008, "learning_rate": 9.706349206349208e-06, "loss": 41.4121, "step": 6073 }, { "epoch": 144.62089552238805, "grad_norm": 18.917593002319336, "learning_rate": 9.704761904761906e-06, "loss": 40.0146, "step": 6074 }, { "epoch": 144.644776119403, "grad_norm": 15.137066841125488, "learning_rate": 9.703174603174604e-06, "loss": 38.8101, "step": 6075 }, { "epoch": 144.6686567164179, "grad_norm": 22.35431671142578, "learning_rate": 9.701587301587302e-06, "loss": 39.9032, "step": 6076 }, { "epoch": 144.69253731343284, "grad_norm": 16.393959045410156, "learning_rate": 9.7e-06, "loss": 39.0491, "step": 6077 }, { "epoch": 144.71641791044777, "grad_norm": 23.372652053833008, "learning_rate": 9.698412698412698e-06, "loss": 38.3159, "step": 6078 }, { "epoch": 144.74029850746268, "grad_norm": 19.953815460205078, "learning_rate": 9.696825396825398e-06, "loss": 40.3867, "step": 6079 }, { "epoch": 144.76417910447762, "grad_norm": 30.77141761779785, "learning_rate": 9.695238095238096e-06, "loss": 40.612, "step": 6080 }, { "epoch": 144.78805970149253, "grad_norm": 25.008193969726562, "learning_rate": 9.693650793650795e-06, "loss": 40.3145, "step": 6081 }, { "epoch": 144.81194029850747, "grad_norm": 24.88791275024414, "learning_rate": 9.692063492063493e-06, "loss": 40.0597, "step": 6082 }, { "epoch": 144.83582089552237, "grad_norm": 23.809860229492188, "learning_rate": 9.690476190476191e-06, "loss": 39.1451, "step": 6083 }, { "epoch": 144.8597014925373, "grad_norm": 18.984691619873047, "learning_rate": 9.688888888888889e-06, "loss": 40.2938, "step": 6084 }, { "epoch": 144.88358208955225, "grad_norm": 19.395414352416992, "learning_rate": 9.687301587301589e-06, "loss": 39.0497, "step": 6085 }, { "epoch": 144.90746268656716, "grad_norm": 22.040620803833008, "learning_rate": 9.685714285714285e-06, "loss": 40.2134, "step": 6086 }, { "epoch": 144.9313432835821, "grad_norm": 18.223390579223633, "learning_rate": 9.684126984126985e-06, "loss": 40.2474, "step": 6087 }, { "epoch": 144.955223880597, "grad_norm": 16.415504455566406, "learning_rate": 9.682539682539683e-06, "loss": 39.446, "step": 6088 }, { "epoch": 144.97910447761194, "grad_norm": 18.761838912963867, "learning_rate": 9.680952380952382e-06, "loss": 40.2324, "step": 6089 }, { "epoch": 145.0, "grad_norm": 14.694183349609375, "learning_rate": 9.679365079365081e-06, "loss": 33.4446, "step": 6090 }, { "epoch": 145.02388059701494, "grad_norm": 15.788558959960938, "learning_rate": 9.677777777777778e-06, "loss": 39.7514, "step": 6091 }, { "epoch": 145.04776119402985, "grad_norm": 19.04860496520996, "learning_rate": 9.676190476190476e-06, "loss": 39.7695, "step": 6092 }, { "epoch": 145.07164179104478, "grad_norm": 18.22698974609375, "learning_rate": 9.674603174603176e-06, "loss": 38.9197, "step": 6093 }, { "epoch": 145.0955223880597, "grad_norm": 15.227489471435547, "learning_rate": 9.673015873015874e-06, "loss": 39.3056, "step": 6094 }, { "epoch": 145.11940298507463, "grad_norm": 16.864139556884766, "learning_rate": 9.671428571428572e-06, "loss": 39.1712, "step": 6095 }, { "epoch": 145.14328358208957, "grad_norm": 19.5656795501709, "learning_rate": 9.66984126984127e-06, "loss": 40.002, "step": 6096 }, { "epoch": 145.16716417910447, "grad_norm": 15.475809097290039, "learning_rate": 9.668253968253969e-06, "loss": 38.7754, "step": 6097 }, { "epoch": 145.1910447761194, "grad_norm": 21.379589080810547, "learning_rate": 9.666666666666667e-06, "loss": 39.2871, "step": 6098 }, { "epoch": 145.21492537313432, "grad_norm": 15.879408836364746, "learning_rate": 9.665079365079367e-06, "loss": 39.6373, "step": 6099 }, { "epoch": 145.23880597014926, "grad_norm": 20.632753372192383, "learning_rate": 9.663492063492065e-06, "loss": 38.4285, "step": 6100 }, { "epoch": 145.26268656716417, "grad_norm": 21.47446632385254, "learning_rate": 9.661904761904763e-06, "loss": 40.8862, "step": 6101 }, { "epoch": 145.2865671641791, "grad_norm": 17.442068099975586, "learning_rate": 9.660317460317461e-06, "loss": 38.6985, "step": 6102 }, { "epoch": 145.31044776119404, "grad_norm": 19.083768844604492, "learning_rate": 9.65873015873016e-06, "loss": 40.7921, "step": 6103 }, { "epoch": 145.33432835820895, "grad_norm": 18.121421813964844, "learning_rate": 9.657142857142859e-06, "loss": 37.8805, "step": 6104 }, { "epoch": 145.3582089552239, "grad_norm": 17.6599063873291, "learning_rate": 9.655555555555556e-06, "loss": 40.5698, "step": 6105 }, { "epoch": 145.3820895522388, "grad_norm": 19.329008102416992, "learning_rate": 9.653968253968254e-06, "loss": 40.4532, "step": 6106 }, { "epoch": 145.40597014925373, "grad_norm": 16.720779418945312, "learning_rate": 9.652380952380954e-06, "loss": 40.1488, "step": 6107 }, { "epoch": 145.42985074626867, "grad_norm": 21.048095703125, "learning_rate": 9.650793650793652e-06, "loss": 39.7219, "step": 6108 }, { "epoch": 145.45373134328358, "grad_norm": 22.04213523864746, "learning_rate": 9.64920634920635e-06, "loss": 39.7511, "step": 6109 }, { "epoch": 145.47761194029852, "grad_norm": 16.699317932128906, "learning_rate": 9.647619047619048e-06, "loss": 40.4039, "step": 6110 }, { "epoch": 145.50149253731342, "grad_norm": 20.744373321533203, "learning_rate": 9.646031746031746e-06, "loss": 39.7408, "step": 6111 }, { "epoch": 145.52537313432836, "grad_norm": 22.63360595703125, "learning_rate": 9.644444444444444e-06, "loss": 39.3722, "step": 6112 }, { "epoch": 145.54925373134327, "grad_norm": 19.45470428466797, "learning_rate": 9.642857142857144e-06, "loss": 38.0393, "step": 6113 }, { "epoch": 145.5731343283582, "grad_norm": 22.894981384277344, "learning_rate": 9.641269841269842e-06, "loss": 40.2133, "step": 6114 }, { "epoch": 145.59701492537314, "grad_norm": 24.637012481689453, "learning_rate": 9.63968253968254e-06, "loss": 40.3819, "step": 6115 }, { "epoch": 145.62089552238805, "grad_norm": 18.199817657470703, "learning_rate": 9.638095238095239e-06, "loss": 39.591, "step": 6116 }, { "epoch": 145.644776119403, "grad_norm": 26.813262939453125, "learning_rate": 9.636507936507937e-06, "loss": 40.4985, "step": 6117 }, { "epoch": 145.6686567164179, "grad_norm": 20.786285400390625, "learning_rate": 9.634920634920637e-06, "loss": 40.7269, "step": 6118 }, { "epoch": 145.69253731343284, "grad_norm": 23.935718536376953, "learning_rate": 9.633333333333335e-06, "loss": 39.3393, "step": 6119 }, { "epoch": 145.71641791044777, "grad_norm": 23.803821563720703, "learning_rate": 9.631746031746031e-06, "loss": 38.7576, "step": 6120 }, { "epoch": 145.74029850746268, "grad_norm": 20.230010986328125, "learning_rate": 9.630158730158731e-06, "loss": 39.0025, "step": 6121 }, { "epoch": 145.76417910447762, "grad_norm": 26.59383773803711, "learning_rate": 9.62857142857143e-06, "loss": 38.8942, "step": 6122 }, { "epoch": 145.78805970149253, "grad_norm": 26.656192779541016, "learning_rate": 9.626984126984127e-06, "loss": 40.5026, "step": 6123 }, { "epoch": 145.81194029850747, "grad_norm": 17.649873733520508, "learning_rate": 9.625396825396827e-06, "loss": 41.5825, "step": 6124 }, { "epoch": 145.83582089552237, "grad_norm": 25.234466552734375, "learning_rate": 9.623809523809524e-06, "loss": 38.4286, "step": 6125 }, { "epoch": 145.8597014925373, "grad_norm": 21.319074630737305, "learning_rate": 9.622222222222222e-06, "loss": 38.8864, "step": 6126 }, { "epoch": 145.88358208955225, "grad_norm": 18.06063461303711, "learning_rate": 9.620634920634922e-06, "loss": 39.6841, "step": 6127 }, { "epoch": 145.90746268656716, "grad_norm": 30.18861961364746, "learning_rate": 9.61904761904762e-06, "loss": 39.9368, "step": 6128 }, { "epoch": 145.9313432835821, "grad_norm": 17.992721557617188, "learning_rate": 9.617460317460318e-06, "loss": 40.3737, "step": 6129 }, { "epoch": 145.955223880597, "grad_norm": 23.077625274658203, "learning_rate": 9.615873015873016e-06, "loss": 40.5656, "step": 6130 }, { "epoch": 145.97910447761194, "grad_norm": 23.092899322509766, "learning_rate": 9.614285714285714e-06, "loss": 40.3125, "step": 6131 }, { "epoch": 146.0, "grad_norm": 15.744735717773438, "learning_rate": 9.612698412698414e-06, "loss": 34.232, "step": 6132 }, { "epoch": 146.02388059701494, "grad_norm": 26.01073455810547, "learning_rate": 9.611111111111112e-06, "loss": 40.3078, "step": 6133 }, { "epoch": 146.04776119402985, "grad_norm": 19.39035987854004, "learning_rate": 9.60952380952381e-06, "loss": 40.5784, "step": 6134 }, { "epoch": 146.07164179104478, "grad_norm": 15.700801849365234, "learning_rate": 9.607936507936509e-06, "loss": 39.5239, "step": 6135 }, { "epoch": 146.0955223880597, "grad_norm": 19.383134841918945, "learning_rate": 9.606349206349207e-06, "loss": 38.7188, "step": 6136 }, { "epoch": 146.11940298507463, "grad_norm": 16.338594436645508, "learning_rate": 9.604761904761905e-06, "loss": 37.9652, "step": 6137 }, { "epoch": 146.14328358208957, "grad_norm": 16.093175888061523, "learning_rate": 9.603174603174605e-06, "loss": 39.2513, "step": 6138 }, { "epoch": 146.16716417910447, "grad_norm": 17.29031753540039, "learning_rate": 9.601587301587303e-06, "loss": 40.0027, "step": 6139 }, { "epoch": 146.1910447761194, "grad_norm": 19.29131317138672, "learning_rate": 9.600000000000001e-06, "loss": 40.0402, "step": 6140 }, { "epoch": 146.21492537313432, "grad_norm": 14.723686218261719, "learning_rate": 9.5984126984127e-06, "loss": 39.6575, "step": 6141 }, { "epoch": 146.23880597014926, "grad_norm": 13.284400939941406, "learning_rate": 9.596825396825398e-06, "loss": 38.2423, "step": 6142 }, { "epoch": 146.26268656716417, "grad_norm": 14.448948860168457, "learning_rate": 9.595238095238096e-06, "loss": 39.5564, "step": 6143 }, { "epoch": 146.2865671641791, "grad_norm": 15.912578582763672, "learning_rate": 9.593650793650794e-06, "loss": 41.1268, "step": 6144 }, { "epoch": 146.31044776119404, "grad_norm": 13.982776641845703, "learning_rate": 9.592063492063492e-06, "loss": 38.4374, "step": 6145 }, { "epoch": 146.33432835820895, "grad_norm": 18.495389938354492, "learning_rate": 9.590476190476192e-06, "loss": 39.379, "step": 6146 }, { "epoch": 146.3582089552239, "grad_norm": 27.54531478881836, "learning_rate": 9.58888888888889e-06, "loss": 40.0996, "step": 6147 }, { "epoch": 146.3820895522388, "grad_norm": 16.7718563079834, "learning_rate": 9.587301587301588e-06, "loss": 40.0521, "step": 6148 }, { "epoch": 146.40597014925373, "grad_norm": 13.470280647277832, "learning_rate": 9.585714285714286e-06, "loss": 39.616, "step": 6149 }, { "epoch": 146.42985074626867, "grad_norm": 18.987812042236328, "learning_rate": 9.584126984126985e-06, "loss": 40.2916, "step": 6150 }, { "epoch": 146.45373134328358, "grad_norm": 17.242666244506836, "learning_rate": 9.582539682539683e-06, "loss": 39.5103, "step": 6151 }, { "epoch": 146.47761194029852, "grad_norm": 19.262651443481445, "learning_rate": 9.580952380952383e-06, "loss": 39.479, "step": 6152 }, { "epoch": 146.50149253731342, "grad_norm": 19.4732608795166, "learning_rate": 9.57936507936508e-06, "loss": 40.6662, "step": 6153 }, { "epoch": 146.52537313432836, "grad_norm": 18.1159610748291, "learning_rate": 9.577777777777779e-06, "loss": 40.4998, "step": 6154 }, { "epoch": 146.54925373134327, "grad_norm": 16.363819122314453, "learning_rate": 9.576190476190477e-06, "loss": 36.286, "step": 6155 }, { "epoch": 146.5731343283582, "grad_norm": NaN, "learning_rate": 9.574603174603175e-06, "loss": 33.9266, "step": 6156 }, { "epoch": 146.59701492537314, "grad_norm": 17.450937271118164, "learning_rate": 9.574603174603175e-06, "loss": 39.6215, "step": 6157 }, { "epoch": 146.62089552238805, "grad_norm": 20.69955062866211, "learning_rate": 9.573015873015873e-06, "loss": 39.0897, "step": 6158 }, { "epoch": 146.644776119403, "grad_norm": 26.637802124023438, "learning_rate": 9.571428571428573e-06, "loss": 40.1247, "step": 6159 }, { "epoch": 146.6686567164179, "grad_norm": 16.69516372680664, "learning_rate": 9.56984126984127e-06, "loss": 40.1396, "step": 6160 }, { "epoch": 146.69253731343284, "grad_norm": 24.776458740234375, "learning_rate": 9.56825396825397e-06, "loss": 40.1209, "step": 6161 }, { "epoch": 146.71641791044777, "grad_norm": 22.97787094116211, "learning_rate": 9.566666666666668e-06, "loss": 40.8353, "step": 6162 }, { "epoch": 146.74029850746268, "grad_norm": 17.57745361328125, "learning_rate": 9.565079365079366e-06, "loss": 39.4058, "step": 6163 }, { "epoch": 146.76417910447762, "grad_norm": 32.396968841552734, "learning_rate": 9.563492063492064e-06, "loss": 39.9972, "step": 6164 }, { "epoch": 146.78805970149253, "grad_norm": 20.528043746948242, "learning_rate": 9.561904761904762e-06, "loss": 39.2347, "step": 6165 }, { "epoch": 146.81194029850747, "grad_norm": 35.68081283569336, "learning_rate": 9.56031746031746e-06, "loss": 39.7637, "step": 6166 }, { "epoch": 146.83582089552237, "grad_norm": 22.645538330078125, "learning_rate": 9.55873015873016e-06, "loss": 39.389, "step": 6167 }, { "epoch": 146.8597014925373, "grad_norm": 40.081722259521484, "learning_rate": 9.557142857142858e-06, "loss": 39.2152, "step": 6168 }, { "epoch": 146.88358208955225, "grad_norm": 30.616613388061523, "learning_rate": 9.555555555555556e-06, "loss": 41.0145, "step": 6169 }, { "epoch": 146.90746268656716, "grad_norm": 40.12171173095703, "learning_rate": 9.553968253968255e-06, "loss": 38.9865, "step": 6170 }, { "epoch": 146.9313432835821, "grad_norm": 35.75667953491211, "learning_rate": 9.552380952380953e-06, "loss": 39.5375, "step": 6171 }, { "epoch": 146.955223880597, "grad_norm": 33.94740676879883, "learning_rate": 9.550793650793651e-06, "loss": 40.1784, "step": 6172 }, { "epoch": 146.97910447761194, "grad_norm": 30.814422607421875, "learning_rate": 9.54920634920635e-06, "loss": 39.5753, "step": 6173 }, { "epoch": 147.0, "grad_norm": 30.17840003967285, "learning_rate": 9.547619047619049e-06, "loss": 35.3718, "step": 6174 }, { "epoch": 147.02388059701494, "grad_norm": 28.895395278930664, "learning_rate": 9.546031746031747e-06, "loss": 38.3438, "step": 6175 }, { "epoch": 147.04776119402985, "grad_norm": 35.01318359375, "learning_rate": 9.544444444444445e-06, "loss": 40.1541, "step": 6176 }, { "epoch": 147.07164179104478, "grad_norm": 26.909517288208008, "learning_rate": 9.542857142857143e-06, "loss": 40.7689, "step": 6177 }, { "epoch": 147.0955223880597, "grad_norm": 34.66661071777344, "learning_rate": 9.541269841269842e-06, "loss": 39.4185, "step": 6178 }, { "epoch": 147.11940298507463, "grad_norm": 26.95038414001465, "learning_rate": 9.539682539682541e-06, "loss": 39.2899, "step": 6179 }, { "epoch": 147.14328358208957, "grad_norm": 35.695613861083984, "learning_rate": 9.538095238095238e-06, "loss": 38.2958, "step": 6180 }, { "epoch": 147.16716417910447, "grad_norm": 33.42219543457031, "learning_rate": 9.536507936507938e-06, "loss": 38.6355, "step": 6181 }, { "epoch": 147.1910447761194, "grad_norm": 32.482948303222656, "learning_rate": 9.534920634920636e-06, "loss": 39.308, "step": 6182 }, { "epoch": 147.21492537313432, "grad_norm": 30.571081161499023, "learning_rate": 9.533333333333334e-06, "loss": 39.6363, "step": 6183 }, { "epoch": 147.23880597014926, "grad_norm": 32.807952880859375, "learning_rate": 9.531746031746032e-06, "loss": 40.0385, "step": 6184 }, { "epoch": 147.26268656716417, "grad_norm": 32.24506378173828, "learning_rate": 9.53015873015873e-06, "loss": 39.7579, "step": 6185 }, { "epoch": 147.2865671641791, "grad_norm": 30.932525634765625, "learning_rate": 9.528571428571429e-06, "loss": 39.03, "step": 6186 }, { "epoch": 147.31044776119404, "grad_norm": 25.31529998779297, "learning_rate": 9.526984126984128e-06, "loss": 39.6358, "step": 6187 }, { "epoch": 147.33432835820895, "grad_norm": 36.5391960144043, "learning_rate": 9.525396825396827e-06, "loss": 39.6683, "step": 6188 }, { "epoch": 147.3582089552239, "grad_norm": 31.384052276611328, "learning_rate": 9.523809523809525e-06, "loss": 40.0828, "step": 6189 }, { "epoch": 147.3820895522388, "grad_norm": 34.303096771240234, "learning_rate": 9.522222222222223e-06, "loss": 39.218, "step": 6190 }, { "epoch": 147.40597014925373, "grad_norm": 33.030216217041016, "learning_rate": 9.520634920634921e-06, "loss": 40.3229, "step": 6191 }, { "epoch": 147.42985074626867, "grad_norm": 26.773529052734375, "learning_rate": 9.51904761904762e-06, "loss": 39.3039, "step": 6192 }, { "epoch": 147.45373134328358, "grad_norm": 22.8935546875, "learning_rate": 9.517460317460319e-06, "loss": 39.6118, "step": 6193 }, { "epoch": 147.47761194029852, "grad_norm": 36.28474044799805, "learning_rate": 9.515873015873016e-06, "loss": 39.2833, "step": 6194 }, { "epoch": 147.50149253731342, "grad_norm": 30.737974166870117, "learning_rate": 9.514285714285715e-06, "loss": 38.1781, "step": 6195 }, { "epoch": 147.52537313432836, "grad_norm": 35.12816619873047, "learning_rate": 9.512698412698414e-06, "loss": 38.8607, "step": 6196 }, { "epoch": 147.54925373134327, "grad_norm": NaN, "learning_rate": 9.511111111111112e-06, "loss": 60.3704, "step": 6197 }, { "epoch": 147.5731343283582, "grad_norm": 32.037559509277344, "learning_rate": 9.511111111111112e-06, "loss": 39.4876, "step": 6198 }, { "epoch": 147.59701492537314, "grad_norm": 26.51078987121582, "learning_rate": 9.50952380952381e-06, "loss": 38.7221, "step": 6199 }, { "epoch": 147.62089552238805, "grad_norm": 23.575544357299805, "learning_rate": 9.507936507936508e-06, "loss": 39.454, "step": 6200 }, { "epoch": 147.644776119403, "grad_norm": 31.265623092651367, "learning_rate": 9.506349206349206e-06, "loss": 39.9971, "step": 6201 }, { "epoch": 147.6686567164179, "grad_norm": 26.96292495727539, "learning_rate": 9.504761904761906e-06, "loss": 40.717, "step": 6202 }, { "epoch": 147.69253731343284, "grad_norm": 35.76007843017578, "learning_rate": 9.503174603174604e-06, "loss": 40.3732, "step": 6203 }, { "epoch": 147.71641791044777, "grad_norm": 32.061424255371094, "learning_rate": 9.501587301587302e-06, "loss": 40.2162, "step": 6204 }, { "epoch": 147.74029850746268, "grad_norm": 30.06816291809082, "learning_rate": 9.5e-06, "loss": 40.1051, "step": 6205 }, { "epoch": 147.76417910447762, "grad_norm": 26.645023345947266, "learning_rate": 9.498412698412699e-06, "loss": 39.4436, "step": 6206 }, { "epoch": 147.78805970149253, "grad_norm": 31.481412887573242, "learning_rate": 9.496825396825397e-06, "loss": 38.5907, "step": 6207 }, { "epoch": 147.81194029850747, "grad_norm": 25.024534225463867, "learning_rate": 9.495238095238097e-06, "loss": 39.9247, "step": 6208 }, { "epoch": 147.83582089552237, "grad_norm": 37.80125427246094, "learning_rate": 9.493650793650795e-06, "loss": 40.4303, "step": 6209 }, { "epoch": 147.8597014925373, "grad_norm": 30.088382720947266, "learning_rate": 9.492063492063493e-06, "loss": 38.2834, "step": 6210 }, { "epoch": 147.88358208955225, "grad_norm": 33.478736877441406, "learning_rate": 9.490476190476191e-06, "loss": 40.277, "step": 6211 }, { "epoch": 147.90746268656716, "grad_norm": 30.780107498168945, "learning_rate": 9.48888888888889e-06, "loss": 39.0267, "step": 6212 }, { "epoch": 147.9313432835821, "grad_norm": 26.820985794067383, "learning_rate": 9.48730158730159e-06, "loss": 39.6983, "step": 6213 }, { "epoch": 147.955223880597, "grad_norm": 28.82769775390625, "learning_rate": 9.485714285714287e-06, "loss": 39.2138, "step": 6214 }, { "epoch": 147.97910447761194, "grad_norm": 31.736270904541016, "learning_rate": 9.484126984126984e-06, "loss": 40.4601, "step": 6215 }, { "epoch": 148.0, "grad_norm": 21.348119735717773, "learning_rate": 9.482539682539684e-06, "loss": 34.7459, "step": 6216 }, { "epoch": 148.02388059701494, "grad_norm": 28.806316375732422, "learning_rate": 9.480952380952382e-06, "loss": 37.9885, "step": 6217 }, { "epoch": 148.04776119402985, "grad_norm": 28.255447387695312, "learning_rate": 9.47936507936508e-06, "loss": 40.8785, "step": 6218 }, { "epoch": 148.07164179104478, "grad_norm": 30.397302627563477, "learning_rate": 9.47777777777778e-06, "loss": 38.6547, "step": 6219 }, { "epoch": 148.0955223880597, "grad_norm": 27.137815475463867, "learning_rate": 9.476190476190476e-06, "loss": 40.4444, "step": 6220 }, { "epoch": 148.11940298507463, "grad_norm": 31.6801815032959, "learning_rate": 9.474603174603174e-06, "loss": 39.4132, "step": 6221 }, { "epoch": 148.14328358208957, "grad_norm": 30.153980255126953, "learning_rate": 9.473015873015874e-06, "loss": 39.8864, "step": 6222 }, { "epoch": 148.16716417910447, "grad_norm": 30.102392196655273, "learning_rate": 9.471428571428572e-06, "loss": 39.0032, "step": 6223 }, { "epoch": 148.1910447761194, "grad_norm": 28.591657638549805, "learning_rate": 9.46984126984127e-06, "loss": 40.2621, "step": 6224 }, { "epoch": 148.21492537313432, "grad_norm": 31.42799949645996, "learning_rate": 9.468253968253969e-06, "loss": 39.3243, "step": 6225 }, { "epoch": 148.23880597014926, "grad_norm": 27.41240692138672, "learning_rate": 9.466666666666667e-06, "loss": 40.58, "step": 6226 }, { "epoch": 148.26268656716417, "grad_norm": 30.882951736450195, "learning_rate": 9.465079365079367e-06, "loss": 40.3588, "step": 6227 }, { "epoch": 148.2865671641791, "grad_norm": 27.335161209106445, "learning_rate": 9.463492063492065e-06, "loss": 39.879, "step": 6228 }, { "epoch": 148.31044776119404, "grad_norm": 30.169204711914062, "learning_rate": 9.461904761904761e-06, "loss": 39.3827, "step": 6229 }, { "epoch": 148.33432835820895, "grad_norm": 27.254920959472656, "learning_rate": 9.460317460317461e-06, "loss": 40.1883, "step": 6230 }, { "epoch": 148.3582089552239, "grad_norm": 26.484397888183594, "learning_rate": 9.45873015873016e-06, "loss": 38.9431, "step": 6231 }, { "epoch": 148.3820895522388, "grad_norm": 23.49915313720703, "learning_rate": 9.457142857142858e-06, "loss": 39.6657, "step": 6232 }, { "epoch": 148.40597014925373, "grad_norm": 31.44021224975586, "learning_rate": 9.455555555555557e-06, "loss": 40.3813, "step": 6233 }, { "epoch": 148.42985074626867, "grad_norm": 24.419384002685547, "learning_rate": 9.453968253968254e-06, "loss": 40.4738, "step": 6234 }, { "epoch": 148.45373134328358, "grad_norm": 33.38460922241211, "learning_rate": 9.452380952380952e-06, "loss": 40.3119, "step": 6235 }, { "epoch": 148.47761194029852, "grad_norm": 27.34727668762207, "learning_rate": 9.450793650793652e-06, "loss": 39.2309, "step": 6236 }, { "epoch": 148.50149253731342, "grad_norm": 32.275970458984375, "learning_rate": 9.44920634920635e-06, "loss": 38.9233, "step": 6237 }, { "epoch": 148.52537313432836, "grad_norm": 29.967649459838867, "learning_rate": 9.447619047619048e-06, "loss": 39.0778, "step": 6238 }, { "epoch": 148.54925373134327, "grad_norm": 27.539777755737305, "learning_rate": 9.446031746031746e-06, "loss": 38.6598, "step": 6239 }, { "epoch": 148.5731343283582, "grad_norm": 23.671335220336914, "learning_rate": 9.444444444444445e-06, "loss": 39.1927, "step": 6240 }, { "epoch": 148.59701492537314, "grad_norm": 33.137210845947266, "learning_rate": 9.442857142857144e-06, "loss": 38.3873, "step": 6241 }, { "epoch": 148.62089552238805, "grad_norm": 27.124778747558594, "learning_rate": 9.441269841269843e-06, "loss": 39.8926, "step": 6242 }, { "epoch": 148.644776119403, "grad_norm": 27.500160217285156, "learning_rate": 9.43968253968254e-06, "loss": 41.0731, "step": 6243 }, { "epoch": 148.6686567164179, "grad_norm": 20.350675582885742, "learning_rate": 9.438095238095239e-06, "loss": 38.183, "step": 6244 }, { "epoch": 148.69253731343284, "grad_norm": 27.957395553588867, "learning_rate": 9.436507936507937e-06, "loss": 39.4262, "step": 6245 }, { "epoch": 148.71641791044777, "grad_norm": 23.82541275024414, "learning_rate": 9.434920634920635e-06, "loss": 39.3012, "step": 6246 }, { "epoch": 148.74029850746268, "grad_norm": 34.506385803222656, "learning_rate": 9.433333333333335e-06, "loss": 38.9767, "step": 6247 }, { "epoch": 148.76417910447762, "grad_norm": 29.38323974609375, "learning_rate": 9.431746031746033e-06, "loss": 39.8884, "step": 6248 }, { "epoch": 148.78805970149253, "grad_norm": 28.864707946777344, "learning_rate": 9.43015873015873e-06, "loss": 38.4068, "step": 6249 }, { "epoch": 148.81194029850747, "grad_norm": 26.762096405029297, "learning_rate": 9.42857142857143e-06, "loss": 40.1152, "step": 6250 }, { "epoch": 148.83582089552237, "grad_norm": 24.719018936157227, "learning_rate": 9.426984126984128e-06, "loss": 38.8501, "step": 6251 }, { "epoch": 148.8597014925373, "grad_norm": 20.85268211364746, "learning_rate": 9.425396825396826e-06, "loss": 39.1373, "step": 6252 }, { "epoch": 148.88358208955225, "grad_norm": 24.713544845581055, "learning_rate": 9.423809523809526e-06, "loss": 38.214, "step": 6253 }, { "epoch": 148.90746268656716, "grad_norm": 19.68970489501953, "learning_rate": 9.422222222222222e-06, "loss": 38.4203, "step": 6254 }, { "epoch": 148.9313432835821, "grad_norm": 28.356327056884766, "learning_rate": 9.420634920634922e-06, "loss": 40.5012, "step": 6255 }, { "epoch": 148.955223880597, "grad_norm": 24.059450149536133, "learning_rate": 9.41904761904762e-06, "loss": 40.1857, "step": 6256 }, { "epoch": 148.97910447761194, "grad_norm": 24.31195640563965, "learning_rate": 9.417460317460318e-06, "loss": 38.768, "step": 6257 }, { "epoch": 149.0, "grad_norm": 21.859241485595703, "learning_rate": 9.415873015873017e-06, "loss": 36.3293, "step": 6258 }, { "epoch": 149.02388059701494, "grad_norm": 21.140018463134766, "learning_rate": 9.414285714285715e-06, "loss": 39.1804, "step": 6259 }, { "epoch": 149.04776119402985, "grad_norm": 19.773056030273438, "learning_rate": 9.412698412698413e-06, "loss": 38.3969, "step": 6260 }, { "epoch": 149.07164179104478, "grad_norm": 16.957059860229492, "learning_rate": 9.411111111111113e-06, "loss": 40.5906, "step": 6261 }, { "epoch": 149.0955223880597, "grad_norm": 22.389060974121094, "learning_rate": 9.40952380952381e-06, "loss": 39.2568, "step": 6262 }, { "epoch": 149.11940298507463, "grad_norm": 19.9559326171875, "learning_rate": 9.407936507936509e-06, "loss": 40.3229, "step": 6263 }, { "epoch": 149.14328358208957, "grad_norm": 15.990846633911133, "learning_rate": 9.406349206349207e-06, "loss": 39.7053, "step": 6264 }, { "epoch": 149.16716417910447, "grad_norm": 20.258081436157227, "learning_rate": 9.404761904761905e-06, "loss": 37.9371, "step": 6265 }, { "epoch": 149.1910447761194, "grad_norm": 17.81937599182129, "learning_rate": 9.403174603174603e-06, "loss": 39.1156, "step": 6266 }, { "epoch": 149.21492537313432, "grad_norm": 17.503934860229492, "learning_rate": 9.401587301587303e-06, "loss": 38.8225, "step": 6267 }, { "epoch": 149.23880597014926, "grad_norm": 24.96233558654785, "learning_rate": 9.4e-06, "loss": 39.4187, "step": 6268 }, { "epoch": 149.26268656716417, "grad_norm": 16.594804763793945, "learning_rate": 9.3984126984127e-06, "loss": 38.9482, "step": 6269 }, { "epoch": 149.2865671641791, "grad_norm": 23.703876495361328, "learning_rate": 9.396825396825398e-06, "loss": 38.9453, "step": 6270 }, { "epoch": 149.31044776119404, "grad_norm": 19.64229393005371, "learning_rate": 9.395238095238096e-06, "loss": 39.8369, "step": 6271 }, { "epoch": 149.33432835820895, "grad_norm": 24.94515609741211, "learning_rate": 9.393650793650794e-06, "loss": 39.2128, "step": 6272 }, { "epoch": 149.3582089552239, "grad_norm": 20.51252555847168, "learning_rate": 9.392063492063492e-06, "loss": 39.5531, "step": 6273 }, { "epoch": 149.3820895522388, "grad_norm": 22.185834884643555, "learning_rate": 9.39047619047619e-06, "loss": 41.1707, "step": 6274 }, { "epoch": 149.40597014925373, "grad_norm": 20.402530670166016, "learning_rate": 9.38888888888889e-06, "loss": 38.2911, "step": 6275 }, { "epoch": 149.42985074626867, "grad_norm": 21.563535690307617, "learning_rate": 9.387301587301588e-06, "loss": 38.7527, "step": 6276 }, { "epoch": 149.45373134328358, "grad_norm": 20.71843719482422, "learning_rate": 9.385714285714287e-06, "loss": 39.2306, "step": 6277 }, { "epoch": 149.47761194029852, "grad_norm": 22.216678619384766, "learning_rate": 9.384126984126985e-06, "loss": 39.1267, "step": 6278 }, { "epoch": 149.50149253731342, "grad_norm": 17.682825088500977, "learning_rate": 9.382539682539683e-06, "loss": 40.6194, "step": 6279 }, { "epoch": 149.52537313432836, "grad_norm": 22.47300910949707, "learning_rate": 9.380952380952381e-06, "loss": 38.7946, "step": 6280 }, { "epoch": 149.54925373134327, "grad_norm": 16.163511276245117, "learning_rate": 9.379365079365081e-06, "loss": 40.6577, "step": 6281 }, { "epoch": 149.5731343283582, "grad_norm": 25.112506866455078, "learning_rate": 9.377777777777779e-06, "loss": 38.9654, "step": 6282 }, { "epoch": 149.59701492537314, "grad_norm": 24.64344596862793, "learning_rate": 9.376190476190477e-06, "loss": 40.4548, "step": 6283 }, { "epoch": 149.62089552238805, "grad_norm": 17.872581481933594, "learning_rate": 9.374603174603175e-06, "loss": 39.0748, "step": 6284 }, { "epoch": 149.644776119403, "grad_norm": 22.106613159179688, "learning_rate": 9.373015873015874e-06, "loss": 40.1216, "step": 6285 }, { "epoch": 149.6686567164179, "grad_norm": 19.284791946411133, "learning_rate": 9.371428571428572e-06, "loss": 39.2773, "step": 6286 }, { "epoch": 149.69253731343284, "grad_norm": 19.2523136138916, "learning_rate": 9.369841269841272e-06, "loss": 40.2495, "step": 6287 }, { "epoch": 149.71641791044777, "grad_norm": 18.848800659179688, "learning_rate": 9.368253968253968e-06, "loss": 39.1348, "step": 6288 }, { "epoch": 149.74029850746268, "grad_norm": 21.043163299560547, "learning_rate": 9.366666666666668e-06, "loss": 40.5966, "step": 6289 }, { "epoch": 149.76417910447762, "grad_norm": 17.63033676147461, "learning_rate": 9.365079365079366e-06, "loss": 40.1343, "step": 6290 }, { "epoch": 149.78805970149253, "grad_norm": 13.709476470947266, "learning_rate": 9.363492063492064e-06, "loss": 38.4333, "step": 6291 }, { "epoch": 149.81194029850747, "grad_norm": 21.13844108581543, "learning_rate": 9.361904761904762e-06, "loss": 39.8698, "step": 6292 }, { "epoch": 149.83582089552237, "grad_norm": 16.18905258178711, "learning_rate": 9.36031746031746e-06, "loss": 38.9989, "step": 6293 }, { "epoch": 149.8597014925373, "grad_norm": 21.491127014160156, "learning_rate": 9.358730158730159e-06, "loss": 39.4642, "step": 6294 }, { "epoch": 149.88358208955225, "grad_norm": 22.262081146240234, "learning_rate": 9.357142857142859e-06, "loss": 40.0517, "step": 6295 }, { "epoch": 149.90746268656716, "grad_norm": 17.686460494995117, "learning_rate": 9.355555555555557e-06, "loss": 40.9072, "step": 6296 }, { "epoch": 149.9313432835821, "grad_norm": 25.11887550354004, "learning_rate": 9.353968253968255e-06, "loss": 38.7171, "step": 6297 }, { "epoch": 149.955223880597, "grad_norm": 17.390230178833008, "learning_rate": 9.352380952380953e-06, "loss": 38.9999, "step": 6298 }, { "epoch": 149.97910447761194, "grad_norm": 26.161745071411133, "learning_rate": 9.350793650793651e-06, "loss": 39.9917, "step": 6299 }, { "epoch": 150.0, "grad_norm": 18.304304122924805, "learning_rate": 9.34920634920635e-06, "loss": 34.2472, "step": 6300 }, { "epoch": 150.0, "step": 6300, "total_flos": 3.0974324482122125e+17, "train_loss": 2.6563439275347998, "train_runtime": 12847.7486, "train_samples_per_second": 62.486, "train_steps_per_second": 0.49 }, { "epoch": 150.02388059701494, "grad_norm": 19.16498374938965, "learning_rate": 1e-05, "loss": 40.1809, "step": 6301 }, { "epoch": 150.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998599439775911e-06, "loss": 45.4434, "step": 6302 }, { "epoch": 150.07164179104478, "grad_norm": 279.3161926269531, "learning_rate": 9.998599439775911e-06, "loss": 45.5818, "step": 6303 }, { "epoch": 150.0955223880597, "grad_norm": 139.1039581298828, "learning_rate": 9.997198879551822e-06, "loss": 43.4873, "step": 6304 }, { "epoch": 150.11940298507463, "grad_norm": 67.4908676147461, "learning_rate": 9.995798319327733e-06, "loss": 40.3046, "step": 6305 }, { "epoch": 150.14328358208957, "grad_norm": 42.27750015258789, "learning_rate": 9.994397759103642e-06, "loss": 40.384, "step": 6306 }, { "epoch": 150.16716417910447, "grad_norm": 50.98027420043945, "learning_rate": 9.992997198879552e-06, "loss": 40.5811, "step": 6307 }, { "epoch": 150.1910447761194, "grad_norm": 45.15121078491211, "learning_rate": 9.991596638655463e-06, "loss": 40.4276, "step": 6308 }, { "epoch": 150.21492537313432, "grad_norm": 35.41339111328125, "learning_rate": 9.990196078431374e-06, "loss": 41.498, "step": 6309 }, { "epoch": 150.23880597014926, "grad_norm": 24.025501251220703, "learning_rate": 9.988795518207284e-06, "loss": 39.3925, "step": 6310 }, { "epoch": 150.26268656716417, "grad_norm": 28.514684677124023, "learning_rate": 9.987394957983195e-06, "loss": 39.7632, "step": 6311 }, { "epoch": 150.2865671641791, "grad_norm": 24.55237579345703, "learning_rate": 9.985994397759104e-06, "loss": 39.7918, "step": 6312 }, { "epoch": 150.31044776119404, "grad_norm": 24.92273712158203, "learning_rate": 9.984593837535014e-06, "loss": 39.4903, "step": 6313 }, { "epoch": 150.33432835820895, "grad_norm": 22.315248489379883, "learning_rate": 9.983193277310925e-06, "loss": 38.6691, "step": 6314 }, { "epoch": 150.3582089552239, "grad_norm": 21.531641006469727, "learning_rate": 9.981792717086836e-06, "loss": 39.9104, "step": 6315 }, { "epoch": 150.3820895522388, "grad_norm": 22.9049072265625, "learning_rate": 9.980392156862746e-06, "loss": 40.5113, "step": 6316 }, { "epoch": 150.40597014925373, "grad_norm": 22.430377960205078, "learning_rate": 9.978991596638657e-06, "loss": 39.4948, "step": 6317 }, { "epoch": 150.42985074626867, "grad_norm": 18.36344337463379, "learning_rate": 9.977591036414566e-06, "loss": 40.5239, "step": 6318 }, { "epoch": 150.45373134328358, "grad_norm": 21.41360855102539, "learning_rate": 9.976190476190477e-06, "loss": 40.053, "step": 6319 }, { "epoch": 150.47761194029852, "grad_norm": 17.077104568481445, "learning_rate": 9.974789915966387e-06, "loss": 39.8097, "step": 6320 }, { "epoch": 150.50149253731342, "grad_norm": 21.632736206054688, "learning_rate": 9.973389355742298e-06, "loss": 39.7021, "step": 6321 }, { "epoch": 150.52537313432836, "grad_norm": 20.047056198120117, "learning_rate": 9.971988795518209e-06, "loss": 40.2315, "step": 6322 }, { "epoch": 150.54925373134327, "grad_norm": 20.613143920898438, "learning_rate": 9.970588235294119e-06, "loss": 40.0007, "step": 6323 }, { "epoch": 150.5731343283582, "grad_norm": 17.636415481567383, "learning_rate": 9.969187675070028e-06, "loss": 40.4111, "step": 6324 }, { "epoch": 150.59701492537314, "grad_norm": 20.692312240600586, "learning_rate": 9.967787114845939e-06, "loss": 39.2482, "step": 6325 }, { "epoch": 150.62089552238805, "grad_norm": 14.423230171203613, "learning_rate": 9.96638655462185e-06, "loss": 38.3858, "step": 6326 }, { "epoch": 150.644776119403, "grad_norm": 15.524177551269531, "learning_rate": 9.96498599439776e-06, "loss": 38.3366, "step": 6327 }, { "epoch": 150.6686567164179, "grad_norm": 18.633893966674805, "learning_rate": 9.96358543417367e-06, "loss": 39.3271, "step": 6328 }, { "epoch": 150.69253731343284, "grad_norm": 19.223590850830078, "learning_rate": 9.962184873949581e-06, "loss": 39.2428, "step": 6329 }, { "epoch": 150.71641791044777, "grad_norm": 18.664216995239258, "learning_rate": 9.960784313725492e-06, "loss": 38.9784, "step": 6330 }, { "epoch": 150.74029850746268, "grad_norm": 16.15790367126465, "learning_rate": 9.959383753501401e-06, "loss": 39.3126, "step": 6331 }, { "epoch": 150.76417910447762, "grad_norm": 14.722516059875488, "learning_rate": 9.957983193277312e-06, "loss": 39.4758, "step": 6332 }, { "epoch": 150.78805970149253, "grad_norm": 18.573301315307617, "learning_rate": 9.956582633053222e-06, "loss": 39.6617, "step": 6333 }, { "epoch": 150.81194029850747, "grad_norm": 21.44211769104004, "learning_rate": 9.955182072829133e-06, "loss": 38.9266, "step": 6334 }, { "epoch": 150.83582089552237, "grad_norm": 19.530872344970703, "learning_rate": 9.953781512605043e-06, "loss": 39.3849, "step": 6335 }, { "epoch": 150.8597014925373, "grad_norm": 14.259345054626465, "learning_rate": 9.952380952380954e-06, "loss": 40.0807, "step": 6336 }, { "epoch": 150.88358208955225, "grad_norm": 18.270769119262695, "learning_rate": 9.950980392156863e-06, "loss": 38.9433, "step": 6337 }, { "epoch": 150.90746268656716, "grad_norm": 23.193754196166992, "learning_rate": 9.949579831932774e-06, "loss": 39.1875, "step": 6338 }, { "epoch": 150.9313432835821, "grad_norm": 16.407913208007812, "learning_rate": 9.948179271708684e-06, "loss": 39.411, "step": 6339 }, { "epoch": 150.955223880597, "grad_norm": 14.759758949279785, "learning_rate": 9.946778711484595e-06, "loss": 39.4402, "step": 6340 }, { "epoch": 150.97910447761194, "grad_norm": 22.55985450744629, "learning_rate": 9.945378151260506e-06, "loss": 38.5775, "step": 6341 }, { "epoch": 151.0, "grad_norm": 17.532306671142578, "learning_rate": 9.943977591036416e-06, "loss": 33.8262, "step": 6342 }, { "epoch": 151.02388059701494, "grad_norm": 19.179887771606445, "learning_rate": 9.942577030812325e-06, "loss": 41.4249, "step": 6343 }, { "epoch": 151.04776119402985, "grad_norm": 15.04033374786377, "learning_rate": 9.941176470588236e-06, "loss": 39.8833, "step": 6344 }, { "epoch": 151.07164179104478, "grad_norm": 16.083768844604492, "learning_rate": 9.939775910364146e-06, "loss": 38.9967, "step": 6345 }, { "epoch": 151.0955223880597, "grad_norm": 16.410978317260742, "learning_rate": 9.938375350140057e-06, "loss": 38.7727, "step": 6346 }, { "epoch": 151.11940298507463, "grad_norm": 21.35173988342285, "learning_rate": 9.936974789915968e-06, "loss": 40.7854, "step": 6347 }, { "epoch": 151.14328358208957, "grad_norm": 19.4251766204834, "learning_rate": 9.935574229691878e-06, "loss": 39.4606, "step": 6348 }, { "epoch": 151.16716417910447, "grad_norm": 18.5143985748291, "learning_rate": 9.934173669467789e-06, "loss": 39.06, "step": 6349 }, { "epoch": 151.1910447761194, "grad_norm": 15.860893249511719, "learning_rate": 9.932773109243698e-06, "loss": 40.4898, "step": 6350 }, { "epoch": 151.21492537313432, "grad_norm": 18.80919075012207, "learning_rate": 9.931372549019609e-06, "loss": 38.6399, "step": 6351 }, { "epoch": 151.23880597014926, "grad_norm": 18.1273250579834, "learning_rate": 9.92997198879552e-06, "loss": 40.0928, "step": 6352 }, { "epoch": 151.26268656716417, "grad_norm": 17.43776512145996, "learning_rate": 9.92857142857143e-06, "loss": 38.6823, "step": 6353 }, { "epoch": 151.2865671641791, "grad_norm": 15.621599197387695, "learning_rate": 9.92717086834734e-06, "loss": 39.3631, "step": 6354 }, { "epoch": 151.31044776119404, "grad_norm": 20.585025787353516, "learning_rate": 9.925770308123251e-06, "loss": 39.4492, "step": 6355 }, { "epoch": 151.33432835820895, "grad_norm": 23.47856330871582, "learning_rate": 9.92436974789916e-06, "loss": 39.4244, "step": 6356 }, { "epoch": 151.3582089552239, "grad_norm": 12.733617782592773, "learning_rate": 9.92296918767507e-06, "loss": 39.6639, "step": 6357 }, { "epoch": 151.3820895522388, "grad_norm": 29.050830841064453, "learning_rate": 9.921568627450981e-06, "loss": 39.0073, "step": 6358 }, { "epoch": 151.40597014925373, "grad_norm": 20.334535598754883, "learning_rate": 9.920168067226892e-06, "loss": 40.0843, "step": 6359 }, { "epoch": 151.42985074626867, "grad_norm": 18.922494888305664, "learning_rate": 9.918767507002803e-06, "loss": 39.7166, "step": 6360 }, { "epoch": 151.45373134328358, "grad_norm": 22.9791259765625, "learning_rate": 9.917366946778713e-06, "loss": 39.1399, "step": 6361 }, { "epoch": 151.47761194029852, "grad_norm": 19.109474182128906, "learning_rate": 9.915966386554622e-06, "loss": 39.8001, "step": 6362 }, { "epoch": 151.50149253731342, "grad_norm": 16.108705520629883, "learning_rate": 9.914565826330533e-06, "loss": 38.9864, "step": 6363 }, { "epoch": 151.52537313432836, "grad_norm": 19.968387603759766, "learning_rate": 9.913165266106443e-06, "loss": 40.3586, "step": 6364 }, { "epoch": 151.54925373134327, "grad_norm": 19.269989013671875, "learning_rate": 9.911764705882354e-06, "loss": 39.6837, "step": 6365 }, { "epoch": 151.5731343283582, "grad_norm": 19.654542922973633, "learning_rate": 9.910364145658265e-06, "loss": 39.6919, "step": 6366 }, { "epoch": 151.59701492537314, "grad_norm": 16.44729232788086, "learning_rate": 9.908963585434175e-06, "loss": 39.3837, "step": 6367 }, { "epoch": 151.62089552238805, "grad_norm": 21.828369140625, "learning_rate": 9.907563025210084e-06, "loss": 38.4021, "step": 6368 }, { "epoch": 151.644776119403, "grad_norm": 18.849733352661133, "learning_rate": 9.906162464985995e-06, "loss": 38.9801, "step": 6369 }, { "epoch": 151.6686567164179, "grad_norm": 15.600545883178711, "learning_rate": 9.904761904761906e-06, "loss": 39.9175, "step": 6370 }, { "epoch": 151.69253731343284, "grad_norm": 27.39472770690918, "learning_rate": 9.903361344537816e-06, "loss": 38.6417, "step": 6371 }, { "epoch": 151.71641791044777, "grad_norm": 18.112911224365234, "learning_rate": 9.901960784313727e-06, "loss": 40.6345, "step": 6372 }, { "epoch": 151.74029850746268, "grad_norm": 16.16978645324707, "learning_rate": 9.900560224089638e-06, "loss": 39.7246, "step": 6373 }, { "epoch": 151.76417910447762, "grad_norm": 23.89118766784668, "learning_rate": 9.899159663865548e-06, "loss": 39.8915, "step": 6374 }, { "epoch": 151.78805970149253, "grad_norm": 17.466960906982422, "learning_rate": 9.897759103641457e-06, "loss": 38.2378, "step": 6375 }, { "epoch": 151.81194029850747, "grad_norm": 22.093915939331055, "learning_rate": 9.896358543417368e-06, "loss": 38.6562, "step": 6376 }, { "epoch": 151.83582089552237, "grad_norm": 24.06990623474121, "learning_rate": 9.894957983193278e-06, "loss": 40.2845, "step": 6377 }, { "epoch": 151.8597014925373, "grad_norm": 18.051422119140625, "learning_rate": 9.893557422969189e-06, "loss": 39.2352, "step": 6378 }, { "epoch": 151.88358208955225, "grad_norm": 21.78950309753418, "learning_rate": 9.8921568627451e-06, "loss": 38.5991, "step": 6379 }, { "epoch": 151.90746268656716, "grad_norm": 18.29522132873535, "learning_rate": 9.89075630252101e-06, "loss": 38.2873, "step": 6380 }, { "epoch": 151.9313432835821, "grad_norm": 24.120948791503906, "learning_rate": 9.88935574229692e-06, "loss": 38.6462, "step": 6381 }, { "epoch": 151.955223880597, "grad_norm": 17.51114845275879, "learning_rate": 9.88795518207283e-06, "loss": 39.1381, "step": 6382 }, { "epoch": 151.97910447761194, "grad_norm": 22.926584243774414, "learning_rate": 9.88655462184874e-06, "loss": 38.8322, "step": 6383 }, { "epoch": 152.0, "grad_norm": 18.463191986083984, "learning_rate": 9.885154061624651e-06, "loss": 34.0882, "step": 6384 }, { "epoch": 152.02388059701494, "grad_norm": 23.521703720092773, "learning_rate": 9.883753501400562e-06, "loss": 41.0516, "step": 6385 }, { "epoch": 152.04776119402985, "grad_norm": 19.793399810791016, "learning_rate": 9.882352941176472e-06, "loss": 40.0187, "step": 6386 }, { "epoch": 152.07164179104478, "grad_norm": 26.50450897216797, "learning_rate": 9.880952380952381e-06, "loss": 38.7966, "step": 6387 }, { "epoch": 152.0955223880597, "grad_norm": 18.84419822692871, "learning_rate": 9.879551820728292e-06, "loss": 39.4662, "step": 6388 }, { "epoch": 152.11940298507463, "grad_norm": 23.414226531982422, "learning_rate": 9.878151260504203e-06, "loss": 39.6741, "step": 6389 }, { "epoch": 152.14328358208957, "grad_norm": 24.03367042541504, "learning_rate": 9.876750700280113e-06, "loss": 39.5593, "step": 6390 }, { "epoch": 152.16716417910447, "grad_norm": 17.131258010864258, "learning_rate": 9.875350140056024e-06, "loss": 39.0036, "step": 6391 }, { "epoch": 152.1910447761194, "grad_norm": 20.72348976135254, "learning_rate": 9.873949579831935e-06, "loss": 38.6815, "step": 6392 }, { "epoch": 152.21492537313432, "grad_norm": 16.66873550415039, "learning_rate": 9.872549019607845e-06, "loss": 39.2612, "step": 6393 }, { "epoch": 152.23880597014926, "grad_norm": 17.73870849609375, "learning_rate": 9.871148459383754e-06, "loss": 39.6521, "step": 6394 }, { "epoch": 152.26268656716417, "grad_norm": 26.47159767150879, "learning_rate": 9.869747899159665e-06, "loss": 40.0595, "step": 6395 }, { "epoch": 152.2865671641791, "grad_norm": 15.393265724182129, "learning_rate": 9.868347338935575e-06, "loss": 39.7537, "step": 6396 }, { "epoch": 152.31044776119404, "grad_norm": NaN, "learning_rate": 9.866946778711486e-06, "loss": 69.9822, "step": 6397 }, { "epoch": 152.33432835820895, "grad_norm": 17.150754928588867, "learning_rate": 9.866946778711486e-06, "loss": 39.7025, "step": 6398 }, { "epoch": 152.3582089552239, "grad_norm": 19.011913299560547, "learning_rate": 9.865546218487397e-06, "loss": 40.6093, "step": 6399 }, { "epoch": 152.3820895522388, "grad_norm": 18.70676612854004, "learning_rate": 9.864145658263307e-06, "loss": 38.6835, "step": 6400 }, { "epoch": 152.40597014925373, "grad_norm": 16.673490524291992, "learning_rate": 9.862745098039216e-06, "loss": 38.8727, "step": 6401 }, { "epoch": 152.42985074626867, "grad_norm": 15.896980285644531, "learning_rate": 9.861344537815127e-06, "loss": 39.4872, "step": 6402 }, { "epoch": 152.45373134328358, "grad_norm": 19.734540939331055, "learning_rate": 9.859943977591038e-06, "loss": 40.6661, "step": 6403 }, { "epoch": 152.47761194029852, "grad_norm": 20.29810333251953, "learning_rate": 9.858543417366948e-06, "loss": 38.7285, "step": 6404 }, { "epoch": 152.50149253731342, "grad_norm": 19.600051879882812, "learning_rate": 9.857142857142859e-06, "loss": 39.5563, "step": 6405 }, { "epoch": 152.52537313432836, "grad_norm": 17.804553985595703, "learning_rate": 9.85574229691877e-06, "loss": 37.6632, "step": 6406 }, { "epoch": 152.54925373134327, "grad_norm": 16.141559600830078, "learning_rate": 9.854341736694678e-06, "loss": 39.5719, "step": 6407 }, { "epoch": 152.5731343283582, "grad_norm": 13.319794654846191, "learning_rate": 9.852941176470589e-06, "loss": 39.1491, "step": 6408 }, { "epoch": 152.59701492537314, "grad_norm": 16.20441246032715, "learning_rate": 9.8515406162465e-06, "loss": 38.9358, "step": 6409 }, { "epoch": 152.62089552238805, "grad_norm": 16.522918701171875, "learning_rate": 9.85014005602241e-06, "loss": 40.0617, "step": 6410 }, { "epoch": 152.644776119403, "grad_norm": 21.760295867919922, "learning_rate": 9.848739495798321e-06, "loss": 39.4102, "step": 6411 }, { "epoch": 152.6686567164179, "grad_norm": 18.06766700744629, "learning_rate": 9.847338935574232e-06, "loss": 39.4243, "step": 6412 }, { "epoch": 152.69253731343284, "grad_norm": 15.790227890014648, "learning_rate": 9.84593837535014e-06, "loss": 39.2864, "step": 6413 }, { "epoch": 152.71641791044777, "grad_norm": 12.733904838562012, "learning_rate": 9.844537815126051e-06, "loss": 38.8609, "step": 6414 }, { "epoch": 152.74029850746268, "grad_norm": 17.441940307617188, "learning_rate": 9.843137254901962e-06, "loss": 39.965, "step": 6415 }, { "epoch": 152.76417910447762, "grad_norm": 18.61382293701172, "learning_rate": 9.84173669467787e-06, "loss": 38.837, "step": 6416 }, { "epoch": 152.78805970149253, "grad_norm": 16.26108169555664, "learning_rate": 9.840336134453781e-06, "loss": 39.2805, "step": 6417 }, { "epoch": 152.81194029850747, "grad_norm": 17.11221694946289, "learning_rate": 9.838935574229692e-06, "loss": 39.2356, "step": 6418 }, { "epoch": 152.83582089552237, "grad_norm": 11.892387390136719, "learning_rate": 9.837535014005603e-06, "loss": 37.877, "step": 6419 }, { "epoch": 152.8597014925373, "grad_norm": 15.409753799438477, "learning_rate": 9.836134453781513e-06, "loss": 38.1532, "step": 6420 }, { "epoch": 152.88358208955225, "grad_norm": 16.553173065185547, "learning_rate": 9.834733893557424e-06, "loss": 39.9671, "step": 6421 }, { "epoch": 152.90746268656716, "grad_norm": 15.028477668762207, "learning_rate": 9.833333333333333e-06, "loss": 38.5096, "step": 6422 }, { "epoch": 152.9313432835821, "grad_norm": 15.761276245117188, "learning_rate": 9.831932773109244e-06, "loss": 39.7583, "step": 6423 }, { "epoch": 152.955223880597, "grad_norm": 17.00655174255371, "learning_rate": 9.830532212885154e-06, "loss": 38.7234, "step": 6424 }, { "epoch": 152.97910447761194, "grad_norm": 15.511736869812012, "learning_rate": 9.829131652661065e-06, "loss": 39.256, "step": 6425 }, { "epoch": 153.0, "grad_norm": 19.71329116821289, "learning_rate": 9.827731092436975e-06, "loss": 34.6084, "step": 6426 }, { "epoch": 153.02388059701494, "grad_norm": 17.619407653808594, "learning_rate": 9.826330532212886e-06, "loss": 39.3749, "step": 6427 }, { "epoch": 153.04776119402985, "grad_norm": 21.454179763793945, "learning_rate": 9.824929971988795e-06, "loss": 39.189, "step": 6428 }, { "epoch": 153.07164179104478, "grad_norm": 15.592042922973633, "learning_rate": 9.823529411764706e-06, "loss": 40.1688, "step": 6429 }, { "epoch": 153.0955223880597, "grad_norm": 18.97234344482422, "learning_rate": 9.822128851540616e-06, "loss": 38.9257, "step": 6430 }, { "epoch": 153.11940298507463, "grad_norm": 22.144588470458984, "learning_rate": 9.820728291316527e-06, "loss": 39.408, "step": 6431 }, { "epoch": 153.14328358208957, "grad_norm": 22.970861434936523, "learning_rate": 9.819327731092438e-06, "loss": 39.0693, "step": 6432 }, { "epoch": 153.16716417910447, "grad_norm": 14.850386619567871, "learning_rate": 9.817927170868348e-06, "loss": 38.848, "step": 6433 }, { "epoch": 153.1910447761194, "grad_norm": 23.122474670410156, "learning_rate": 9.816526610644259e-06, "loss": 38.69, "step": 6434 }, { "epoch": 153.21492537313432, "grad_norm": 20.157529830932617, "learning_rate": 9.815126050420168e-06, "loss": 38.522, "step": 6435 }, { "epoch": 153.23880597014926, "grad_norm": 16.902263641357422, "learning_rate": 9.813725490196078e-06, "loss": 39.1863, "step": 6436 }, { "epoch": 153.26268656716417, "grad_norm": 20.05976676940918, "learning_rate": 9.812324929971989e-06, "loss": 38.8603, "step": 6437 }, { "epoch": 153.2865671641791, "grad_norm": 21.53315544128418, "learning_rate": 9.8109243697479e-06, "loss": 40.4414, "step": 6438 }, { "epoch": 153.31044776119404, "grad_norm": 17.6829891204834, "learning_rate": 9.80952380952381e-06, "loss": 39.3266, "step": 6439 }, { "epoch": 153.33432835820895, "grad_norm": 18.332361221313477, "learning_rate": 9.808123249299721e-06, "loss": 39.7485, "step": 6440 }, { "epoch": 153.3582089552239, "grad_norm": 13.068806648254395, "learning_rate": 9.80672268907563e-06, "loss": 40.4783, "step": 6441 }, { "epoch": 153.3820895522388, "grad_norm": 24.55851173400879, "learning_rate": 9.80532212885154e-06, "loss": 38.5473, "step": 6442 }, { "epoch": 153.40597014925373, "grad_norm": 20.670440673828125, "learning_rate": 9.803921568627451e-06, "loss": 39.1716, "step": 6443 }, { "epoch": 153.42985074626867, "grad_norm": 16.37117576599121, "learning_rate": 9.802521008403362e-06, "loss": 39.5912, "step": 6444 }, { "epoch": 153.45373134328358, "grad_norm": 22.2893009185791, "learning_rate": 9.801120448179273e-06, "loss": 39.633, "step": 6445 }, { "epoch": 153.47761194029852, "grad_norm": 17.918243408203125, "learning_rate": 9.799719887955183e-06, "loss": 40.6878, "step": 6446 }, { "epoch": 153.50149253731342, "grad_norm": 15.346482276916504, "learning_rate": 9.798319327731092e-06, "loss": 38.6619, "step": 6447 }, { "epoch": 153.52537313432836, "grad_norm": 14.18155574798584, "learning_rate": 9.796918767507003e-06, "loss": 38.8895, "step": 6448 }, { "epoch": 153.54925373134327, "grad_norm": 17.546628952026367, "learning_rate": 9.795518207282913e-06, "loss": 40.0357, "step": 6449 }, { "epoch": 153.5731343283582, "grad_norm": 20.402925491333008, "learning_rate": 9.794117647058824e-06, "loss": 39.3683, "step": 6450 }, { "epoch": 153.59701492537314, "grad_norm": 19.142513275146484, "learning_rate": 9.792717086834735e-06, "loss": 39.1011, "step": 6451 }, { "epoch": 153.62089552238805, "grad_norm": 22.166929244995117, "learning_rate": 9.791316526610645e-06, "loss": 38.6932, "step": 6452 }, { "epoch": 153.644776119403, "grad_norm": 16.393999099731445, "learning_rate": 9.789915966386554e-06, "loss": 39.9522, "step": 6453 }, { "epoch": 153.6686567164179, "grad_norm": 20.74897003173828, "learning_rate": 9.788515406162465e-06, "loss": 39.4043, "step": 6454 }, { "epoch": 153.69253731343284, "grad_norm": 18.162193298339844, "learning_rate": 9.787114845938376e-06, "loss": 38.9868, "step": 6455 }, { "epoch": 153.71641791044777, "grad_norm": 17.61060905456543, "learning_rate": 9.785714285714286e-06, "loss": 39.2859, "step": 6456 }, { "epoch": 153.74029850746268, "grad_norm": 17.78687858581543, "learning_rate": 9.784313725490197e-06, "loss": 39.4746, "step": 6457 }, { "epoch": 153.76417910447762, "grad_norm": 20.72416114807129, "learning_rate": 9.782913165266107e-06, "loss": 39.1889, "step": 6458 }, { "epoch": 153.78805970149253, "grad_norm": 14.441688537597656, "learning_rate": 9.781512605042018e-06, "loss": 38.0719, "step": 6459 }, { "epoch": 153.81194029850747, "grad_norm": 18.633865356445312, "learning_rate": 9.780112044817927e-06, "loss": 40.0062, "step": 6460 }, { "epoch": 153.83582089552237, "grad_norm": 16.08208656311035, "learning_rate": 9.778711484593838e-06, "loss": 39.807, "step": 6461 }, { "epoch": 153.8597014925373, "grad_norm": 21.865211486816406, "learning_rate": 9.777310924369748e-06, "loss": 38.8119, "step": 6462 }, { "epoch": 153.88358208955225, "grad_norm": 18.500150680541992, "learning_rate": 9.775910364145659e-06, "loss": 39.2655, "step": 6463 }, { "epoch": 153.90746268656716, "grad_norm": 17.913738250732422, "learning_rate": 9.77450980392157e-06, "loss": 39.6616, "step": 6464 }, { "epoch": 153.9313432835821, "grad_norm": 19.963655471801758, "learning_rate": 9.77310924369748e-06, "loss": 38.6005, "step": 6465 }, { "epoch": 153.955223880597, "grad_norm": 21.53469467163086, "learning_rate": 9.77170868347339e-06, "loss": 38.5381, "step": 6466 }, { "epoch": 153.97910447761194, "grad_norm": 23.95380973815918, "learning_rate": 9.7703081232493e-06, "loss": 39.2278, "step": 6467 }, { "epoch": 154.0, "grad_norm": NaN, "learning_rate": 9.76890756302521e-06, "loss": 54.3577, "step": 6468 }, { "epoch": 154.02388059701494, "grad_norm": 15.17541217803955, "learning_rate": 9.76890756302521e-06, "loss": 37.9482, "step": 6469 }, { "epoch": 154.04776119402985, "grad_norm": 21.523469924926758, "learning_rate": 9.767507002801121e-06, "loss": 40.279, "step": 6470 }, { "epoch": 154.07164179104478, "grad_norm": 21.303739547729492, "learning_rate": 9.766106442577032e-06, "loss": 38.5729, "step": 6471 }, { "epoch": 154.0955223880597, "grad_norm": 20.074615478515625, "learning_rate": 9.764705882352942e-06, "loss": 40.2056, "step": 6472 }, { "epoch": 154.11940298507463, "grad_norm": 15.129366874694824, "learning_rate": 9.763305322128851e-06, "loss": 39.6186, "step": 6473 }, { "epoch": 154.14328358208957, "grad_norm": 20.436534881591797, "learning_rate": 9.761904761904762e-06, "loss": 38.1005, "step": 6474 }, { "epoch": 154.16716417910447, "grad_norm": 20.725431442260742, "learning_rate": 9.760504201680673e-06, "loss": 40.411, "step": 6475 }, { "epoch": 154.1910447761194, "grad_norm": 19.512540817260742, "learning_rate": 9.759103641456583e-06, "loss": 40.3164, "step": 6476 }, { "epoch": 154.21492537313432, "grad_norm": 17.709447860717773, "learning_rate": 9.757703081232494e-06, "loss": 39.4096, "step": 6477 }, { "epoch": 154.23880597014926, "grad_norm": 18.833778381347656, "learning_rate": 9.756302521008404e-06, "loss": 38.1548, "step": 6478 }, { "epoch": 154.26268656716417, "grad_norm": 16.236936569213867, "learning_rate": 9.754901960784315e-06, "loss": 39.5843, "step": 6479 }, { "epoch": 154.2865671641791, "grad_norm": 18.85942268371582, "learning_rate": 9.753501400560224e-06, "loss": 39.1643, "step": 6480 }, { "epoch": 154.31044776119404, "grad_norm": 14.820978164672852, "learning_rate": 9.752100840336135e-06, "loss": 38.4749, "step": 6481 }, { "epoch": 154.33432835820895, "grad_norm": 18.847061157226562, "learning_rate": 9.750700280112045e-06, "loss": 38.5442, "step": 6482 }, { "epoch": 154.3582089552239, "grad_norm": 16.622020721435547, "learning_rate": 9.749299719887956e-06, "loss": 39.7202, "step": 6483 }, { "epoch": 154.3820895522388, "grad_norm": 17.376924514770508, "learning_rate": 9.747899159663867e-06, "loss": 37.7172, "step": 6484 }, { "epoch": 154.40597014925373, "grad_norm": 14.59231185913086, "learning_rate": 9.746498599439777e-06, "loss": 39.013, "step": 6485 }, { "epoch": 154.42985074626867, "grad_norm": 19.942195892333984, "learning_rate": 9.745098039215686e-06, "loss": 39.7675, "step": 6486 }, { "epoch": 154.45373134328358, "grad_norm": 17.211233139038086, "learning_rate": 9.743697478991597e-06, "loss": 40.5986, "step": 6487 }, { "epoch": 154.47761194029852, "grad_norm": 19.75026512145996, "learning_rate": 9.742296918767507e-06, "loss": 36.8232, "step": 6488 }, { "epoch": 154.50149253731342, "grad_norm": 21.47499656677246, "learning_rate": 9.740896358543418e-06, "loss": 38.4644, "step": 6489 }, { "epoch": 154.52537313432836, "grad_norm": 15.849268913269043, "learning_rate": 9.739495798319329e-06, "loss": 40.2922, "step": 6490 }, { "epoch": 154.54925373134327, "grad_norm": 16.159347534179688, "learning_rate": 9.73809523809524e-06, "loss": 39.124, "step": 6491 }, { "epoch": 154.5731343283582, "grad_norm": 18.291040420532227, "learning_rate": 9.736694677871148e-06, "loss": 38.9641, "step": 6492 }, { "epoch": 154.59701492537314, "grad_norm": 17.57895278930664, "learning_rate": 9.735294117647059e-06, "loss": 40.3195, "step": 6493 }, { "epoch": 154.62089552238805, "grad_norm": 15.273983001708984, "learning_rate": 9.73389355742297e-06, "loss": 39.4168, "step": 6494 }, { "epoch": 154.644776119403, "grad_norm": 18.084733963012695, "learning_rate": 9.73249299719888e-06, "loss": 39.686, "step": 6495 }, { "epoch": 154.6686567164179, "grad_norm": 14.910906791687012, "learning_rate": 9.731092436974791e-06, "loss": 39.4061, "step": 6496 }, { "epoch": 154.69253731343284, "grad_norm": 20.256969451904297, "learning_rate": 9.729691876750702e-06, "loss": 40.062, "step": 6497 }, { "epoch": 154.71641791044777, "grad_norm": 17.579275131225586, "learning_rate": 9.72829131652661e-06, "loss": 38.9151, "step": 6498 }, { "epoch": 154.74029850746268, "grad_norm": 15.15353775024414, "learning_rate": 9.726890756302521e-06, "loss": 37.7658, "step": 6499 }, { "epoch": 154.76417910447762, "grad_norm": 18.60456085205078, "learning_rate": 9.725490196078432e-06, "loss": 39.4208, "step": 6500 }, { "epoch": 154.78805970149253, "grad_norm": 23.808212280273438, "learning_rate": 9.724089635854342e-06, "loss": 39.0267, "step": 6501 }, { "epoch": 154.81194029850747, "grad_norm": 17.379362106323242, "learning_rate": 9.722689075630253e-06, "loss": 40.4376, "step": 6502 }, { "epoch": 154.83582089552237, "grad_norm": 17.4511775970459, "learning_rate": 9.721288515406164e-06, "loss": 39.1269, "step": 6503 }, { "epoch": 154.8597014925373, "grad_norm": 15.80515193939209, "learning_rate": 9.719887955182074e-06, "loss": 38.1002, "step": 6504 }, { "epoch": 154.88358208955225, "grad_norm": 17.451370239257812, "learning_rate": 9.718487394957983e-06, "loss": 40.8626, "step": 6505 }, { "epoch": 154.90746268656716, "grad_norm": 16.299781799316406, "learning_rate": 9.717086834733894e-06, "loss": 38.9524, "step": 6506 }, { "epoch": 154.9313432835821, "grad_norm": 30.887243270874023, "learning_rate": 9.715686274509805e-06, "loss": 38.2993, "step": 6507 }, { "epoch": 154.955223880597, "grad_norm": 17.55278205871582, "learning_rate": 9.714285714285715e-06, "loss": 39.7168, "step": 6508 }, { "epoch": 154.97910447761194, "grad_norm": 20.347652435302734, "learning_rate": 9.712885154061626e-06, "loss": 39.61, "step": 6509 }, { "epoch": 155.0, "grad_norm": 24.018211364746094, "learning_rate": 9.711484593837536e-06, "loss": 34.3992, "step": 6510 }, { "epoch": 155.02388059701494, "grad_norm": 18.324909210205078, "learning_rate": 9.710084033613445e-06, "loss": 38.85, "step": 6511 }, { "epoch": 155.04776119402985, "grad_norm": 36.07960510253906, "learning_rate": 9.708683473389356e-06, "loss": 40.9112, "step": 6512 }, { "epoch": 155.07164179104478, "grad_norm": 21.635616302490234, "learning_rate": 9.707282913165267e-06, "loss": 38.5548, "step": 6513 }, { "epoch": 155.0955223880597, "grad_norm": 34.621952056884766, "learning_rate": 9.705882352941177e-06, "loss": 38.9943, "step": 6514 }, { "epoch": 155.11940298507463, "grad_norm": 21.3360652923584, "learning_rate": 9.704481792717088e-06, "loss": 39.6249, "step": 6515 }, { "epoch": 155.14328358208957, "grad_norm": 29.964282989501953, "learning_rate": 9.703081232492999e-06, "loss": 40.3016, "step": 6516 }, { "epoch": 155.16716417910447, "grad_norm": 21.944597244262695, "learning_rate": 9.701680672268908e-06, "loss": 39.8295, "step": 6517 }, { "epoch": 155.1910447761194, "grad_norm": 22.256132125854492, "learning_rate": 9.700280112044818e-06, "loss": 38.3356, "step": 6518 }, { "epoch": 155.21492537313432, "grad_norm": 30.97159194946289, "learning_rate": 9.698879551820729e-06, "loss": 39.0454, "step": 6519 }, { "epoch": 155.23880597014926, "grad_norm": 19.811222076416016, "learning_rate": 9.69747899159664e-06, "loss": 39.5204, "step": 6520 }, { "epoch": 155.26268656716417, "grad_norm": 40.61001205444336, "learning_rate": 9.69607843137255e-06, "loss": 38.2691, "step": 6521 }, { "epoch": 155.2865671641791, "grad_norm": 30.622100830078125, "learning_rate": 9.69467787114846e-06, "loss": 39.5193, "step": 6522 }, { "epoch": 155.31044776119404, "grad_norm": 39.44752883911133, "learning_rate": 9.693277310924371e-06, "loss": 39.8697, "step": 6523 }, { "epoch": 155.33432835820895, "grad_norm": 31.14112663269043, "learning_rate": 9.69187675070028e-06, "loss": 38.5863, "step": 6524 }, { "epoch": 155.3582089552239, "grad_norm": 41.381629943847656, "learning_rate": 9.690476190476191e-06, "loss": 40.1567, "step": 6525 }, { "epoch": 155.3820895522388, "grad_norm": 36.835693359375, "learning_rate": 9.689075630252102e-06, "loss": 39.1249, "step": 6526 }, { "epoch": 155.40597014925373, "grad_norm": 29.718339920043945, "learning_rate": 9.687675070028012e-06, "loss": 38.2004, "step": 6527 }, { "epoch": 155.42985074626867, "grad_norm": 30.30554962158203, "learning_rate": 9.686274509803923e-06, "loss": 38.4457, "step": 6528 }, { "epoch": 155.45373134328358, "grad_norm": 33.8006591796875, "learning_rate": 9.684873949579834e-06, "loss": 39.0332, "step": 6529 }, { "epoch": 155.47761194029852, "grad_norm": 24.40614128112793, "learning_rate": 9.683473389355742e-06, "loss": 37.3829, "step": 6530 }, { "epoch": 155.50149253731342, "grad_norm": 42.24560546875, "learning_rate": 9.682072829131653e-06, "loss": 39.3422, "step": 6531 }, { "epoch": 155.52537313432836, "grad_norm": 37.031776428222656, "learning_rate": 9.680672268907564e-06, "loss": 38.8629, "step": 6532 }, { "epoch": 155.54925373134327, "grad_norm": 33.795902252197266, "learning_rate": 9.679271708683474e-06, "loss": 39.8452, "step": 6533 }, { "epoch": 155.5731343283582, "grad_norm": 33.857696533203125, "learning_rate": 9.677871148459385e-06, "loss": 39.8327, "step": 6534 }, { "epoch": 155.59701492537314, "grad_norm": 29.091102600097656, "learning_rate": 9.676470588235296e-06, "loss": 40.1465, "step": 6535 }, { "epoch": 155.62089552238805, "grad_norm": 24.67340087890625, "learning_rate": 9.675070028011205e-06, "loss": 39.6256, "step": 6536 }, { "epoch": 155.644776119403, "grad_norm": 34.69873809814453, "learning_rate": 9.673669467787115e-06, "loss": 38.4067, "step": 6537 }, { "epoch": 155.6686567164179, "grad_norm": 29.608654022216797, "learning_rate": 9.672268907563026e-06, "loss": 39.2891, "step": 6538 }, { "epoch": 155.69253731343284, "grad_norm": 36.71940994262695, "learning_rate": 9.670868347338937e-06, "loss": 39.1266, "step": 6539 }, { "epoch": 155.71641791044777, "grad_norm": 33.37260437011719, "learning_rate": 9.669467787114847e-06, "loss": 38.7847, "step": 6540 }, { "epoch": 155.74029850746268, "grad_norm": 31.80318832397461, "learning_rate": 9.668067226890758e-06, "loss": 38.5372, "step": 6541 }, { "epoch": 155.76417910447762, "grad_norm": 30.421329498291016, "learning_rate": 9.666666666666667e-06, "loss": 38.2407, "step": 6542 }, { "epoch": 155.78805970149253, "grad_norm": 34.91036605834961, "learning_rate": 9.665266106442577e-06, "loss": 40.0451, "step": 6543 }, { "epoch": 155.81194029850747, "grad_norm": 29.88376808166504, "learning_rate": 9.663865546218488e-06, "loss": 40.8208, "step": 6544 }, { "epoch": 155.83582089552237, "grad_norm": 33.51005935668945, "learning_rate": 9.662464985994399e-06, "loss": 38.3288, "step": 6545 }, { "epoch": 155.8597014925373, "grad_norm": 28.42050552368164, "learning_rate": 9.66106442577031e-06, "loss": 38.4511, "step": 6546 }, { "epoch": 155.88358208955225, "grad_norm": 28.77273941040039, "learning_rate": 9.65966386554622e-06, "loss": 40.1574, "step": 6547 }, { "epoch": 155.90746268656716, "grad_norm": 24.271469116210938, "learning_rate": 9.65826330532213e-06, "loss": 39.5026, "step": 6548 }, { "epoch": 155.9313432835821, "grad_norm": 34.00471115112305, "learning_rate": 9.65686274509804e-06, "loss": 37.3971, "step": 6549 }, { "epoch": 155.955223880597, "grad_norm": 33.8049430847168, "learning_rate": 9.65546218487395e-06, "loss": 39.0453, "step": 6550 }, { "epoch": 155.97910447761194, "grad_norm": 31.642559051513672, "learning_rate": 9.65406162464986e-06, "loss": 40.4236, "step": 6551 }, { "epoch": 156.0, "grad_norm": 29.527297973632812, "learning_rate": 9.652661064425771e-06, "loss": 34.3201, "step": 6552 }, { "epoch": 156.02388059701494, "grad_norm": 29.68943977355957, "learning_rate": 9.651260504201682e-06, "loss": 38.8877, "step": 6553 }, { "epoch": 156.04776119402985, "grad_norm": 26.39272117614746, "learning_rate": 9.649859943977593e-06, "loss": 37.7011, "step": 6554 }, { "epoch": 156.07164179104478, "grad_norm": 33.61505126953125, "learning_rate": 9.648459383753502e-06, "loss": 39.2487, "step": 6555 }, { "epoch": 156.0955223880597, "grad_norm": 31.842918395996094, "learning_rate": 9.647058823529412e-06, "loss": 38.7661, "step": 6556 }, { "epoch": 156.11940298507463, "grad_norm": 29.886465072631836, "learning_rate": 9.645658263305323e-06, "loss": 39.5789, "step": 6557 }, { "epoch": 156.14328358208957, "grad_norm": 26.030107498168945, "learning_rate": 9.644257703081234e-06, "loss": 38.6281, "step": 6558 }, { "epoch": 156.16716417910447, "grad_norm": 32.856842041015625, "learning_rate": 9.642857142857144e-06, "loss": 39.1047, "step": 6559 }, { "epoch": 156.1910447761194, "grad_norm": 29.918258666992188, "learning_rate": 9.641456582633055e-06, "loss": 37.773, "step": 6560 }, { "epoch": 156.21492537313432, "grad_norm": 33.415077209472656, "learning_rate": 9.640056022408964e-06, "loss": 38.7251, "step": 6561 }, { "epoch": 156.23880597014926, "grad_norm": 29.851547241210938, "learning_rate": 9.638655462184874e-06, "loss": 38.9788, "step": 6562 }, { "epoch": 156.26268656716417, "grad_norm": 32.586849212646484, "learning_rate": 9.637254901960785e-06, "loss": 37.6731, "step": 6563 }, { "epoch": 156.2865671641791, "grad_norm": 27.224334716796875, "learning_rate": 9.635854341736696e-06, "loss": 38.7623, "step": 6564 }, { "epoch": 156.31044776119404, "grad_norm": 37.19491958618164, "learning_rate": 9.634453781512606e-06, "loss": 38.1435, "step": 6565 }, { "epoch": 156.33432835820895, "grad_norm": 33.014869689941406, "learning_rate": 9.633053221288517e-06, "loss": 38.6628, "step": 6566 }, { "epoch": 156.3582089552239, "grad_norm": 30.490785598754883, "learning_rate": 9.631652661064426e-06, "loss": 37.5884, "step": 6567 }, { "epoch": 156.3820895522388, "grad_norm": 26.904870986938477, "learning_rate": 9.630252100840337e-06, "loss": 40.9113, "step": 6568 }, { "epoch": 156.40597014925373, "grad_norm": 33.51700973510742, "learning_rate": 9.628851540616247e-06, "loss": 39.6933, "step": 6569 }, { "epoch": 156.42985074626867, "grad_norm": 27.145734786987305, "learning_rate": 9.627450980392158e-06, "loss": 39.7145, "step": 6570 }, { "epoch": 156.45373134328358, "grad_norm": 34.20743179321289, "learning_rate": 9.626050420168068e-06, "loss": 39.9307, "step": 6571 }, { "epoch": 156.47761194029852, "grad_norm": 28.43290138244629, "learning_rate": 9.624649859943979e-06, "loss": 39.7442, "step": 6572 }, { "epoch": 156.50149253731342, "grad_norm": 30.41081428527832, "learning_rate": 9.62324929971989e-06, "loss": 39.0533, "step": 6573 }, { "epoch": 156.52537313432836, "grad_norm": 24.903308868408203, "learning_rate": 9.621848739495799e-06, "loss": 38.7191, "step": 6574 }, { "epoch": 156.54925373134327, "grad_norm": 32.21714401245117, "learning_rate": 9.62044817927171e-06, "loss": 39.7623, "step": 6575 }, { "epoch": 156.5731343283582, "grad_norm": 27.931760787963867, "learning_rate": 9.61904761904762e-06, "loss": 39.1339, "step": 6576 }, { "epoch": 156.59701492537314, "grad_norm": 32.47041702270508, "learning_rate": 9.61764705882353e-06, "loss": 39.9239, "step": 6577 }, { "epoch": 156.62089552238805, "grad_norm": 26.025028228759766, "learning_rate": 9.616246498599441e-06, "loss": 38.3327, "step": 6578 }, { "epoch": 156.644776119403, "grad_norm": 35.96200942993164, "learning_rate": 9.614845938375352e-06, "loss": 39.9313, "step": 6579 }, { "epoch": 156.6686567164179, "grad_norm": 32.426944732666016, "learning_rate": 9.61344537815126e-06, "loss": 39.4115, "step": 6580 }, { "epoch": 156.69253731343284, "grad_norm": 33.29541015625, "learning_rate": 9.612044817927171e-06, "loss": 39.8026, "step": 6581 }, { "epoch": 156.71641791044777, "grad_norm": 26.586023330688477, "learning_rate": 9.610644257703082e-06, "loss": 39.1864, "step": 6582 }, { "epoch": 156.74029850746268, "grad_norm": 31.9718074798584, "learning_rate": 9.609243697478993e-06, "loss": 39.4903, "step": 6583 }, { "epoch": 156.76417910447762, "grad_norm": 31.3813419342041, "learning_rate": 9.607843137254903e-06, "loss": 38.8532, "step": 6584 }, { "epoch": 156.78805970149253, "grad_norm": 34.09993362426758, "learning_rate": 9.606442577030814e-06, "loss": 39.671, "step": 6585 }, { "epoch": 156.81194029850747, "grad_norm": 30.085002899169922, "learning_rate": 9.605042016806723e-06, "loss": 39.0096, "step": 6586 }, { "epoch": 156.83582089552237, "grad_norm": 28.415733337402344, "learning_rate": 9.603641456582634e-06, "loss": 38.4657, "step": 6587 }, { "epoch": 156.8597014925373, "grad_norm": 27.193313598632812, "learning_rate": 9.602240896358544e-06, "loss": 38.1315, "step": 6588 }, { "epoch": 156.88358208955225, "grad_norm": 30.765335083007812, "learning_rate": 9.600840336134455e-06, "loss": 39.3368, "step": 6589 }, { "epoch": 156.90746268656716, "grad_norm": 23.282987594604492, "learning_rate": 9.599439775910366e-06, "loss": 40.039, "step": 6590 }, { "epoch": 156.9313432835821, "grad_norm": 32.54345703125, "learning_rate": 9.598039215686276e-06, "loss": 38.9747, "step": 6591 }, { "epoch": 156.955223880597, "grad_norm": 30.983205795288086, "learning_rate": 9.596638655462187e-06, "loss": 39.2737, "step": 6592 }, { "epoch": 156.97910447761194, "grad_norm": 28.03235626220703, "learning_rate": 9.595238095238096e-06, "loss": 40.9064, "step": 6593 }, { "epoch": 157.0, "grad_norm": 24.515148162841797, "learning_rate": 9.593837535014006e-06, "loss": 34.6439, "step": 6594 }, { "epoch": 157.02388059701494, "grad_norm": 28.694761276245117, "learning_rate": 9.592436974789917e-06, "loss": 38.4482, "step": 6595 }, { "epoch": 157.04776119402985, "grad_norm": 23.540157318115234, "learning_rate": 9.591036414565828e-06, "loss": 38.1782, "step": 6596 }, { "epoch": 157.07164179104478, "grad_norm": 32.222991943359375, "learning_rate": 9.589635854341738e-06, "loss": 38.2852, "step": 6597 }, { "epoch": 157.0955223880597, "grad_norm": 28.29923439025879, "learning_rate": 9.588235294117649e-06, "loss": 39.1217, "step": 6598 }, { "epoch": 157.11940298507463, "grad_norm": 30.50798988342285, "learning_rate": 9.586834733893558e-06, "loss": 37.9906, "step": 6599 }, { "epoch": 157.14328358208957, "grad_norm": 25.299413681030273, "learning_rate": 9.585434173669469e-06, "loss": 40.1151, "step": 6600 }, { "epoch": 157.16716417910447, "grad_norm": 26.846481323242188, "learning_rate": 9.584033613445379e-06, "loss": 39.1387, "step": 6601 }, { "epoch": 157.1910447761194, "grad_norm": 24.088130950927734, "learning_rate": 9.58263305322129e-06, "loss": 39.9527, "step": 6602 }, { "epoch": 157.21492537313432, "grad_norm": 32.9058723449707, "learning_rate": 9.5812324929972e-06, "loss": 39.6031, "step": 6603 }, { "epoch": 157.23880597014926, "grad_norm": 28.707300186157227, "learning_rate": 9.579831932773111e-06, "loss": 38.1865, "step": 6604 }, { "epoch": 157.26268656716417, "grad_norm": 34.44000244140625, "learning_rate": 9.57843137254902e-06, "loss": 40.0412, "step": 6605 }, { "epoch": 157.2865671641791, "grad_norm": 30.934423446655273, "learning_rate": 9.57703081232493e-06, "loss": 39.241, "step": 6606 }, { "epoch": 157.31044776119404, "grad_norm": 29.025680541992188, "learning_rate": 9.575630252100841e-06, "loss": 39.4661, "step": 6607 }, { "epoch": 157.33432835820895, "grad_norm": 24.268417358398438, "learning_rate": 9.574229691876752e-06, "loss": 39.2482, "step": 6608 }, { "epoch": 157.3582089552239, "grad_norm": 32.32265090942383, "learning_rate": 9.572829131652663e-06, "loss": 39.2594, "step": 6609 }, { "epoch": 157.3820895522388, "grad_norm": 28.000333786010742, "learning_rate": 9.571428571428573e-06, "loss": 38.2507, "step": 6610 }, { "epoch": 157.40597014925373, "grad_norm": 31.72787094116211, "learning_rate": 9.570028011204482e-06, "loss": 40.5381, "step": 6611 }, { "epoch": 157.42985074626867, "grad_norm": 31.978450775146484, "learning_rate": 9.568627450980393e-06, "loss": 39.7319, "step": 6612 }, { "epoch": 157.45373134328358, "grad_norm": 30.192304611206055, "learning_rate": 9.567226890756303e-06, "loss": 40.951, "step": 6613 }, { "epoch": 157.47761194029852, "grad_norm": 25.521732330322266, "learning_rate": 9.565826330532214e-06, "loss": 38.8767, "step": 6614 }, { "epoch": 157.50149253731342, "grad_norm": 30.813644409179688, "learning_rate": 9.564425770308125e-06, "loss": 39.302, "step": 6615 }, { "epoch": 157.52537313432836, "grad_norm": 25.71561622619629, "learning_rate": 9.563025210084035e-06, "loss": 38.4408, "step": 6616 }, { "epoch": 157.54925373134327, "grad_norm": 36.52140426635742, "learning_rate": 9.561624649859946e-06, "loss": 38.4121, "step": 6617 }, { "epoch": 157.5731343283582, "grad_norm": 33.191593170166016, "learning_rate": 9.560224089635855e-06, "loss": 38.1299, "step": 6618 }, { "epoch": 157.59701492537314, "grad_norm": 27.148662567138672, "learning_rate": 9.558823529411766e-06, "loss": 39.3005, "step": 6619 }, { "epoch": 157.62089552238805, "grad_norm": 28.000900268554688, "learning_rate": 9.557422969187676e-06, "loss": 40.1389, "step": 6620 }, { "epoch": 157.644776119403, "grad_norm": 32.106048583984375, "learning_rate": 9.556022408963587e-06, "loss": 39.0995, "step": 6621 }, { "epoch": 157.6686567164179, "grad_norm": 28.147275924682617, "learning_rate": 9.554621848739497e-06, "loss": 38.8633, "step": 6622 }, { "epoch": 157.69253731343284, "grad_norm": 31.858997344970703, "learning_rate": 9.553221288515408e-06, "loss": 38.8929, "step": 6623 }, { "epoch": 157.71641791044777, "grad_norm": 27.966960906982422, "learning_rate": 9.551820728291317e-06, "loss": 38.5249, "step": 6624 }, { "epoch": 157.74029850746268, "grad_norm": 29.7442684173584, "learning_rate": 9.550420168067228e-06, "loss": 39.7258, "step": 6625 }, { "epoch": 157.76417910447762, "grad_norm": 25.475791931152344, "learning_rate": 9.549019607843138e-06, "loss": 38.6937, "step": 6626 }, { "epoch": 157.78805970149253, "grad_norm": 31.998165130615234, "learning_rate": 9.547619047619049e-06, "loss": 39.0858, "step": 6627 }, { "epoch": 157.81194029850747, "grad_norm": 27.826169967651367, "learning_rate": 9.54621848739496e-06, "loss": 39.3365, "step": 6628 }, { "epoch": 157.83582089552237, "grad_norm": 30.259963989257812, "learning_rate": 9.54481792717087e-06, "loss": 38.8625, "step": 6629 }, { "epoch": 157.8597014925373, "grad_norm": 25.748193740844727, "learning_rate": 9.54341736694678e-06, "loss": 39.2983, "step": 6630 }, { "epoch": 157.88358208955225, "grad_norm": 29.5993709564209, "learning_rate": 9.54201680672269e-06, "loss": 38.8356, "step": 6631 }, { "epoch": 157.90746268656716, "grad_norm": 24.60677719116211, "learning_rate": 9.5406162464986e-06, "loss": 39.4259, "step": 6632 }, { "epoch": 157.9313432835821, "grad_norm": 34.27214431762695, "learning_rate": 9.539215686274511e-06, "loss": 39.5947, "step": 6633 }, { "epoch": 157.955223880597, "grad_norm": 28.4605655670166, "learning_rate": 9.537815126050422e-06, "loss": 38.9525, "step": 6634 }, { "epoch": 157.97910447761194, "grad_norm": 29.947551727294922, "learning_rate": 9.536414565826332e-06, "loss": 37.9835, "step": 6635 }, { "epoch": 158.0, "grad_norm": 22.99721908569336, "learning_rate": 9.535014005602243e-06, "loss": 33.7123, "step": 6636 }, { "epoch": 158.02388059701494, "grad_norm": 31.333166122436523, "learning_rate": 9.533613445378152e-06, "loss": 39.6887, "step": 6637 }, { "epoch": 158.04776119402985, "grad_norm": 23.77128791809082, "learning_rate": 9.532212885154063e-06, "loss": 39.8605, "step": 6638 }, { "epoch": 158.07164179104478, "grad_norm": 31.62507438659668, "learning_rate": 9.530812324929972e-06, "loss": 38.9037, "step": 6639 }, { "epoch": 158.0955223880597, "grad_norm": 28.015859603881836, "learning_rate": 9.529411764705882e-06, "loss": 39.5294, "step": 6640 }, { "epoch": 158.11940298507463, "grad_norm": 27.46308135986328, "learning_rate": 9.528011204481793e-06, "loss": 39.4117, "step": 6641 }, { "epoch": 158.14328358208957, "grad_norm": 24.952190399169922, "learning_rate": 9.526610644257703e-06, "loss": 40.1572, "step": 6642 }, { "epoch": 158.16716417910447, "grad_norm": 30.921812057495117, "learning_rate": 9.525210084033614e-06, "loss": 38.4166, "step": 6643 }, { "epoch": 158.1910447761194, "grad_norm": 26.232816696166992, "learning_rate": 9.523809523809525e-06, "loss": 39.3362, "step": 6644 }, { "epoch": 158.21492537313432, "grad_norm": 31.015853881835938, "learning_rate": 9.522408963585434e-06, "loss": 38.4154, "step": 6645 }, { "epoch": 158.23880597014926, "grad_norm": 26.870193481445312, "learning_rate": 9.521008403361344e-06, "loss": 37.9386, "step": 6646 }, { "epoch": 158.26268656716417, "grad_norm": 29.11814308166504, "learning_rate": 9.519607843137255e-06, "loss": 39.9507, "step": 6647 }, { "epoch": 158.2865671641791, "grad_norm": 26.853282928466797, "learning_rate": 9.518207282913166e-06, "loss": 38.8224, "step": 6648 }, { "epoch": 158.31044776119404, "grad_norm": 30.874858856201172, "learning_rate": 9.516806722689076e-06, "loss": 40.0089, "step": 6649 }, { "epoch": 158.33432835820895, "grad_norm": 23.252574920654297, "learning_rate": 9.515406162464987e-06, "loss": 38.1102, "step": 6650 }, { "epoch": 158.3582089552239, "grad_norm": 33.164737701416016, "learning_rate": 9.514005602240896e-06, "loss": 40.089, "step": 6651 }, { "epoch": 158.3820895522388, "grad_norm": 25.860107421875, "learning_rate": 9.512605042016806e-06, "loss": 38.4175, "step": 6652 }, { "epoch": 158.40597014925373, "grad_norm": 31.76698875427246, "learning_rate": 9.511204481792717e-06, "loss": 38.516, "step": 6653 }, { "epoch": 158.42985074626867, "grad_norm": 28.989151000976562, "learning_rate": 9.509803921568628e-06, "loss": 38.7914, "step": 6654 }, { "epoch": 158.45373134328358, "grad_norm": 27.455665588378906, "learning_rate": 9.508403361344538e-06, "loss": 39.2981, "step": 6655 }, { "epoch": 158.47761194029852, "grad_norm": 27.4940128326416, "learning_rate": 9.507002801120449e-06, "loss": 39.8574, "step": 6656 }, { "epoch": 158.50149253731342, "grad_norm": 28.886987686157227, "learning_rate": 9.50560224089636e-06, "loss": 38.5423, "step": 6657 }, { "epoch": 158.52537313432836, "grad_norm": 23.92523956298828, "learning_rate": 9.504201680672269e-06, "loss": 38.8931, "step": 6658 }, { "epoch": 158.54925373134327, "grad_norm": 35.76919174194336, "learning_rate": 9.50280112044818e-06, "loss": 39.2734, "step": 6659 }, { "epoch": 158.5731343283582, "grad_norm": 30.03874969482422, "learning_rate": 9.50140056022409e-06, "loss": 39.1651, "step": 6660 }, { "epoch": 158.59701492537314, "grad_norm": NaN, "learning_rate": 9.5e-06, "loss": 53.1676, "step": 6661 }, { "epoch": 158.62089552238805, "grad_norm": 29.244117736816406, "learning_rate": 9.5e-06, "loss": 39.141, "step": 6662 }, { "epoch": 158.644776119403, "grad_norm": 26.69334602355957, "learning_rate": 9.498599439775911e-06, "loss": 38.2187, "step": 6663 }, { "epoch": 158.6686567164179, "grad_norm": 29.462692260742188, "learning_rate": 9.497198879551822e-06, "loss": 39.808, "step": 6664 }, { "epoch": 158.69253731343284, "grad_norm": 25.570072174072266, "learning_rate": 9.49579831932773e-06, "loss": 39.1149, "step": 6665 }, { "epoch": 158.71641791044777, "grad_norm": 29.6662654876709, "learning_rate": 9.494397759103641e-06, "loss": 39.8864, "step": 6666 }, { "epoch": 158.74029850746268, "grad_norm": 26.96592140197754, "learning_rate": 9.492997198879552e-06, "loss": 40.2124, "step": 6667 }, { "epoch": 158.76417910447762, "grad_norm": 28.80194091796875, "learning_rate": 9.491596638655463e-06, "loss": 39.3583, "step": 6668 }, { "epoch": 158.78805970149253, "grad_norm": 25.05339813232422, "learning_rate": 9.490196078431373e-06, "loss": 39.229, "step": 6669 }, { "epoch": 158.81194029850747, "grad_norm": 30.561386108398438, "learning_rate": 9.488795518207284e-06, "loss": 36.3939, "step": 6670 }, { "epoch": 158.83582089552237, "grad_norm": 28.11375617980957, "learning_rate": 9.487394957983193e-06, "loss": 39.0193, "step": 6671 }, { "epoch": 158.8597014925373, "grad_norm": 28.772184371948242, "learning_rate": 9.485994397759104e-06, "loss": 39.018, "step": 6672 }, { "epoch": 158.88358208955225, "grad_norm": 26.138904571533203, "learning_rate": 9.484593837535014e-06, "loss": 38.5065, "step": 6673 }, { "epoch": 158.90746268656716, "grad_norm": 35.049713134765625, "learning_rate": 9.483193277310925e-06, "loss": 39.4819, "step": 6674 }, { "epoch": 158.9313432835821, "grad_norm": 29.197858810424805, "learning_rate": 9.481792717086835e-06, "loss": 39.2848, "step": 6675 }, { "epoch": 158.955223880597, "grad_norm": 28.17118263244629, "learning_rate": 9.480392156862746e-06, "loss": 37.8984, "step": 6676 }, { "epoch": 158.97910447761194, "grad_norm": 25.997028350830078, "learning_rate": 9.478991596638657e-06, "loss": 38.2248, "step": 6677 }, { "epoch": 159.0, "grad_norm": 22.521821975708008, "learning_rate": 9.477591036414566e-06, "loss": 34.6442, "step": 6678 }, { "epoch": 159.02388059701494, "grad_norm": 22.637258529663086, "learning_rate": 9.476190476190476e-06, "loss": 37.9175, "step": 6679 }, { "epoch": 159.04776119402985, "grad_norm": 31.351964950561523, "learning_rate": 9.474789915966387e-06, "loss": 38.5578, "step": 6680 }, { "epoch": 159.07164179104478, "grad_norm": 31.61567497253418, "learning_rate": 9.473389355742298e-06, "loss": 39.0571, "step": 6681 }, { "epoch": 159.0955223880597, "grad_norm": 26.24010467529297, "learning_rate": 9.471988795518208e-06, "loss": 39.5834, "step": 6682 }, { "epoch": 159.11940298507463, "grad_norm": 28.04585838317871, "learning_rate": 9.470588235294119e-06, "loss": 39.3662, "step": 6683 }, { "epoch": 159.14328358208957, "grad_norm": 25.634319305419922, "learning_rate": 9.469187675070028e-06, "loss": 38.7174, "step": 6684 }, { "epoch": 159.16716417910447, "grad_norm": 21.796682357788086, "learning_rate": 9.467787114845938e-06, "loss": 38.982, "step": 6685 }, { "epoch": 159.1910447761194, "grad_norm": 29.920791625976562, "learning_rate": 9.466386554621849e-06, "loss": 38.6116, "step": 6686 }, { "epoch": 159.21492537313432, "grad_norm": 25.356687545776367, "learning_rate": 9.46498599439776e-06, "loss": 39.5341, "step": 6687 }, { "epoch": 159.23880597014926, "grad_norm": 31.633136749267578, "learning_rate": 9.46358543417367e-06, "loss": 38.5544, "step": 6688 }, { "epoch": 159.26268656716417, "grad_norm": 27.863170623779297, "learning_rate": 9.462184873949581e-06, "loss": 39.6266, "step": 6689 }, { "epoch": 159.2865671641791, "grad_norm": 29.66268539428711, "learning_rate": 9.46078431372549e-06, "loss": 39.5999, "step": 6690 }, { "epoch": 159.31044776119404, "grad_norm": 24.517019271850586, "learning_rate": 9.4593837535014e-06, "loss": 38.8792, "step": 6691 }, { "epoch": 159.33432835820895, "grad_norm": 25.91954231262207, "learning_rate": 9.457983193277311e-06, "loss": 39.5027, "step": 6692 }, { "epoch": 159.3582089552239, "grad_norm": 22.744062423706055, "learning_rate": 9.456582633053222e-06, "loss": 39.2578, "step": 6693 }, { "epoch": 159.3820895522388, "grad_norm": 26.101306915283203, "learning_rate": 9.455182072829132e-06, "loss": 39.1526, "step": 6694 }, { "epoch": 159.40597014925373, "grad_norm": 20.470579147338867, "learning_rate": 9.453781512605043e-06, "loss": 39.2099, "step": 6695 }, { "epoch": 159.42985074626867, "grad_norm": 24.81926155090332, "learning_rate": 9.452380952380952e-06, "loss": 38.8535, "step": 6696 }, { "epoch": 159.45373134328358, "grad_norm": 18.473108291625977, "learning_rate": 9.450980392156863e-06, "loss": 39.2421, "step": 6697 }, { "epoch": 159.47761194029852, "grad_norm": 23.971759796142578, "learning_rate": 9.449579831932773e-06, "loss": 38.4099, "step": 6698 }, { "epoch": 159.50149253731342, "grad_norm": 22.947731018066406, "learning_rate": 9.448179271708684e-06, "loss": 38.4733, "step": 6699 }, { "epoch": 159.52537313432836, "grad_norm": 18.715824127197266, "learning_rate": 9.446778711484595e-06, "loss": 39.0546, "step": 6700 }, { "epoch": 159.54925373134327, "grad_norm": 25.644397735595703, "learning_rate": 9.445378151260505e-06, "loss": 39.4029, "step": 6701 }, { "epoch": 159.5731343283582, "grad_norm": 20.00396728515625, "learning_rate": 9.443977591036416e-06, "loss": 39.7051, "step": 6702 }, { "epoch": 159.59701492537314, "grad_norm": 22.225614547729492, "learning_rate": 9.442577030812325e-06, "loss": 39.8751, "step": 6703 }, { "epoch": 159.62089552238805, "grad_norm": 21.25652313232422, "learning_rate": 9.441176470588235e-06, "loss": 38.6573, "step": 6704 }, { "epoch": 159.644776119403, "grad_norm": 20.495485305786133, "learning_rate": 9.439775910364146e-06, "loss": 37.8811, "step": 6705 }, { "epoch": 159.6686567164179, "grad_norm": 18.924156188964844, "learning_rate": 9.438375350140057e-06, "loss": 39.5924, "step": 6706 }, { "epoch": 159.69253731343284, "grad_norm": 21.530715942382812, "learning_rate": 9.436974789915967e-06, "loss": 38.6536, "step": 6707 }, { "epoch": 159.71641791044777, "grad_norm": 20.47351837158203, "learning_rate": 9.435574229691878e-06, "loss": 38.5164, "step": 6708 }, { "epoch": 159.74029850746268, "grad_norm": 18.203227996826172, "learning_rate": 9.434173669467787e-06, "loss": 39.0054, "step": 6709 }, { "epoch": 159.76417910447762, "grad_norm": 17.892553329467773, "learning_rate": 9.432773109243698e-06, "loss": 38.4729, "step": 6710 }, { "epoch": 159.78805970149253, "grad_norm": 19.664037704467773, "learning_rate": 9.431372549019608e-06, "loss": 38.5362, "step": 6711 }, { "epoch": 159.81194029850747, "grad_norm": 14.706823348999023, "learning_rate": 9.429971988795519e-06, "loss": 38.8772, "step": 6712 }, { "epoch": 159.83582089552237, "grad_norm": 23.517322540283203, "learning_rate": 9.42857142857143e-06, "loss": 38.9856, "step": 6713 }, { "epoch": 159.8597014925373, "grad_norm": 18.907791137695312, "learning_rate": 9.42717086834734e-06, "loss": 39.2286, "step": 6714 }, { "epoch": 159.88358208955225, "grad_norm": 18.358661651611328, "learning_rate": 9.425770308123249e-06, "loss": 39.5321, "step": 6715 }, { "epoch": 159.90746268656716, "grad_norm": 21.41598129272461, "learning_rate": 9.42436974789916e-06, "loss": 38.38, "step": 6716 }, { "epoch": 159.9313432835821, "grad_norm": 20.371999740600586, "learning_rate": 9.42296918767507e-06, "loss": 39.1304, "step": 6717 }, { "epoch": 159.955223880597, "grad_norm": 16.01449966430664, "learning_rate": 9.421568627450981e-06, "loss": 39.2435, "step": 6718 }, { "epoch": 159.97910447761194, "grad_norm": 15.86768627166748, "learning_rate": 9.420168067226892e-06, "loss": 40.169, "step": 6719 }, { "epoch": 160.0, "grad_norm": 17.439725875854492, "learning_rate": 9.418767507002802e-06, "loss": 34.4427, "step": 6720 }, { "epoch": 160.02388059701494, "grad_norm": 16.46625518798828, "learning_rate": 9.417366946778713e-06, "loss": 39.5417, "step": 6721 }, { "epoch": 160.04776119402985, "grad_norm": 19.329574584960938, "learning_rate": 9.415966386554622e-06, "loss": 38.4405, "step": 6722 }, { "epoch": 160.07164179104478, "grad_norm": 17.68001937866211, "learning_rate": 9.414565826330533e-06, "loss": 38.8865, "step": 6723 }, { "epoch": 160.0955223880597, "grad_norm": 14.42570686340332, "learning_rate": 9.413165266106443e-06, "loss": 39.4139, "step": 6724 }, { "epoch": 160.11940298507463, "grad_norm": 16.53023338317871, "learning_rate": 9.411764705882354e-06, "loss": 39.0396, "step": 6725 }, { "epoch": 160.14328358208957, "grad_norm": 16.63241958618164, "learning_rate": 9.410364145658264e-06, "loss": 38.6942, "step": 6726 }, { "epoch": 160.16716417910447, "grad_norm": 16.808700561523438, "learning_rate": 9.408963585434175e-06, "loss": 39.2502, "step": 6727 }, { "epoch": 160.1910447761194, "grad_norm": 16.61543083190918, "learning_rate": 9.407563025210084e-06, "loss": 39.3648, "step": 6728 }, { "epoch": 160.21492537313432, "grad_norm": 17.370962142944336, "learning_rate": 9.406162464985995e-06, "loss": 39.7099, "step": 6729 }, { "epoch": 160.23880597014926, "grad_norm": 18.192951202392578, "learning_rate": 9.404761904761905e-06, "loss": 38.4803, "step": 6730 }, { "epoch": 160.26268656716417, "grad_norm": 17.29343032836914, "learning_rate": 9.403361344537816e-06, "loss": 38.9634, "step": 6731 }, { "epoch": 160.2865671641791, "grad_norm": 15.428152084350586, "learning_rate": 9.401960784313727e-06, "loss": 37.3442, "step": 6732 }, { "epoch": 160.31044776119404, "grad_norm": 20.36634635925293, "learning_rate": 9.400560224089637e-06, "loss": 37.5906, "step": 6733 }, { "epoch": 160.33432835820895, "grad_norm": 19.532516479492188, "learning_rate": 9.399159663865546e-06, "loss": 39.7177, "step": 6734 }, { "epoch": 160.3582089552239, "grad_norm": 15.812334060668945, "learning_rate": 9.397759103641457e-06, "loss": 39.041, "step": 6735 }, { "epoch": 160.3820895522388, "grad_norm": 22.160499572753906, "learning_rate": 9.396358543417367e-06, "loss": 39.5028, "step": 6736 }, { "epoch": 160.40597014925373, "grad_norm": 18.124677658081055, "learning_rate": 9.394957983193278e-06, "loss": 37.5503, "step": 6737 }, { "epoch": 160.42985074626867, "grad_norm": 17.192428588867188, "learning_rate": 9.393557422969189e-06, "loss": 40.2257, "step": 6738 }, { "epoch": 160.45373134328358, "grad_norm": 19.324045181274414, "learning_rate": 9.3921568627451e-06, "loss": 39.3236, "step": 6739 }, { "epoch": 160.47761194029852, "grad_norm": 14.983993530273438, "learning_rate": 9.390756302521008e-06, "loss": 39.1351, "step": 6740 }, { "epoch": 160.50149253731342, "grad_norm": 21.260456085205078, "learning_rate": 9.389355742296919e-06, "loss": 40.2213, "step": 6741 }, { "epoch": 160.52537313432836, "grad_norm": 19.486419677734375, "learning_rate": 9.38795518207283e-06, "loss": 39.116, "step": 6742 }, { "epoch": 160.54925373134327, "grad_norm": 19.740835189819336, "learning_rate": 9.38655462184874e-06, "loss": 38.3406, "step": 6743 }, { "epoch": 160.5731343283582, "grad_norm": 14.827152252197266, "learning_rate": 9.385154061624651e-06, "loss": 39.4013, "step": 6744 }, { "epoch": 160.59701492537314, "grad_norm": 18.374135971069336, "learning_rate": 9.383753501400561e-06, "loss": 39.256, "step": 6745 }, { "epoch": 160.62089552238805, "grad_norm": 14.761893272399902, "learning_rate": 9.382352941176472e-06, "loss": 38.3464, "step": 6746 }, { "epoch": 160.644776119403, "grad_norm": 23.56832504272461, "learning_rate": 9.380952380952381e-06, "loss": 38.6375, "step": 6747 }, { "epoch": 160.6686567164179, "grad_norm": 19.972110748291016, "learning_rate": 9.379551820728292e-06, "loss": 39.4843, "step": 6748 }, { "epoch": 160.69253731343284, "grad_norm": 15.896003723144531, "learning_rate": 9.378151260504202e-06, "loss": 38.3614, "step": 6749 }, { "epoch": 160.71641791044777, "grad_norm": 22.914880752563477, "learning_rate": 9.376750700280113e-06, "loss": 40.2098, "step": 6750 }, { "epoch": 160.74029850746268, "grad_norm": 14.334890365600586, "learning_rate": 9.375350140056024e-06, "loss": 39.3033, "step": 6751 }, { "epoch": 160.76417910447762, "grad_norm": 20.54424476623535, "learning_rate": 9.373949579831934e-06, "loss": 39.6488, "step": 6752 }, { "epoch": 160.78805970149253, "grad_norm": 23.600900650024414, "learning_rate": 9.372549019607843e-06, "loss": 36.9016, "step": 6753 }, { "epoch": 160.81194029850747, "grad_norm": 15.01113510131836, "learning_rate": 9.371148459383754e-06, "loss": 38.4658, "step": 6754 }, { "epoch": 160.83582089552237, "grad_norm": 18.84202766418457, "learning_rate": 9.369747899159664e-06, "loss": 38.6521, "step": 6755 }, { "epoch": 160.8597014925373, "grad_norm": 15.818922996520996, "learning_rate": 9.368347338935575e-06, "loss": 40.3014, "step": 6756 }, { "epoch": 160.88358208955225, "grad_norm": 19.53926658630371, "learning_rate": 9.366946778711486e-06, "loss": 39.0312, "step": 6757 }, { "epoch": 160.90746268656716, "grad_norm": 16.320993423461914, "learning_rate": 9.365546218487396e-06, "loss": 38.1269, "step": 6758 }, { "epoch": 160.9313432835821, "grad_norm": 24.081876754760742, "learning_rate": 9.364145658263305e-06, "loss": 39.681, "step": 6759 }, { "epoch": 160.955223880597, "grad_norm": 16.770343780517578, "learning_rate": 9.362745098039216e-06, "loss": 38.7858, "step": 6760 }, { "epoch": 160.97910447761194, "grad_norm": 22.08024024963379, "learning_rate": 9.361344537815127e-06, "loss": 38.8286, "step": 6761 }, { "epoch": 161.0, "grad_norm": 17.7487735748291, "learning_rate": 9.359943977591037e-06, "loss": 34.0391, "step": 6762 }, { "epoch": 161.02388059701494, "grad_norm": 18.156070709228516, "learning_rate": 9.358543417366948e-06, "loss": 39.4564, "step": 6763 }, { "epoch": 161.04776119402985, "grad_norm": 20.877561569213867, "learning_rate": 9.357142857142859e-06, "loss": 38.7863, "step": 6764 }, { "epoch": 161.07164179104478, "grad_norm": 19.127397537231445, "learning_rate": 9.355742296918767e-06, "loss": 38.5402, "step": 6765 }, { "epoch": 161.0955223880597, "grad_norm": 21.27589988708496, "learning_rate": 9.354341736694678e-06, "loss": 38.9504, "step": 6766 }, { "epoch": 161.11940298507463, "grad_norm": 19.405920028686523, "learning_rate": 9.352941176470589e-06, "loss": 39.851, "step": 6767 }, { "epoch": 161.14328358208957, "grad_norm": 17.423633575439453, "learning_rate": 9.3515406162465e-06, "loss": 39.0452, "step": 6768 }, { "epoch": 161.16716417910447, "grad_norm": NaN, "learning_rate": 9.35014005602241e-06, "loss": 49.0405, "step": 6769 }, { "epoch": 161.1910447761194, "grad_norm": 17.809812545776367, "learning_rate": 9.35014005602241e-06, "loss": 37.5939, "step": 6770 }, { "epoch": 161.21492537313432, "grad_norm": 16.22609519958496, "learning_rate": 9.34873949579832e-06, "loss": 39.4706, "step": 6771 }, { "epoch": 161.23880597014926, "grad_norm": 21.230531692504883, "learning_rate": 9.347338935574231e-06, "loss": 39.2319, "step": 6772 }, { "epoch": 161.26268656716417, "grad_norm": 18.550539016723633, "learning_rate": 9.34593837535014e-06, "loss": 39.4455, "step": 6773 }, { "epoch": 161.2865671641791, "grad_norm": 18.481239318847656, "learning_rate": 9.344537815126051e-06, "loss": 40.0004, "step": 6774 }, { "epoch": 161.31044776119404, "grad_norm": 23.4416446685791, "learning_rate": 9.343137254901962e-06, "loss": 37.5972, "step": 6775 }, { "epoch": 161.33432835820895, "grad_norm": 24.323545455932617, "learning_rate": 9.341736694677872e-06, "loss": 37.308, "step": 6776 }, { "epoch": 161.3582089552239, "grad_norm": 15.038119316101074, "learning_rate": 9.340336134453783e-06, "loss": 39.032, "step": 6777 }, { "epoch": 161.3820895522388, "grad_norm": 26.29281234741211, "learning_rate": 9.338935574229693e-06, "loss": 39.4552, "step": 6778 }, { "epoch": 161.40597014925373, "grad_norm": 17.980562210083008, "learning_rate": 9.337535014005602e-06, "loss": 38.6003, "step": 6779 }, { "epoch": 161.42985074626867, "grad_norm": 23.86687660217285, "learning_rate": 9.336134453781513e-06, "loss": 38.2432, "step": 6780 }, { "epoch": 161.45373134328358, "grad_norm": 21.71341896057129, "learning_rate": 9.334733893557424e-06, "loss": 39.3572, "step": 6781 }, { "epoch": 161.47761194029852, "grad_norm": 22.293689727783203, "learning_rate": 9.333333333333334e-06, "loss": 39.5909, "step": 6782 }, { "epoch": 161.50149253731342, "grad_norm": 22.266357421875, "learning_rate": 9.331932773109245e-06, "loss": 39.4302, "step": 6783 }, { "epoch": 161.52537313432836, "grad_norm": 18.248830795288086, "learning_rate": 9.330532212885156e-06, "loss": 38.9019, "step": 6784 }, { "epoch": 161.54925373134327, "grad_norm": 16.384206771850586, "learning_rate": 9.329131652661065e-06, "loss": 38.9382, "step": 6785 }, { "epoch": 161.5731343283582, "grad_norm": 20.268388748168945, "learning_rate": 9.327731092436975e-06, "loss": 38.7883, "step": 6786 }, { "epoch": 161.59701492537314, "grad_norm": 22.8583927154541, "learning_rate": 9.326330532212886e-06, "loss": 37.8416, "step": 6787 }, { "epoch": 161.62089552238805, "grad_norm": 18.193201065063477, "learning_rate": 9.324929971988796e-06, "loss": 38.4221, "step": 6788 }, { "epoch": 161.644776119403, "grad_norm": 26.44017791748047, "learning_rate": 9.323529411764707e-06, "loss": 37.5674, "step": 6789 }, { "epoch": 161.6686567164179, "grad_norm": 21.0838623046875, "learning_rate": 9.322128851540618e-06, "loss": 39.1413, "step": 6790 }, { "epoch": 161.69253731343284, "grad_norm": 17.49184799194336, "learning_rate": 9.320728291316528e-06, "loss": 39.0681, "step": 6791 }, { "epoch": 161.71641791044777, "grad_norm": 26.257192611694336, "learning_rate": 9.319327731092437e-06, "loss": 39.0174, "step": 6792 }, { "epoch": 161.74029850746268, "grad_norm": 14.942763328552246, "learning_rate": 9.317927170868348e-06, "loss": 39.5799, "step": 6793 }, { "epoch": 161.76417910447762, "grad_norm": 35.21318054199219, "learning_rate": 9.316526610644259e-06, "loss": 38.9654, "step": 6794 }, { "epoch": 161.78805970149253, "grad_norm": 26.097381591796875, "learning_rate": 9.31512605042017e-06, "loss": 39.5492, "step": 6795 }, { "epoch": 161.81194029850747, "grad_norm": 28.224538803100586, "learning_rate": 9.31372549019608e-06, "loss": 38.7373, "step": 6796 }, { "epoch": 161.83582089552237, "grad_norm": 21.44225311279297, "learning_rate": 9.31232492997199e-06, "loss": 38.3801, "step": 6797 }, { "epoch": 161.8597014925373, "grad_norm": 26.673877716064453, "learning_rate": 9.3109243697479e-06, "loss": 39.4883, "step": 6798 }, { "epoch": 161.88358208955225, "grad_norm": 21.419830322265625, "learning_rate": 9.30952380952381e-06, "loss": 38.2996, "step": 6799 }, { "epoch": 161.90746268656716, "grad_norm": 21.3859806060791, "learning_rate": 9.30812324929972e-06, "loss": 39.6105, "step": 6800 }, { "epoch": 161.9313432835821, "grad_norm": 24.542308807373047, "learning_rate": 9.306722689075631e-06, "loss": 40.6519, "step": 6801 }, { "epoch": 161.955223880597, "grad_norm": 20.758468627929688, "learning_rate": 9.305322128851542e-06, "loss": 38.8648, "step": 6802 }, { "epoch": 161.97910447761194, "grad_norm": 25.230525970458984, "learning_rate": 9.303921568627453e-06, "loss": 38.2779, "step": 6803 }, { "epoch": 162.0, "grad_norm": 17.33908462524414, "learning_rate": 9.302521008403362e-06, "loss": 34.062, "step": 6804 }, { "epoch": 162.02388059701494, "grad_norm": 21.04616355895996, "learning_rate": 9.301120448179272e-06, "loss": 37.3716, "step": 6805 }, { "epoch": 162.04776119402985, "grad_norm": 20.279638290405273, "learning_rate": 9.299719887955183e-06, "loss": 37.8552, "step": 6806 }, { "epoch": 162.07164179104478, "grad_norm": 21.690444946289062, "learning_rate": 9.298319327731094e-06, "loss": 38.919, "step": 6807 }, { "epoch": 162.0955223880597, "grad_norm": 19.564537048339844, "learning_rate": 9.296918767507004e-06, "loss": 38.8517, "step": 6808 }, { "epoch": 162.11940298507463, "grad_norm": 20.17282485961914, "learning_rate": 9.295518207282915e-06, "loss": 38.5925, "step": 6809 }, { "epoch": 162.14328358208957, "grad_norm": 17.191225051879883, "learning_rate": 9.294117647058824e-06, "loss": 40.0369, "step": 6810 }, { "epoch": 162.16716417910447, "grad_norm": 15.69149112701416, "learning_rate": 9.292717086834734e-06, "loss": 38.9256, "step": 6811 }, { "epoch": 162.1910447761194, "grad_norm": 19.860177993774414, "learning_rate": 9.291316526610645e-06, "loss": 38.4066, "step": 6812 }, { "epoch": 162.21492537313432, "grad_norm": 20.10934066772461, "learning_rate": 9.289915966386556e-06, "loss": 39.9674, "step": 6813 }, { "epoch": 162.23880597014926, "grad_norm": 15.468286514282227, "learning_rate": 9.288515406162466e-06, "loss": 38.8206, "step": 6814 }, { "epoch": 162.26268656716417, "grad_norm": 19.017433166503906, "learning_rate": 9.287114845938377e-06, "loss": 38.5092, "step": 6815 }, { "epoch": 162.2865671641791, "grad_norm": 17.840295791625977, "learning_rate": 9.285714285714288e-06, "loss": 38.9008, "step": 6816 }, { "epoch": 162.31044776119404, "grad_norm": 18.01124382019043, "learning_rate": 9.284313725490197e-06, "loss": 37.5621, "step": 6817 }, { "epoch": 162.33432835820895, "grad_norm": 15.1130952835083, "learning_rate": 9.282913165266107e-06, "loss": 38.2213, "step": 6818 }, { "epoch": 162.3582089552239, "grad_norm": 13.752197265625, "learning_rate": 9.281512605042018e-06, "loss": 38.9823, "step": 6819 }, { "epoch": 162.3820895522388, "grad_norm": 17.562929153442383, "learning_rate": 9.280112044817928e-06, "loss": 39.5778, "step": 6820 }, { "epoch": 162.40597014925373, "grad_norm": 17.960500717163086, "learning_rate": 9.278711484593839e-06, "loss": 38.6016, "step": 6821 }, { "epoch": 162.42985074626867, "grad_norm": 18.7200984954834, "learning_rate": 9.27731092436975e-06, "loss": 38.9038, "step": 6822 }, { "epoch": 162.45373134328358, "grad_norm": 13.515174865722656, "learning_rate": 9.275910364145659e-06, "loss": 38.9758, "step": 6823 }, { "epoch": 162.47761194029852, "grad_norm": 18.944744110107422, "learning_rate": 9.27450980392157e-06, "loss": 38.7721, "step": 6824 }, { "epoch": 162.50149253731342, "grad_norm": 21.34316062927246, "learning_rate": 9.27310924369748e-06, "loss": 38.7627, "step": 6825 }, { "epoch": 162.52537313432836, "grad_norm": 15.169504165649414, "learning_rate": 9.27170868347339e-06, "loss": 39.6574, "step": 6826 }, { "epoch": 162.54925373134327, "grad_norm": 15.559427261352539, "learning_rate": 9.270308123249301e-06, "loss": 38.9596, "step": 6827 }, { "epoch": 162.5731343283582, "grad_norm": 14.636393547058105, "learning_rate": 9.268907563025212e-06, "loss": 37.9376, "step": 6828 }, { "epoch": 162.59701492537314, "grad_norm": 19.879676818847656, "learning_rate": 9.26750700280112e-06, "loss": 38.8639, "step": 6829 }, { "epoch": 162.62089552238805, "grad_norm": 22.679649353027344, "learning_rate": 9.266106442577031e-06, "loss": 39.7803, "step": 6830 }, { "epoch": 162.644776119403, "grad_norm": 14.357563972473145, "learning_rate": 9.264705882352942e-06, "loss": 39.135, "step": 6831 }, { "epoch": 162.6686567164179, "grad_norm": 31.32781219482422, "learning_rate": 9.263305322128853e-06, "loss": 39.2126, "step": 6832 }, { "epoch": 162.69253731343284, "grad_norm": 23.357858657836914, "learning_rate": 9.261904761904763e-06, "loss": 39.1176, "step": 6833 }, { "epoch": 162.71641791044777, "grad_norm": 23.808473587036133, "learning_rate": 9.260504201680674e-06, "loss": 39.5406, "step": 6834 }, { "epoch": 162.74029850746268, "grad_norm": 32.2010498046875, "learning_rate": 9.259103641456585e-06, "loss": 39.0553, "step": 6835 }, { "epoch": 162.76417910447762, "grad_norm": 22.90511703491211, "learning_rate": 9.257703081232494e-06, "loss": 40.5556, "step": 6836 }, { "epoch": 162.78805970149253, "grad_norm": 35.206451416015625, "learning_rate": 9.256302521008404e-06, "loss": 38.774, "step": 6837 }, { "epoch": 162.81194029850747, "grad_norm": 27.09161949157715, "learning_rate": 9.254901960784315e-06, "loss": 39.1619, "step": 6838 }, { "epoch": 162.83582089552237, "grad_norm": 36.54391860961914, "learning_rate": 9.253501400560225e-06, "loss": 38.2451, "step": 6839 }, { "epoch": 162.8597014925373, "grad_norm": 28.31791877746582, "learning_rate": 9.252100840336136e-06, "loss": 39.4128, "step": 6840 }, { "epoch": 162.88358208955225, "grad_norm": 32.99971389770508, "learning_rate": 9.250700280112047e-06, "loss": 38.2235, "step": 6841 }, { "epoch": 162.90746268656716, "grad_norm": 32.75461959838867, "learning_rate": 9.249299719887956e-06, "loss": 38.1497, "step": 6842 }, { "epoch": 162.9313432835821, "grad_norm": 29.004566192626953, "learning_rate": 9.247899159663866e-06, "loss": 39.3836, "step": 6843 }, { "epoch": 162.955223880597, "grad_norm": 25.05014991760254, "learning_rate": 9.246498599439777e-06, "loss": 37.0636, "step": 6844 }, { "epoch": 162.97910447761194, "grad_norm": 31.873991012573242, "learning_rate": 9.245098039215688e-06, "loss": 39.1655, "step": 6845 }, { "epoch": 163.0, "grad_norm": 23.277997970581055, "learning_rate": 9.243697478991598e-06, "loss": 34.8144, "step": 6846 }, { "epoch": 163.02388059701494, "grad_norm": 36.877803802490234, "learning_rate": 9.242296918767509e-06, "loss": 39.8112, "step": 6847 }, { "epoch": 163.04776119402985, "grad_norm": 32.6607551574707, "learning_rate": 9.240896358543418e-06, "loss": 40.7393, "step": 6848 }, { "epoch": 163.07164179104478, "grad_norm": 31.25403594970703, "learning_rate": 9.239495798319328e-06, "loss": 38.3637, "step": 6849 }, { "epoch": 163.0955223880597, "grad_norm": 28.465002059936523, "learning_rate": 9.238095238095239e-06, "loss": 38.7156, "step": 6850 }, { "epoch": 163.11940298507463, "grad_norm": 26.194684982299805, "learning_rate": 9.23669467787115e-06, "loss": 38.4498, "step": 6851 }, { "epoch": 163.14328358208957, "grad_norm": 27.579038619995117, "learning_rate": 9.23529411764706e-06, "loss": 37.5274, "step": 6852 }, { "epoch": 163.16716417910447, "grad_norm": 29.72901725769043, "learning_rate": 9.233893557422971e-06, "loss": 38.1516, "step": 6853 }, { "epoch": 163.1910447761194, "grad_norm": 22.37741470336914, "learning_rate": 9.23249299719888e-06, "loss": 38.5496, "step": 6854 }, { "epoch": 163.21492537313432, "grad_norm": 34.037353515625, "learning_rate": 9.23109243697479e-06, "loss": 40.6135, "step": 6855 }, { "epoch": 163.23880597014926, "grad_norm": 27.039569854736328, "learning_rate": 9.229691876750701e-06, "loss": 38.4322, "step": 6856 }, { "epoch": 163.26268656716417, "grad_norm": 34.39824676513672, "learning_rate": 9.228291316526612e-06, "loss": 37.4896, "step": 6857 }, { "epoch": 163.2865671641791, "grad_norm": 33.6617546081543, "learning_rate": 9.226890756302523e-06, "loss": 39.0515, "step": 6858 }, { "epoch": 163.31044776119404, "grad_norm": 29.275096893310547, "learning_rate": 9.225490196078433e-06, "loss": 38.9624, "step": 6859 }, { "epoch": 163.33432835820895, "grad_norm": 28.370113372802734, "learning_rate": 9.224089635854344e-06, "loss": 39.8506, "step": 6860 }, { "epoch": 163.3582089552239, "grad_norm": 27.253931045532227, "learning_rate": 9.222689075630253e-06, "loss": 38.5453, "step": 6861 }, { "epoch": 163.3820895522388, "grad_norm": 23.24662971496582, "learning_rate": 9.221288515406163e-06, "loss": 38.912, "step": 6862 }, { "epoch": 163.40597014925373, "grad_norm": 32.55588912963867, "learning_rate": 9.219887955182074e-06, "loss": 38.7099, "step": 6863 }, { "epoch": 163.42985074626867, "grad_norm": 27.186988830566406, "learning_rate": 9.218487394957983e-06, "loss": 39.041, "step": 6864 }, { "epoch": 163.45373134328358, "grad_norm": 34.195472717285156, "learning_rate": 9.217086834733894e-06, "loss": 39.3666, "step": 6865 }, { "epoch": 163.47761194029852, "grad_norm": 31.793378829956055, "learning_rate": 9.215686274509804e-06, "loss": 39.8421, "step": 6866 }, { "epoch": 163.50149253731342, "grad_norm": 30.88374137878418, "learning_rate": 9.214285714285715e-06, "loss": 38.6998, "step": 6867 }, { "epoch": 163.52537313432836, "grad_norm": 26.248395919799805, "learning_rate": 9.212885154061626e-06, "loss": 38.059, "step": 6868 }, { "epoch": 163.54925373134327, "grad_norm": 32.238731384277344, "learning_rate": 9.211484593837534e-06, "loss": 38.0208, "step": 6869 }, { "epoch": 163.5731343283582, "grad_norm": 25.677013397216797, "learning_rate": 9.210084033613445e-06, "loss": 37.9852, "step": 6870 }, { "epoch": 163.59701492537314, "grad_norm": 28.29668617248535, "learning_rate": 9.208683473389356e-06, "loss": 40.2626, "step": 6871 }, { "epoch": 163.62089552238805, "grad_norm": 24.860496520996094, "learning_rate": 9.207282913165266e-06, "loss": 37.8531, "step": 6872 }, { "epoch": 163.644776119403, "grad_norm": 32.82472610473633, "learning_rate": 9.205882352941177e-06, "loss": 38.6631, "step": 6873 }, { "epoch": 163.6686567164179, "grad_norm": 27.975828170776367, "learning_rate": 9.204481792717088e-06, "loss": 39.5384, "step": 6874 }, { "epoch": 163.69253731343284, "grad_norm": 30.201351165771484, "learning_rate": 9.203081232492998e-06, "loss": 38.123, "step": 6875 }, { "epoch": 163.71641791044777, "grad_norm": 32.39480972290039, "learning_rate": 9.201680672268907e-06, "loss": 39.7359, "step": 6876 }, { "epoch": 163.74029850746268, "grad_norm": 27.57598304748535, "learning_rate": 9.200280112044818e-06, "loss": 38.0357, "step": 6877 }, { "epoch": 163.76417910447762, "grad_norm": NaN, "learning_rate": 9.198879551820729e-06, "loss": 38.8083, "step": 6878 }, { "epoch": 163.78805970149253, "grad_norm": 22.73164176940918, "learning_rate": 9.198879551820729e-06, "loss": 39.3746, "step": 6879 }, { "epoch": 163.81194029850747, "grad_norm": 32.470794677734375, "learning_rate": 9.19747899159664e-06, "loss": 38.4076, "step": 6880 }, { "epoch": 163.83582089552237, "grad_norm": 26.334978103637695, "learning_rate": 9.19607843137255e-06, "loss": 38.1445, "step": 6881 }, { "epoch": 163.8597014925373, "grad_norm": 32.576622009277344, "learning_rate": 9.19467787114846e-06, "loss": 38.0056, "step": 6882 }, { "epoch": 163.88358208955225, "grad_norm": 29.02097511291504, "learning_rate": 9.19327731092437e-06, "loss": 40.2424, "step": 6883 }, { "epoch": 163.90746268656716, "grad_norm": 29.78497886657715, "learning_rate": 9.19187675070028e-06, "loss": 37.9755, "step": 6884 }, { "epoch": 163.9313432835821, "grad_norm": 28.278854370117188, "learning_rate": 9.19047619047619e-06, "loss": 38.892, "step": 6885 }, { "epoch": 163.955223880597, "grad_norm": 28.68059730529785, "learning_rate": 9.189075630252101e-06, "loss": 37.8199, "step": 6886 }, { "epoch": 163.97910447761194, "grad_norm": 24.857322692871094, "learning_rate": 9.187675070028012e-06, "loss": 39.6917, "step": 6887 }, { "epoch": 164.0, "grad_norm": 27.12505340576172, "learning_rate": 9.186274509803923e-06, "loss": 34.1164, "step": 6888 }, { "epoch": 164.02388059701494, "grad_norm": 26.02627944946289, "learning_rate": 9.184873949579832e-06, "loss": 38.3949, "step": 6889 }, { "epoch": 164.04776119402985, "grad_norm": 33.95978546142578, "learning_rate": 9.183473389355742e-06, "loss": 39.4691, "step": 6890 }, { "epoch": 164.07164179104478, "grad_norm": 27.781116485595703, "learning_rate": 9.182072829131653e-06, "loss": 37.6125, "step": 6891 }, { "epoch": 164.0955223880597, "grad_norm": 27.846071243286133, "learning_rate": 9.180672268907563e-06, "loss": 37.5973, "step": 6892 }, { "epoch": 164.11940298507463, "grad_norm": 23.98561668395996, "learning_rate": 9.179271708683474e-06, "loss": 38.6246, "step": 6893 }, { "epoch": 164.14328358208957, "grad_norm": 28.748355865478516, "learning_rate": 9.177871148459385e-06, "loss": 39.1328, "step": 6894 }, { "epoch": 164.16716417910447, "grad_norm": 22.823392868041992, "learning_rate": 9.176470588235294e-06, "loss": 38.8228, "step": 6895 }, { "epoch": 164.1910447761194, "grad_norm": 31.56593894958496, "learning_rate": 9.175070028011204e-06, "loss": 39.3094, "step": 6896 }, { "epoch": 164.21492537313432, "grad_norm": 27.536691665649414, "learning_rate": 9.173669467787115e-06, "loss": 38.8276, "step": 6897 }, { "epoch": 164.23880597014926, "grad_norm": 28.798383712768555, "learning_rate": 9.172268907563026e-06, "loss": 37.6737, "step": 6898 }, { "epoch": 164.26268656716417, "grad_norm": 26.151948928833008, "learning_rate": 9.170868347338936e-06, "loss": 38.0184, "step": 6899 }, { "epoch": 164.2865671641791, "grad_norm": 32.22966003417969, "learning_rate": 9.169467787114847e-06, "loss": 39.1828, "step": 6900 }, { "epoch": 164.31044776119404, "grad_norm": 25.881643295288086, "learning_rate": 9.168067226890757e-06, "loss": 37.777, "step": 6901 }, { "epoch": 164.33432835820895, "grad_norm": 29.259702682495117, "learning_rate": 9.166666666666666e-06, "loss": 37.7434, "step": 6902 }, { "epoch": 164.3582089552239, "grad_norm": 24.89307403564453, "learning_rate": 9.165266106442577e-06, "loss": 37.8326, "step": 6903 }, { "epoch": 164.3820895522388, "grad_norm": 27.22188377380371, "learning_rate": 9.163865546218488e-06, "loss": 39.1603, "step": 6904 }, { "epoch": 164.40597014925373, "grad_norm": 23.618114471435547, "learning_rate": 9.162464985994398e-06, "loss": 38.3846, "step": 6905 }, { "epoch": 164.42985074626867, "grad_norm": 32.07040786743164, "learning_rate": 9.161064425770309e-06, "loss": 39.0751, "step": 6906 }, { "epoch": 164.45373134328358, "grad_norm": 29.454524993896484, "learning_rate": 9.15966386554622e-06, "loss": 38.2031, "step": 6907 }, { "epoch": 164.47761194029852, "grad_norm": 23.247859954833984, "learning_rate": 9.158263305322129e-06, "loss": 39.196, "step": 6908 }, { "epoch": 164.50149253731342, "grad_norm": 24.405054092407227, "learning_rate": 9.15686274509804e-06, "loss": 39.4669, "step": 6909 }, { "epoch": 164.52537313432836, "grad_norm": 24.326190948486328, "learning_rate": 9.15546218487395e-06, "loss": 39.9453, "step": 6910 }, { "epoch": 164.54925373134327, "grad_norm": 23.48666000366211, "learning_rate": 9.15406162464986e-06, "loss": 37.9608, "step": 6911 }, { "epoch": 164.5731343283582, "grad_norm": 28.095449447631836, "learning_rate": 9.152661064425771e-06, "loss": 38.9568, "step": 6912 }, { "epoch": 164.59701492537314, "grad_norm": 24.124006271362305, "learning_rate": 9.151260504201682e-06, "loss": 40.1764, "step": 6913 }, { "epoch": 164.62089552238805, "grad_norm": 27.309589385986328, "learning_rate": 9.14985994397759e-06, "loss": 40.3751, "step": 6914 }, { "epoch": 164.644776119403, "grad_norm": 22.699621200561523, "learning_rate": 9.148459383753501e-06, "loss": 39.709, "step": 6915 }, { "epoch": 164.6686567164179, "grad_norm": 28.5625, "learning_rate": 9.147058823529412e-06, "loss": 38.6687, "step": 6916 }, { "epoch": 164.69253731343284, "grad_norm": 24.676198959350586, "learning_rate": 9.145658263305323e-06, "loss": 38.8227, "step": 6917 }, { "epoch": 164.71641791044777, "grad_norm": 25.53683853149414, "learning_rate": 9.144257703081233e-06, "loss": 38.3309, "step": 6918 }, { "epoch": 164.74029850746268, "grad_norm": 21.91119384765625, "learning_rate": 9.142857142857144e-06, "loss": 38.1358, "step": 6919 }, { "epoch": 164.76417910447762, "grad_norm": 23.987483978271484, "learning_rate": 9.141456582633055e-06, "loss": 39.3783, "step": 6920 }, { "epoch": 164.78805970149253, "grad_norm": 21.90296173095703, "learning_rate": 9.140056022408963e-06, "loss": 38.4104, "step": 6921 }, { "epoch": 164.81194029850747, "grad_norm": 23.88947868347168, "learning_rate": 9.138655462184874e-06, "loss": 40.9021, "step": 6922 }, { "epoch": 164.83582089552237, "grad_norm": 19.73707389831543, "learning_rate": 9.137254901960785e-06, "loss": 39.4567, "step": 6923 }, { "epoch": 164.8597014925373, "grad_norm": 21.8206787109375, "learning_rate": 9.135854341736695e-06, "loss": 38.7045, "step": 6924 }, { "epoch": 164.88358208955225, "grad_norm": 20.918621063232422, "learning_rate": 9.134453781512606e-06, "loss": 39.1697, "step": 6925 }, { "epoch": 164.90746268656716, "grad_norm": 21.001279830932617, "learning_rate": 9.133053221288517e-06, "loss": 38.2225, "step": 6926 }, { "epoch": 164.9313432835821, "grad_norm": 16.66687774658203, "learning_rate": 9.131652661064426e-06, "loss": 38.0187, "step": 6927 }, { "epoch": 164.955223880597, "grad_norm": 20.349098205566406, "learning_rate": 9.130252100840336e-06, "loss": 38.9978, "step": 6928 }, { "epoch": 164.97910447761194, "grad_norm": 17.241241455078125, "learning_rate": 9.128851540616247e-06, "loss": 38.5722, "step": 6929 }, { "epoch": 165.0, "grad_norm": 20.30120086669922, "learning_rate": 9.127450980392158e-06, "loss": 33.074, "step": 6930 }, { "epoch": 165.02388059701494, "grad_norm": 20.583757400512695, "learning_rate": 9.126050420168068e-06, "loss": 38.28, "step": 6931 }, { "epoch": 165.04776119402985, "grad_norm": 17.925338745117188, "learning_rate": 9.124649859943979e-06, "loss": 39.5699, "step": 6932 }, { "epoch": 165.07164179104478, "grad_norm": 20.660139083862305, "learning_rate": 9.123249299719888e-06, "loss": 39.1526, "step": 6933 }, { "epoch": 165.0955223880597, "grad_norm": 20.247779846191406, "learning_rate": 9.121848739495798e-06, "loss": 38.3042, "step": 6934 }, { "epoch": 165.11940298507463, "grad_norm": 22.91403579711914, "learning_rate": 9.120448179271709e-06, "loss": 38.7246, "step": 6935 }, { "epoch": 165.14328358208957, "grad_norm": 18.240346908569336, "learning_rate": 9.11904761904762e-06, "loss": 40.6094, "step": 6936 }, { "epoch": 165.16716417910447, "grad_norm": 19.83465576171875, "learning_rate": 9.11764705882353e-06, "loss": 39.5535, "step": 6937 }, { "epoch": 165.1910447761194, "grad_norm": 17.511629104614258, "learning_rate": 9.116246498599441e-06, "loss": 39.8103, "step": 6938 }, { "epoch": 165.21492537313432, "grad_norm": 16.819786071777344, "learning_rate": 9.11484593837535e-06, "loss": 37.9015, "step": 6939 }, { "epoch": 165.23880597014926, "grad_norm": 19.494901657104492, "learning_rate": 9.11344537815126e-06, "loss": 37.4988, "step": 6940 }, { "epoch": 165.26268656716417, "grad_norm": 20.577810287475586, "learning_rate": 9.112044817927171e-06, "loss": 38.5511, "step": 6941 }, { "epoch": 165.2865671641791, "grad_norm": 17.430700302124023, "learning_rate": 9.110644257703082e-06, "loss": 39.5467, "step": 6942 }, { "epoch": 165.31044776119404, "grad_norm": 21.67786407470703, "learning_rate": 9.109243697478992e-06, "loss": 37.5904, "step": 6943 }, { "epoch": 165.33432835820895, "grad_norm": 24.708356857299805, "learning_rate": 9.107843137254903e-06, "loss": 38.7451, "step": 6944 }, { "epoch": 165.3582089552239, "grad_norm": 16.916446685791016, "learning_rate": 9.106442577030814e-06, "loss": 37.8958, "step": 6945 }, { "epoch": 165.3820895522388, "grad_norm": 23.504207611083984, "learning_rate": 9.105042016806723e-06, "loss": 39.1934, "step": 6946 }, { "epoch": 165.40597014925373, "grad_norm": 18.756376266479492, "learning_rate": 9.103641456582633e-06, "loss": 36.8792, "step": 6947 }, { "epoch": 165.42985074626867, "grad_norm": 18.65342140197754, "learning_rate": 9.102240896358544e-06, "loss": 37.893, "step": 6948 }, { "epoch": 165.45373134328358, "grad_norm": 19.376798629760742, "learning_rate": 9.100840336134455e-06, "loss": 39.4041, "step": 6949 }, { "epoch": 165.47761194029852, "grad_norm": 14.387222290039062, "learning_rate": 9.099439775910365e-06, "loss": 38.3171, "step": 6950 }, { "epoch": 165.50149253731342, "grad_norm": 22.488039016723633, "learning_rate": 9.098039215686276e-06, "loss": 37.5534, "step": 6951 }, { "epoch": 165.52537313432836, "grad_norm": 16.533842086791992, "learning_rate": 9.096638655462185e-06, "loss": 39.5571, "step": 6952 }, { "epoch": 165.54925373134327, "grad_norm": 21.6029052734375, "learning_rate": 9.095238095238095e-06, "loss": 39.4568, "step": 6953 }, { "epoch": 165.5731343283582, "grad_norm": 23.718917846679688, "learning_rate": 9.093837535014006e-06, "loss": 38.8227, "step": 6954 }, { "epoch": 165.59701492537314, "grad_norm": 15.316898345947266, "learning_rate": 9.092436974789917e-06, "loss": 38.7754, "step": 6955 }, { "epoch": 165.62089552238805, "grad_norm": 21.905000686645508, "learning_rate": 9.091036414565827e-06, "loss": 39.0251, "step": 6956 }, { "epoch": 165.644776119403, "grad_norm": 25.428035736083984, "learning_rate": 9.089635854341738e-06, "loss": 38.984, "step": 6957 }, { "epoch": 165.6686567164179, "grad_norm": 15.639355659484863, "learning_rate": 9.088235294117647e-06, "loss": 38.9392, "step": 6958 }, { "epoch": 165.69253731343284, "grad_norm": 30.744590759277344, "learning_rate": 9.086834733893558e-06, "loss": 38.2226, "step": 6959 }, { "epoch": 165.71641791044777, "grad_norm": 23.297607421875, "learning_rate": 9.085434173669468e-06, "loss": 38.6842, "step": 6960 }, { "epoch": 165.74029850746268, "grad_norm": 27.761877059936523, "learning_rate": 9.084033613445379e-06, "loss": 38.7044, "step": 6961 }, { "epoch": 165.76417910447762, "grad_norm": 20.486059188842773, "learning_rate": 9.08263305322129e-06, "loss": 38.9376, "step": 6962 }, { "epoch": 165.78805970149253, "grad_norm": 25.5533504486084, "learning_rate": 9.0812324929972e-06, "loss": 38.9618, "step": 6963 }, { "epoch": 165.81194029850747, "grad_norm": 19.12059783935547, "learning_rate": 9.07983193277311e-06, "loss": 38.6444, "step": 6964 }, { "epoch": 165.83582089552237, "grad_norm": 20.324451446533203, "learning_rate": 9.07843137254902e-06, "loss": 38.7041, "step": 6965 }, { "epoch": 165.8597014925373, "grad_norm": 19.719558715820312, "learning_rate": 9.07703081232493e-06, "loss": 39.5301, "step": 6966 }, { "epoch": 165.88358208955225, "grad_norm": 18.664348602294922, "learning_rate": 9.075630252100841e-06, "loss": 38.6139, "step": 6967 }, { "epoch": 165.90746268656716, "grad_norm": 22.657773971557617, "learning_rate": 9.074229691876752e-06, "loss": 39.0989, "step": 6968 }, { "epoch": 165.9313432835821, "grad_norm": 16.08565902709961, "learning_rate": 9.072829131652662e-06, "loss": 37.5231, "step": 6969 }, { "epoch": 165.955223880597, "grad_norm": 20.269485473632812, "learning_rate": 9.071428571428573e-06, "loss": 39.146, "step": 6970 }, { "epoch": 165.97910447761194, "grad_norm": 19.0544376373291, "learning_rate": 9.070028011204482e-06, "loss": 38.0774, "step": 6971 }, { "epoch": 166.0, "grad_norm": 17.171775817871094, "learning_rate": 9.068627450980392e-06, "loss": 35.643, "step": 6972 }, { "epoch": 166.02388059701494, "grad_norm": 16.68134307861328, "learning_rate": 9.067226890756303e-06, "loss": 38.4436, "step": 6973 }, { "epoch": 166.04776119402985, "grad_norm": 20.85126495361328, "learning_rate": 9.065826330532214e-06, "loss": 39.3827, "step": 6974 }, { "epoch": 166.07164179104478, "grad_norm": 16.558414459228516, "learning_rate": 9.064425770308124e-06, "loss": 39.2912, "step": 6975 }, { "epoch": 166.0955223880597, "grad_norm": 16.860647201538086, "learning_rate": 9.063025210084035e-06, "loss": 38.5577, "step": 6976 }, { "epoch": 166.11940298507463, "grad_norm": 18.252050399780273, "learning_rate": 9.061624649859944e-06, "loss": 38.5392, "step": 6977 }, { "epoch": 166.14328358208957, "grad_norm": 15.765396118164062, "learning_rate": 9.060224089635855e-06, "loss": 39.5898, "step": 6978 }, { "epoch": 166.16716417910447, "grad_norm": 18.337221145629883, "learning_rate": 9.058823529411765e-06, "loss": 37.3266, "step": 6979 }, { "epoch": 166.1910447761194, "grad_norm": 17.254228591918945, "learning_rate": 9.057422969187676e-06, "loss": 38.7531, "step": 6980 }, { "epoch": 166.21492537313432, "grad_norm": 15.587992668151855, "learning_rate": 9.056022408963587e-06, "loss": 39.1706, "step": 6981 }, { "epoch": 166.23880597014926, "grad_norm": 17.77199935913086, "learning_rate": 9.054621848739497e-06, "loss": 38.2599, "step": 6982 }, { "epoch": 166.26268656716417, "grad_norm": 18.72295570373535, "learning_rate": 9.053221288515406e-06, "loss": 39.0038, "step": 6983 }, { "epoch": 166.2865671641791, "grad_norm": 15.716513633728027, "learning_rate": 9.051820728291317e-06, "loss": 38.4302, "step": 6984 }, { "epoch": 166.31044776119404, "grad_norm": 18.48078727722168, "learning_rate": 9.050420168067227e-06, "loss": 38.8014, "step": 6985 }, { "epoch": 166.33432835820895, "grad_norm": 15.823141098022461, "learning_rate": 9.049019607843138e-06, "loss": 38.6901, "step": 6986 }, { "epoch": 166.3582089552239, "grad_norm": 19.339231491088867, "learning_rate": 9.047619047619049e-06, "loss": 38.4488, "step": 6987 }, { "epoch": 166.3820895522388, "grad_norm": 18.258453369140625, "learning_rate": 9.04621848739496e-06, "loss": 37.8733, "step": 6988 }, { "epoch": 166.40597014925373, "grad_norm": 15.638240814208984, "learning_rate": 9.04481792717087e-06, "loss": 36.9098, "step": 6989 }, { "epoch": 166.42985074626867, "grad_norm": 19.154794692993164, "learning_rate": 9.043417366946779e-06, "loss": 37.4268, "step": 6990 }, { "epoch": 166.45373134328358, "grad_norm": 14.776188850402832, "learning_rate": 9.04201680672269e-06, "loss": 39.6194, "step": 6991 }, { "epoch": 166.47761194029852, "grad_norm": 18.53327751159668, "learning_rate": 9.0406162464986e-06, "loss": 38.2096, "step": 6992 }, { "epoch": 166.50149253731342, "grad_norm": 16.842300415039062, "learning_rate": 9.03921568627451e-06, "loss": 38.5841, "step": 6993 }, { "epoch": 166.52537313432836, "grad_norm": 15.047019004821777, "learning_rate": 9.037815126050421e-06, "loss": 38.4693, "step": 6994 }, { "epoch": 166.54925373134327, "grad_norm": 24.15064811706543, "learning_rate": 9.036414565826332e-06, "loss": 39.4956, "step": 6995 }, { "epoch": 166.5731343283582, "grad_norm": 18.957366943359375, "learning_rate": 9.035014005602241e-06, "loss": 38.1947, "step": 6996 }, { "epoch": 166.59701492537314, "grad_norm": 14.240507125854492, "learning_rate": 9.033613445378152e-06, "loss": 38.1626, "step": 6997 }, { "epoch": 166.62089552238805, "grad_norm": 23.42329978942871, "learning_rate": 9.032212885154062e-06, "loss": 39.1337, "step": 6998 }, { "epoch": 166.644776119403, "grad_norm": 20.22484016418457, "learning_rate": 9.030812324929973e-06, "loss": 38.7054, "step": 6999 }, { "epoch": 166.6686567164179, "grad_norm": 16.616657257080078, "learning_rate": 9.029411764705884e-06, "loss": 39.5361, "step": 7000 }, { "epoch": 166.69253731343284, "grad_norm": 18.058732986450195, "learning_rate": 9.028011204481794e-06, "loss": 39.2417, "step": 7001 }, { "epoch": 166.71641791044777, "grad_norm": 18.702417373657227, "learning_rate": 9.026610644257703e-06, "loss": 39.4343, "step": 7002 }, { "epoch": 166.74029850746268, "grad_norm": NaN, "learning_rate": 9.025210084033614e-06, "loss": 34.1441, "step": 7003 }, { "epoch": 166.76417910447762, "grad_norm": 15.148275375366211, "learning_rate": 9.025210084033614e-06, "loss": 38.7294, "step": 7004 }, { "epoch": 166.78805970149253, "grad_norm": 17.811681747436523, "learning_rate": 9.023809523809524e-06, "loss": 40.2746, "step": 7005 }, { "epoch": 166.81194029850747, "grad_norm": 15.742315292358398, "learning_rate": 9.022408963585435e-06, "loss": 39.8818, "step": 7006 }, { "epoch": 166.83582089552237, "grad_norm": 16.79622459411621, "learning_rate": 9.021008403361346e-06, "loss": 38.1164, "step": 7007 }, { "epoch": 166.8597014925373, "grad_norm": 17.417369842529297, "learning_rate": 9.019607843137256e-06, "loss": 39.9128, "step": 7008 }, { "epoch": 166.88358208955225, "grad_norm": 18.435260772705078, "learning_rate": 9.018207282913165e-06, "loss": 39.2292, "step": 7009 }, { "epoch": 166.90746268656716, "grad_norm": 14.42545223236084, "learning_rate": 9.016806722689076e-06, "loss": 37.3983, "step": 7010 }, { "epoch": 166.9313432835821, "grad_norm": 19.327251434326172, "learning_rate": 9.015406162464987e-06, "loss": 38.8978, "step": 7011 }, { "epoch": 166.955223880597, "grad_norm": 23.709386825561523, "learning_rate": 9.014005602240897e-06, "loss": 37.6597, "step": 7012 }, { "epoch": 166.97910447761194, "grad_norm": 13.446073532104492, "learning_rate": 9.012605042016808e-06, "loss": 38.0333, "step": 7013 }, { "epoch": 167.0, "grad_norm": 17.742656707763672, "learning_rate": 9.011204481792719e-06, "loss": 33.7607, "step": 7014 }, { "epoch": 167.02388059701494, "grad_norm": 24.789691925048828, "learning_rate": 9.009803921568629e-06, "loss": 37.8773, "step": 7015 }, { "epoch": 167.04776119402985, "grad_norm": 15.1881685256958, "learning_rate": 9.008403361344538e-06, "loss": 38.3267, "step": 7016 }, { "epoch": 167.07164179104478, "grad_norm": 26.128843307495117, "learning_rate": 9.007002801120449e-06, "loss": 38.5114, "step": 7017 }, { "epoch": 167.0955223880597, "grad_norm": 18.408493041992188, "learning_rate": 9.00560224089636e-06, "loss": 39.4748, "step": 7018 }, { "epoch": 167.11940298507463, "grad_norm": 30.897546768188477, "learning_rate": 9.00420168067227e-06, "loss": 38.585, "step": 7019 }, { "epoch": 167.14328358208957, "grad_norm": 21.582815170288086, "learning_rate": 9.00280112044818e-06, "loss": 38.478, "step": 7020 }, { "epoch": 167.16716417910447, "grad_norm": 22.242801666259766, "learning_rate": 9.001400560224091e-06, "loss": 38.6737, "step": 7021 }, { "epoch": 167.1910447761194, "grad_norm": 21.05809211730957, "learning_rate": 9e-06, "loss": 37.014, "step": 7022 }, { "epoch": 167.21492537313432, "grad_norm": 17.701534271240234, "learning_rate": 8.998599439775911e-06, "loss": 37.6012, "step": 7023 }, { "epoch": 167.23880597014926, "grad_norm": 14.203514099121094, "learning_rate": 8.997198879551822e-06, "loss": 38.7506, "step": 7024 }, { "epoch": 167.26268656716417, "grad_norm": 21.035812377929688, "learning_rate": 8.995798319327732e-06, "loss": 39.2168, "step": 7025 }, { "epoch": 167.2865671641791, "grad_norm": 16.741291046142578, "learning_rate": 8.994397759103643e-06, "loss": 37.6171, "step": 7026 }, { "epoch": 167.31044776119404, "grad_norm": 20.702959060668945, "learning_rate": 8.992997198879553e-06, "loss": 39.8419, "step": 7027 }, { "epoch": 167.33432835820895, "grad_norm": 15.385114669799805, "learning_rate": 8.991596638655462e-06, "loss": 40.0453, "step": 7028 }, { "epoch": 167.3582089552239, "grad_norm": 20.197725296020508, "learning_rate": 8.990196078431373e-06, "loss": 37.4206, "step": 7029 }, { "epoch": 167.3820895522388, "grad_norm": 16.919689178466797, "learning_rate": 8.988795518207284e-06, "loss": 38.1808, "step": 7030 }, { "epoch": 167.40597014925373, "grad_norm": 18.998701095581055, "learning_rate": 8.987394957983194e-06, "loss": 38.792, "step": 7031 }, { "epoch": 167.42985074626867, "grad_norm": 17.395341873168945, "learning_rate": 8.985994397759105e-06, "loss": 39.3755, "step": 7032 }, { "epoch": 167.45373134328358, "grad_norm": 16.7122745513916, "learning_rate": 8.984593837535016e-06, "loss": 38.39, "step": 7033 }, { "epoch": 167.47761194029852, "grad_norm": 16.960344314575195, "learning_rate": 8.983193277310926e-06, "loss": 38.9375, "step": 7034 }, { "epoch": 167.50149253731342, "grad_norm": 18.20403480529785, "learning_rate": 8.981792717086835e-06, "loss": 38.3068, "step": 7035 }, { "epoch": 167.52537313432836, "grad_norm": 15.392725944519043, "learning_rate": 8.980392156862746e-06, "loss": 37.5274, "step": 7036 }, { "epoch": 167.54925373134327, "grad_norm": 17.684289932250977, "learning_rate": 8.978991596638656e-06, "loss": 38.959, "step": 7037 }, { "epoch": 167.5731343283582, "grad_norm": 15.469087600708008, "learning_rate": 8.977591036414567e-06, "loss": 37.1788, "step": 7038 }, { "epoch": 167.59701492537314, "grad_norm": 19.313865661621094, "learning_rate": 8.976190476190478e-06, "loss": 39.7805, "step": 7039 }, { "epoch": 167.62089552238805, "grad_norm": 17.780128479003906, "learning_rate": 8.974789915966388e-06, "loss": 38.685, "step": 7040 }, { "epoch": 167.644776119403, "grad_norm": 15.168746948242188, "learning_rate": 8.973389355742297e-06, "loss": 39.9769, "step": 7041 }, { "epoch": 167.6686567164179, "grad_norm": 19.67713165283203, "learning_rate": 8.971988795518208e-06, "loss": 38.8518, "step": 7042 }, { "epoch": 167.69253731343284, "grad_norm": 13.875410079956055, "learning_rate": 8.970588235294119e-06, "loss": 38.6862, "step": 7043 }, { "epoch": 167.71641791044777, "grad_norm": 17.410036087036133, "learning_rate": 8.96918767507003e-06, "loss": 37.2191, "step": 7044 }, { "epoch": 167.74029850746268, "grad_norm": 12.649752616882324, "learning_rate": 8.96778711484594e-06, "loss": 38.9077, "step": 7045 }, { "epoch": 167.76417910447762, "grad_norm": 18.6796932220459, "learning_rate": 8.96638655462185e-06, "loss": 39.6732, "step": 7046 }, { "epoch": 167.78805970149253, "grad_norm": 20.211078643798828, "learning_rate": 8.96498599439776e-06, "loss": 39.0417, "step": 7047 }, { "epoch": 167.81194029850747, "grad_norm": 16.24715805053711, "learning_rate": 8.96358543417367e-06, "loss": 40.6287, "step": 7048 }, { "epoch": 167.83582089552237, "grad_norm": 14.665667533874512, "learning_rate": 8.96218487394958e-06, "loss": 38.0857, "step": 7049 }, { "epoch": 167.8597014925373, "grad_norm": 16.91412925720215, "learning_rate": 8.960784313725491e-06, "loss": 37.7777, "step": 7050 }, { "epoch": 167.88358208955225, "grad_norm": 16.80457878112793, "learning_rate": 8.959383753501402e-06, "loss": 38.633, "step": 7051 }, { "epoch": 167.90746268656716, "grad_norm": 17.991159439086914, "learning_rate": 8.957983193277313e-06, "loss": 39.2465, "step": 7052 }, { "epoch": 167.9313432835821, "grad_norm": 16.38360595703125, "learning_rate": 8.956582633053222e-06, "loss": 39.0996, "step": 7053 }, { "epoch": 167.955223880597, "grad_norm": 15.639243125915527, "learning_rate": 8.955182072829132e-06, "loss": 37.9887, "step": 7054 }, { "epoch": 167.97910447761194, "grad_norm": 13.691624641418457, "learning_rate": 8.953781512605043e-06, "loss": 39.3299, "step": 7055 }, { "epoch": 168.0, "grad_norm": 13.173548698425293, "learning_rate": 8.952380952380953e-06, "loss": 33.9401, "step": 7056 }, { "epoch": 168.02388059701494, "grad_norm": 15.656696319580078, "learning_rate": 8.950980392156864e-06, "loss": 39.6606, "step": 7057 }, { "epoch": 168.04776119402985, "grad_norm": 17.121431350708008, "learning_rate": 8.949579831932775e-06, "loss": 40.1573, "step": 7058 }, { "epoch": 168.07164179104478, "grad_norm": 15.972476959228516, "learning_rate": 8.948179271708685e-06, "loss": 38.4846, "step": 7059 }, { "epoch": 168.0955223880597, "grad_norm": 13.958266258239746, "learning_rate": 8.946778711484594e-06, "loss": 39.4852, "step": 7060 }, { "epoch": 168.11940298507463, "grad_norm": 18.454063415527344, "learning_rate": 8.945378151260505e-06, "loss": 38.1036, "step": 7061 }, { "epoch": 168.14328358208957, "grad_norm": 17.495267868041992, "learning_rate": 8.943977591036416e-06, "loss": 37.9424, "step": 7062 }, { "epoch": 168.16716417910447, "grad_norm": 15.519514083862305, "learning_rate": 8.942577030812326e-06, "loss": 38.6599, "step": 7063 }, { "epoch": 168.1910447761194, "grad_norm": 20.250783920288086, "learning_rate": 8.941176470588237e-06, "loss": 37.5316, "step": 7064 }, { "epoch": 168.21492537313432, "grad_norm": 16.71542739868164, "learning_rate": 8.939775910364148e-06, "loss": 39.2917, "step": 7065 }, { "epoch": 168.23880597014926, "grad_norm": 15.245902061462402, "learning_rate": 8.938375350140056e-06, "loss": 39.1659, "step": 7066 }, { "epoch": 168.26268656716417, "grad_norm": 17.60688018798828, "learning_rate": 8.936974789915967e-06, "loss": 39.6018, "step": 7067 }, { "epoch": 168.2865671641791, "grad_norm": 23.204566955566406, "learning_rate": 8.935574229691878e-06, "loss": 39.2939, "step": 7068 }, { "epoch": 168.31044776119404, "grad_norm": 16.920000076293945, "learning_rate": 8.934173669467788e-06, "loss": 37.6469, "step": 7069 }, { "epoch": 168.33432835820895, "grad_norm": 17.549373626708984, "learning_rate": 8.932773109243699e-06, "loss": 38.3607, "step": 7070 }, { "epoch": 168.3582089552239, "grad_norm": 28.0925350189209, "learning_rate": 8.93137254901961e-06, "loss": 38.8907, "step": 7071 }, { "epoch": 168.3820895522388, "grad_norm": 21.194316864013672, "learning_rate": 8.929971988795519e-06, "loss": 38.6106, "step": 7072 }, { "epoch": 168.40597014925373, "grad_norm": 29.696517944335938, "learning_rate": 8.92857142857143e-06, "loss": 38.6381, "step": 7073 }, { "epoch": 168.42985074626867, "grad_norm": 21.88129997253418, "learning_rate": 8.92717086834734e-06, "loss": 37.0661, "step": 7074 }, { "epoch": 168.45373134328358, "grad_norm": 22.167688369750977, "learning_rate": 8.92577030812325e-06, "loss": 38.1369, "step": 7075 }, { "epoch": 168.47761194029852, "grad_norm": 25.323083877563477, "learning_rate": 8.924369747899161e-06, "loss": 37.809, "step": 7076 }, { "epoch": 168.50149253731342, "grad_norm": 15.450112342834473, "learning_rate": 8.922969187675072e-06, "loss": 38.1747, "step": 7077 }, { "epoch": 168.52537313432836, "grad_norm": 22.90835189819336, "learning_rate": 8.921568627450982e-06, "loss": 37.6116, "step": 7078 }, { "epoch": 168.54925373134327, "grad_norm": 17.67413330078125, "learning_rate": 8.920168067226891e-06, "loss": 38.3698, "step": 7079 }, { "epoch": 168.5731343283582, "grad_norm": 20.223190307617188, "learning_rate": 8.918767507002802e-06, "loss": 39.1152, "step": 7080 }, { "epoch": 168.59701492537314, "grad_norm": 23.67002296447754, "learning_rate": 8.917366946778713e-06, "loss": 38.104, "step": 7081 }, { "epoch": 168.62089552238805, "grad_norm": 17.217201232910156, "learning_rate": 8.915966386554623e-06, "loss": 39.2392, "step": 7082 }, { "epoch": 168.644776119403, "grad_norm": 22.82044792175293, "learning_rate": 8.914565826330534e-06, "loss": 38.5469, "step": 7083 }, { "epoch": 168.6686567164179, "grad_norm": 22.710506439208984, "learning_rate": 8.913165266106445e-06, "loss": 38.6132, "step": 7084 }, { "epoch": 168.69253731343284, "grad_norm": 15.312932014465332, "learning_rate": 8.911764705882354e-06, "loss": 39.6502, "step": 7085 }, { "epoch": 168.71641791044777, "grad_norm": 22.852859497070312, "learning_rate": 8.910364145658264e-06, "loss": 38.7171, "step": 7086 }, { "epoch": 168.74029850746268, "grad_norm": 18.29657554626465, "learning_rate": 8.908963585434175e-06, "loss": 38.8535, "step": 7087 }, { "epoch": 168.76417910447762, "grad_norm": 15.349685668945312, "learning_rate": 8.907563025210085e-06, "loss": 37.8596, "step": 7088 }, { "epoch": 168.78805970149253, "grad_norm": 19.419158935546875, "learning_rate": 8.906162464985994e-06, "loss": 38.5777, "step": 7089 }, { "epoch": 168.81194029850747, "grad_norm": 17.963842391967773, "learning_rate": 8.904761904761905e-06, "loss": 38.8288, "step": 7090 }, { "epoch": 168.83582089552237, "grad_norm": 19.185089111328125, "learning_rate": 8.903361344537816e-06, "loss": 38.0491, "step": 7091 }, { "epoch": 168.8597014925373, "grad_norm": 25.85097312927246, "learning_rate": 8.901960784313726e-06, "loss": 37.4236, "step": 7092 }, { "epoch": 168.88358208955225, "grad_norm": 16.209335327148438, "learning_rate": 8.900560224089635e-06, "loss": 39.0322, "step": 7093 }, { "epoch": 168.90746268656716, "grad_norm": 27.616640090942383, "learning_rate": 8.899159663865546e-06, "loss": 38.5832, "step": 7094 }, { "epoch": 168.9313432835821, "grad_norm": 19.664894104003906, "learning_rate": 8.897759103641457e-06, "loss": 37.6454, "step": 7095 }, { "epoch": 168.955223880597, "grad_norm": 20.211137771606445, "learning_rate": 8.896358543417367e-06, "loss": 39.4748, "step": 7096 }, { "epoch": 168.97910447761194, "grad_norm": 23.72620391845703, "learning_rate": 8.894957983193278e-06, "loss": 39.8556, "step": 7097 }, { "epoch": 169.0, "grad_norm": 11.977401733398438, "learning_rate": 8.893557422969188e-06, "loss": 33.6459, "step": 7098 }, { "epoch": 169.02388059701494, "grad_norm": 27.021682739257812, "learning_rate": 8.892156862745099e-06, "loss": 40.1441, "step": 7099 }, { "epoch": 169.04776119402985, "grad_norm": 16.188669204711914, "learning_rate": 8.890756302521008e-06, "loss": 38.0459, "step": 7100 }, { "epoch": 169.07164179104478, "grad_norm": 24.323711395263672, "learning_rate": 8.889355742296919e-06, "loss": 38.386, "step": 7101 }, { "epoch": 169.0955223880597, "grad_norm": 22.40289878845215, "learning_rate": 8.88795518207283e-06, "loss": 37.6409, "step": 7102 }, { "epoch": 169.11940298507463, "grad_norm": 17.63547706604004, "learning_rate": 8.88655462184874e-06, "loss": 37.1451, "step": 7103 }, { "epoch": 169.14328358208957, "grad_norm": 35.601951599121094, "learning_rate": 8.88515406162465e-06, "loss": 39.6787, "step": 7104 }, { "epoch": 169.16716417910447, "grad_norm": 28.64064598083496, "learning_rate": 8.883753501400561e-06, "loss": 39.7639, "step": 7105 }, { "epoch": 169.1910447761194, "grad_norm": 37.155372619628906, "learning_rate": 8.88235294117647e-06, "loss": 37.6808, "step": 7106 }, { "epoch": 169.21492537313432, "grad_norm": 29.988176345825195, "learning_rate": 8.88095238095238e-06, "loss": 38.2382, "step": 7107 }, { "epoch": 169.23880597014926, "grad_norm": 32.40060806274414, "learning_rate": 8.879551820728291e-06, "loss": 38.6155, "step": 7108 }, { "epoch": 169.26268656716417, "grad_norm": 29.709169387817383, "learning_rate": 8.878151260504202e-06, "loss": 38.6187, "step": 7109 }, { "epoch": 169.2865671641791, "grad_norm": 28.023569107055664, "learning_rate": 8.876750700280113e-06, "loss": 39.2048, "step": 7110 }, { "epoch": 169.31044776119404, "grad_norm": 24.473493576049805, "learning_rate": 8.875350140056023e-06, "loss": 38.3527, "step": 7111 }, { "epoch": 169.33432835820895, "grad_norm": 33.315338134765625, "learning_rate": 8.873949579831932e-06, "loss": 37.1304, "step": 7112 }, { "epoch": 169.3582089552239, "grad_norm": 28.781728744506836, "learning_rate": 8.872549019607843e-06, "loss": 38.6562, "step": 7113 }, { "epoch": 169.3820895522388, "grad_norm": 33.044647216796875, "learning_rate": 8.871148459383754e-06, "loss": 38.6907, "step": 7114 }, { "epoch": 169.40597014925373, "grad_norm": 28.969144821166992, "learning_rate": 8.869747899159664e-06, "loss": 39.0103, "step": 7115 }, { "epoch": 169.42985074626867, "grad_norm": 29.890914916992188, "learning_rate": 8.868347338935575e-06, "loss": 36.4496, "step": 7116 }, { "epoch": 169.45373134328358, "grad_norm": 29.558334350585938, "learning_rate": 8.866946778711485e-06, "loss": 38.781, "step": 7117 }, { "epoch": 169.47761194029852, "grad_norm": 28.465272903442383, "learning_rate": 8.865546218487396e-06, "loss": 37.38, "step": 7118 }, { "epoch": 169.50149253731342, "grad_norm": 26.63448143005371, "learning_rate": 8.864145658263305e-06, "loss": 38.3743, "step": 7119 }, { "epoch": 169.52537313432836, "grad_norm": 33.672149658203125, "learning_rate": 8.862745098039216e-06, "loss": 37.1088, "step": 7120 }, { "epoch": 169.54925373134327, "grad_norm": 27.566909790039062, "learning_rate": 8.861344537815126e-06, "loss": 39.3635, "step": 7121 }, { "epoch": 169.5731343283582, "grad_norm": 30.72598648071289, "learning_rate": 8.859943977591037e-06, "loss": 37.5061, "step": 7122 }, { "epoch": 169.59701492537314, "grad_norm": 21.491497039794922, "learning_rate": 8.858543417366948e-06, "loss": 37.4045, "step": 7123 }, { "epoch": 169.62089552238805, "grad_norm": 31.785451889038086, "learning_rate": 8.857142857142858e-06, "loss": 37.2397, "step": 7124 }, { "epoch": 169.644776119403, "grad_norm": 28.889570236206055, "learning_rate": 8.855742296918767e-06, "loss": 38.2749, "step": 7125 }, { "epoch": 169.6686567164179, "grad_norm": 29.61405372619629, "learning_rate": 8.854341736694678e-06, "loss": 39.3158, "step": 7126 }, { "epoch": 169.69253731343284, "grad_norm": 27.74846076965332, "learning_rate": 8.852941176470588e-06, "loss": 39.5199, "step": 7127 }, { "epoch": 169.71641791044777, "grad_norm": 31.886384963989258, "learning_rate": 8.851540616246499e-06, "loss": 37.9009, "step": 7128 }, { "epoch": 169.74029850746268, "grad_norm": 28.313329696655273, "learning_rate": 8.85014005602241e-06, "loss": 39.9385, "step": 7129 }, { "epoch": 169.76417910447762, "grad_norm": 30.50246810913086, "learning_rate": 8.84873949579832e-06, "loss": 38.9029, "step": 7130 }, { "epoch": 169.78805970149253, "grad_norm": 28.30780792236328, "learning_rate": 8.84733893557423e-06, "loss": 39.1536, "step": 7131 }, { "epoch": 169.81194029850747, "grad_norm": 29.161802291870117, "learning_rate": 8.84593837535014e-06, "loss": 39.0592, "step": 7132 }, { "epoch": 169.83582089552237, "grad_norm": 24.86357307434082, "learning_rate": 8.84453781512605e-06, "loss": 39.0002, "step": 7133 }, { "epoch": 169.8597014925373, "grad_norm": 36.86708068847656, "learning_rate": 8.843137254901961e-06, "loss": 38.9471, "step": 7134 }, { "epoch": 169.88358208955225, "grad_norm": 32.14461898803711, "learning_rate": 8.841736694677872e-06, "loss": 38.8807, "step": 7135 }, { "epoch": 169.90746268656716, "grad_norm": 30.886720657348633, "learning_rate": 8.840336134453783e-06, "loss": 39.0135, "step": 7136 }, { "epoch": 169.9313432835821, "grad_norm": 26.017770767211914, "learning_rate": 8.838935574229691e-06, "loss": 39.394, "step": 7137 }, { "epoch": 169.955223880597, "grad_norm": 27.808815002441406, "learning_rate": 8.837535014005602e-06, "loss": 38.4624, "step": 7138 }, { "epoch": 169.97910447761194, "grad_norm": 24.808860778808594, "learning_rate": 8.836134453781513e-06, "loss": 39.5112, "step": 7139 }, { "epoch": 170.0, "grad_norm": 26.195302963256836, "learning_rate": 8.834733893557423e-06, "loss": 34.1301, "step": 7140 }, { "epoch": 170.0, "step": 7140, "total_flos": 3.510199823180317e+17, "train_loss": 4.586799935733571, "train_runtime": 25702.2082, "train_samples_per_second": 35.399, "train_steps_per_second": 0.278 }, { "epoch": 170.02388059701494, "grad_norm": 27.166627883911133, "learning_rate": 1e-05, "loss": 39.5213, "step": 7141 }, { "epoch": 170.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99874686716792e-06, "loss": 44.4595, "step": 7142 }, { "epoch": 170.07164179104478, "grad_norm": Infinity, "learning_rate": 9.99874686716792e-06, "loss": 47.7558, "step": 7143 }, { "epoch": 170.0955223880597, "grad_norm": 454.9642639160156, "learning_rate": 9.99874686716792e-06, "loss": 47.5788, "step": 7144 }, { "epoch": 170.11940298507463, "grad_norm": 259.2172546386719, "learning_rate": 9.99749373433584e-06, "loss": 44.2854, "step": 7145 }, { "epoch": 170.14328358208957, "grad_norm": 93.37568664550781, "learning_rate": 9.996240601503761e-06, "loss": 41.6306, "step": 7146 }, { "epoch": 170.16716417910447, "grad_norm": 83.61749267578125, "learning_rate": 9.99498746867168e-06, "loss": 40.4442, "step": 7147 }, { "epoch": 170.1910447761194, "grad_norm": 66.74041748046875, "learning_rate": 9.9937343358396e-06, "loss": 39.8658, "step": 7148 }, { "epoch": 170.21492537313432, "grad_norm": 60.166690826416016, "learning_rate": 9.992481203007518e-06, "loss": 40.5291, "step": 7149 }, { "epoch": 170.23880597014926, "grad_norm": 49.17573165893555, "learning_rate": 9.99122807017544e-06, "loss": 38.687, "step": 7150 }, { "epoch": 170.26268656716417, "grad_norm": 53.34871292114258, "learning_rate": 9.98997493734336e-06, "loss": 38.0343, "step": 7151 }, { "epoch": 170.2865671641791, "grad_norm": 36.63299560546875, "learning_rate": 9.988721804511279e-06, "loss": 38.4072, "step": 7152 }, { "epoch": 170.31044776119404, "grad_norm": 51.223777770996094, "learning_rate": 9.987468671679199e-06, "loss": 38.6381, "step": 7153 }, { "epoch": 170.33432835820895, "grad_norm": 29.117027282714844, "learning_rate": 9.986215538847118e-06, "loss": 39.8293, "step": 7154 }, { "epoch": 170.3582089552239, "grad_norm": 42.85747146606445, "learning_rate": 9.984962406015038e-06, "loss": 38.4093, "step": 7155 }, { "epoch": 170.3820895522388, "grad_norm": 24.090818405151367, "learning_rate": 9.983709273182957e-06, "loss": 38.7232, "step": 7156 }, { "epoch": 170.40597014925373, "grad_norm": 37.851863861083984, "learning_rate": 9.982456140350879e-06, "loss": 39.2879, "step": 7157 }, { "epoch": 170.42985074626867, "grad_norm": 20.849395751953125, "learning_rate": 9.981203007518798e-06, "loss": 38.5625, "step": 7158 }, { "epoch": 170.45373134328358, "grad_norm": 32.551849365234375, "learning_rate": 9.979949874686718e-06, "loss": 37.3203, "step": 7159 }, { "epoch": 170.47761194029852, "grad_norm": 23.6810359954834, "learning_rate": 9.978696741854637e-06, "loss": 37.7778, "step": 7160 }, { "epoch": 170.50149253731342, "grad_norm": 26.827194213867188, "learning_rate": 9.977443609022557e-06, "loss": 38.5357, "step": 7161 }, { "epoch": 170.52537313432836, "grad_norm": 25.67653465270996, "learning_rate": 9.976190476190477e-06, "loss": 38.2528, "step": 7162 }, { "epoch": 170.54925373134327, "grad_norm": 25.528614044189453, "learning_rate": 9.974937343358396e-06, "loss": 38.9571, "step": 7163 }, { "epoch": 170.5731343283582, "grad_norm": 17.796689987182617, "learning_rate": 9.973684210526316e-06, "loss": 39.4661, "step": 7164 }, { "epoch": 170.59701492537314, "grad_norm": 25.79865264892578, "learning_rate": 9.972431077694237e-06, "loss": 38.3182, "step": 7165 }, { "epoch": 170.62089552238805, "grad_norm": 21.690196990966797, "learning_rate": 9.971177944862157e-06, "loss": 38.4445, "step": 7166 }, { "epoch": 170.644776119403, "grad_norm": 18.10161590576172, "learning_rate": 9.969924812030076e-06, "loss": 39.3527, "step": 7167 }, { "epoch": 170.6686567164179, "grad_norm": 19.185293197631836, "learning_rate": 9.968671679197996e-06, "loss": 38.0544, "step": 7168 }, { "epoch": 170.69253731343284, "grad_norm": 19.368209838867188, "learning_rate": 9.967418546365915e-06, "loss": 39.4563, "step": 7169 }, { "epoch": 170.71641791044777, "grad_norm": 15.173422813415527, "learning_rate": 9.966165413533837e-06, "loss": 38.7524, "step": 7170 }, { "epoch": 170.74029850746268, "grad_norm": 19.56130027770996, "learning_rate": 9.964912280701755e-06, "loss": 38.7178, "step": 7171 }, { "epoch": 170.76417910447762, "grad_norm": 18.328960418701172, "learning_rate": 9.963659147869676e-06, "loss": 39.8653, "step": 7172 }, { "epoch": 170.78805970149253, "grad_norm": 17.148019790649414, "learning_rate": 9.962406015037594e-06, "loss": 39.0803, "step": 7173 }, { "epoch": 170.81194029850747, "grad_norm": 19.625036239624023, "learning_rate": 9.961152882205515e-06, "loss": 39.3432, "step": 7174 }, { "epoch": 170.83582089552237, "grad_norm": 24.38473129272461, "learning_rate": 9.959899749373435e-06, "loss": 38.8941, "step": 7175 }, { "epoch": 170.8597014925373, "grad_norm": 13.859121322631836, "learning_rate": 9.958646616541354e-06, "loss": 37.8674, "step": 7176 }, { "epoch": 170.88358208955225, "grad_norm": 28.161521911621094, "learning_rate": 9.957393483709274e-06, "loss": 39.448, "step": 7177 }, { "epoch": 170.90746268656716, "grad_norm": 20.711326599121094, "learning_rate": 9.956140350877194e-06, "loss": 39.11, "step": 7178 }, { "epoch": 170.9313432835821, "grad_norm": 19.910417556762695, "learning_rate": 9.954887218045113e-06, "loss": 39.1011, "step": 7179 }, { "epoch": 170.955223880597, "grad_norm": 26.991012573242188, "learning_rate": 9.953634085213033e-06, "loss": 37.7999, "step": 7180 }, { "epoch": 170.97910447761194, "grad_norm": 17.512699127197266, "learning_rate": 9.952380952380954e-06, "loss": 37.333, "step": 7181 }, { "epoch": 171.0, "grad_norm": 31.227685928344727, "learning_rate": 9.951127819548872e-06, "loss": 33.6615, "step": 7182 }, { "epoch": 171.02388059701494, "grad_norm": 22.83246421813965, "learning_rate": 9.949874686716793e-06, "loss": 38.4419, "step": 7183 }, { "epoch": 171.04776119402985, "grad_norm": 40.81578063964844, "learning_rate": 9.948621553884713e-06, "loss": 38.3418, "step": 7184 }, { "epoch": 171.07164179104478, "grad_norm": 34.16019821166992, "learning_rate": 9.947368421052632e-06, "loss": 37.5867, "step": 7185 }, { "epoch": 171.0955223880597, "grad_norm": 35.443870544433594, "learning_rate": 9.946115288220552e-06, "loss": 39.525, "step": 7186 }, { "epoch": 171.11940298507463, "grad_norm": 32.02059555053711, "learning_rate": 9.944862155388472e-06, "loss": 38.8342, "step": 7187 }, { "epoch": 171.14328358208957, "grad_norm": 30.18817710876465, "learning_rate": 9.943609022556391e-06, "loss": 37.9161, "step": 7188 }, { "epoch": 171.16716417910447, "grad_norm": 24.910490036010742, "learning_rate": 9.942355889724311e-06, "loss": 39.0131, "step": 7189 }, { "epoch": 171.1910447761194, "grad_norm": 33.26876449584961, "learning_rate": 9.941102756892232e-06, "loss": 38.0236, "step": 7190 }, { "epoch": 171.21492537313432, "grad_norm": 28.529455184936523, "learning_rate": 9.939849624060152e-06, "loss": 39.2241, "step": 7191 }, { "epoch": 171.23880597014926, "grad_norm": 34.611534118652344, "learning_rate": 9.938596491228071e-06, "loss": 38.5791, "step": 7192 }, { "epoch": 171.26268656716417, "grad_norm": 30.867097854614258, "learning_rate": 9.937343358395991e-06, "loss": 38.9934, "step": 7193 }, { "epoch": 171.2865671641791, "grad_norm": 33.581302642822266, "learning_rate": 9.93609022556391e-06, "loss": 39.1015, "step": 7194 }, { "epoch": 171.31044776119404, "grad_norm": 28.50710678100586, "learning_rate": 9.93483709273183e-06, "loss": 38.525, "step": 7195 }, { "epoch": 171.33432835820895, "grad_norm": 26.710535049438477, "learning_rate": 9.93358395989975e-06, "loss": 38.985, "step": 7196 }, { "epoch": 171.3582089552239, "grad_norm": 22.844213485717773, "learning_rate": 9.93233082706767e-06, "loss": 37.3385, "step": 7197 }, { "epoch": 171.3820895522388, "grad_norm": 38.604583740234375, "learning_rate": 9.93107769423559e-06, "loss": 38.3507, "step": 7198 }, { "epoch": 171.40597014925373, "grad_norm": NaN, "learning_rate": 9.929824561403509e-06, "loss": 54.4909, "step": 7199 }, { "epoch": 171.42985074626867, "grad_norm": 30.092470169067383, "learning_rate": 9.929824561403509e-06, "loss": 38.6197, "step": 7200 }, { "epoch": 171.45373134328358, "grad_norm": 36.72769546508789, "learning_rate": 9.92857142857143e-06, "loss": 37.5002, "step": 7201 }, { "epoch": 171.47761194029852, "grad_norm": 33.80430603027344, "learning_rate": 9.92731829573935e-06, "loss": 38.5208, "step": 7202 }, { "epoch": 171.50149253731342, "grad_norm": 28.850698471069336, "learning_rate": 9.926065162907269e-06, "loss": 38.629, "step": 7203 }, { "epoch": 171.52537313432836, "grad_norm": 26.544612884521484, "learning_rate": 9.924812030075189e-06, "loss": 38.3152, "step": 7204 }, { "epoch": 171.54925373134327, "grad_norm": 28.672277450561523, "learning_rate": 9.923558897243108e-06, "loss": 39.0015, "step": 7205 }, { "epoch": 171.5731343283582, "grad_norm": 26.214168548583984, "learning_rate": 9.92230576441103e-06, "loss": 38.8247, "step": 7206 }, { "epoch": 171.59701492537314, "grad_norm": 37.60875701904297, "learning_rate": 9.921052631578947e-06, "loss": 39.3055, "step": 7207 }, { "epoch": 171.62089552238805, "grad_norm": 32.91227340698242, "learning_rate": 9.919799498746869e-06, "loss": 39.0874, "step": 7208 }, { "epoch": 171.644776119403, "grad_norm": 27.47034454345703, "learning_rate": 9.918546365914787e-06, "loss": 37.7921, "step": 7209 }, { "epoch": 171.6686567164179, "grad_norm": 24.67852210998535, "learning_rate": 9.917293233082708e-06, "loss": 37.8243, "step": 7210 }, { "epoch": 171.69253731343284, "grad_norm": 30.638681411743164, "learning_rate": 9.916040100250628e-06, "loss": 38.2425, "step": 7211 }, { "epoch": 171.71641791044777, "grad_norm": 21.866235733032227, "learning_rate": 9.914786967418547e-06, "loss": 39.4408, "step": 7212 }, { "epoch": 171.74029850746268, "grad_norm": 37.21723175048828, "learning_rate": 9.913533834586467e-06, "loss": 39.3199, "step": 7213 }, { "epoch": 171.76417910447762, "grad_norm": 33.222869873046875, "learning_rate": 9.912280701754386e-06, "loss": 39.3108, "step": 7214 }, { "epoch": 171.78805970149253, "grad_norm": 31.65533447265625, "learning_rate": 9.911027568922308e-06, "loss": 38.7455, "step": 7215 }, { "epoch": 171.81194029850747, "grad_norm": 30.97352409362793, "learning_rate": 9.909774436090226e-06, "loss": 37.6917, "step": 7216 }, { "epoch": 171.83582089552237, "grad_norm": 24.84351921081543, "learning_rate": 9.908521303258147e-06, "loss": 38.2327, "step": 7217 }, { "epoch": 171.8597014925373, "grad_norm": 25.861270904541016, "learning_rate": 9.907268170426066e-06, "loss": 37.2067, "step": 7218 }, { "epoch": 171.88358208955225, "grad_norm": 31.17856216430664, "learning_rate": 9.906015037593986e-06, "loss": 38.2829, "step": 7219 }, { "epoch": 171.90746268656716, "grad_norm": 26.58542823791504, "learning_rate": 9.904761904761906e-06, "loss": 39.0652, "step": 7220 }, { "epoch": 171.9313432835821, "grad_norm": 33.54816436767578, "learning_rate": 9.903508771929825e-06, "loss": 38.1871, "step": 7221 }, { "epoch": 171.955223880597, "grad_norm": 30.45197105407715, "learning_rate": 9.902255639097745e-06, "loss": 38.0343, "step": 7222 }, { "epoch": 171.97910447761194, "grad_norm": 28.675378799438477, "learning_rate": 9.901002506265664e-06, "loss": 38.0105, "step": 7223 }, { "epoch": 172.0, "grad_norm": 22.688058853149414, "learning_rate": 9.899749373433584e-06, "loss": 34.1479, "step": 7224 }, { "epoch": 172.02388059701494, "grad_norm": 29.49295425415039, "learning_rate": 9.898496240601505e-06, "loss": 39.1435, "step": 7225 }, { "epoch": 172.04776119402985, "grad_norm": 24.734025955200195, "learning_rate": 9.897243107769425e-06, "loss": 38.322, "step": 7226 }, { "epoch": 172.07164179104478, "grad_norm": 34.65670394897461, "learning_rate": 9.895989974937344e-06, "loss": 37.4019, "step": 7227 }, { "epoch": 172.0955223880597, "grad_norm": 30.98259925842285, "learning_rate": 9.894736842105264e-06, "loss": 38.6403, "step": 7228 }, { "epoch": 172.11940298507463, "grad_norm": 29.755584716796875, "learning_rate": 9.893483709273184e-06, "loss": 39.1218, "step": 7229 }, { "epoch": 172.14328358208957, "grad_norm": 26.55215835571289, "learning_rate": 9.892230576441103e-06, "loss": 38.5852, "step": 7230 }, { "epoch": 172.16716417910447, "grad_norm": 28.36668586730957, "learning_rate": 9.890977443609023e-06, "loss": 38.6966, "step": 7231 }, { "epoch": 172.1910447761194, "grad_norm": 24.79121971130371, "learning_rate": 9.889724310776944e-06, "loss": 38.3481, "step": 7232 }, { "epoch": 172.21492537313432, "grad_norm": 29.209148406982422, "learning_rate": 9.888471177944862e-06, "loss": 37.8566, "step": 7233 }, { "epoch": 172.23880597014926, "grad_norm": 26.701807022094727, "learning_rate": 9.887218045112783e-06, "loss": 39.5109, "step": 7234 }, { "epoch": 172.26268656716417, "grad_norm": 31.177106857299805, "learning_rate": 9.885964912280703e-06, "loss": 37.8092, "step": 7235 }, { "epoch": 172.2865671641791, "grad_norm": 26.01350212097168, "learning_rate": 9.884711779448623e-06, "loss": 38.2528, "step": 7236 }, { "epoch": 172.31044776119404, "grad_norm": 29.5618896484375, "learning_rate": 9.883458646616542e-06, "loss": 37.5082, "step": 7237 }, { "epoch": 172.33432835820895, "grad_norm": 30.10390281677246, "learning_rate": 9.882205513784462e-06, "loss": 37.9635, "step": 7238 }, { "epoch": 172.3582089552239, "grad_norm": 25.675289154052734, "learning_rate": 9.880952380952381e-06, "loss": 38.3451, "step": 7239 }, { "epoch": 172.3820895522388, "grad_norm": 24.46607780456543, "learning_rate": 9.879699248120301e-06, "loss": 38.9061, "step": 7240 }, { "epoch": 172.40597014925373, "grad_norm": 28.36737060546875, "learning_rate": 9.878446115288222e-06, "loss": 37.5352, "step": 7241 }, { "epoch": 172.42985074626867, "grad_norm": 24.840145111083984, "learning_rate": 9.87719298245614e-06, "loss": 38.7872, "step": 7242 }, { "epoch": 172.45373134328358, "grad_norm": 26.529098510742188, "learning_rate": 9.875939849624061e-06, "loss": 38.0806, "step": 7243 }, { "epoch": 172.47761194029852, "grad_norm": 21.085857391357422, "learning_rate": 9.87468671679198e-06, "loss": 38.301, "step": 7244 }, { "epoch": 172.50149253731342, "grad_norm": 26.893800735473633, "learning_rate": 9.8734335839599e-06, "loss": 37.4983, "step": 7245 }, { "epoch": 172.52537313432836, "grad_norm": 20.84930992126465, "learning_rate": 9.87218045112782e-06, "loss": 37.5785, "step": 7246 }, { "epoch": 172.54925373134327, "grad_norm": 23.662948608398438, "learning_rate": 9.87092731829574e-06, "loss": 37.9118, "step": 7247 }, { "epoch": 172.5731343283582, "grad_norm": 20.506759643554688, "learning_rate": 9.86967418546366e-06, "loss": 38.0004, "step": 7248 }, { "epoch": 172.59701492537314, "grad_norm": 20.3808650970459, "learning_rate": 9.868421052631579e-06, "loss": 39.4201, "step": 7249 }, { "epoch": 172.62089552238805, "grad_norm": 19.416587829589844, "learning_rate": 9.8671679197995e-06, "loss": 38.8031, "step": 7250 }, { "epoch": 172.644776119403, "grad_norm": 20.337444305419922, "learning_rate": 9.86591478696742e-06, "loss": 39.5332, "step": 7251 }, { "epoch": 172.6686567164179, "grad_norm": 19.94097328186035, "learning_rate": 9.86466165413534e-06, "loss": 38.9583, "step": 7252 }, { "epoch": 172.69253731343284, "grad_norm": 18.987834930419922, "learning_rate": 9.86340852130326e-06, "loss": 39.4555, "step": 7253 }, { "epoch": 172.71641791044777, "grad_norm": 17.905500411987305, "learning_rate": 9.862155388471179e-06, "loss": 37.0105, "step": 7254 }, { "epoch": 172.74029850746268, "grad_norm": 16.578981399536133, "learning_rate": 9.860902255639098e-06, "loss": 38.764, "step": 7255 }, { "epoch": 172.76417910447762, "grad_norm": 17.216270446777344, "learning_rate": 9.859649122807018e-06, "loss": 37.615, "step": 7256 }, { "epoch": 172.78805970149253, "grad_norm": 19.038070678710938, "learning_rate": 9.858395989974938e-06, "loss": 38.6936, "step": 7257 }, { "epoch": 172.81194029850747, "grad_norm": NaN, "learning_rate": 9.857142857142859e-06, "loss": 33.444, "step": 7258 }, { "epoch": 172.83582089552237, "grad_norm": 17.612396240234375, "learning_rate": 9.857142857142859e-06, "loss": 38.8854, "step": 7259 }, { "epoch": 172.8597014925373, "grad_norm": 25.871450424194336, "learning_rate": 9.855889724310778e-06, "loss": 39.7183, "step": 7260 }, { "epoch": 172.88358208955225, "grad_norm": 20.912675857543945, "learning_rate": 9.854636591478698e-06, "loss": 38.5365, "step": 7261 }, { "epoch": 172.90746268656716, "grad_norm": 19.9591064453125, "learning_rate": 9.853383458646618e-06, "loss": 38.7913, "step": 7262 }, { "epoch": 172.9313432835821, "grad_norm": 20.852313995361328, "learning_rate": 9.852130325814537e-06, "loss": 38.5725, "step": 7263 }, { "epoch": 172.955223880597, "grad_norm": 16.304344177246094, "learning_rate": 9.850877192982457e-06, "loss": 38.8952, "step": 7264 }, { "epoch": 172.97910447761194, "grad_norm": 21.012598037719727, "learning_rate": 9.849624060150376e-06, "loss": 38.2271, "step": 7265 }, { "epoch": 173.0, "grad_norm": 16.632991790771484, "learning_rate": 9.848370927318298e-06, "loss": 34.2444, "step": 7266 }, { "epoch": 173.02388059701494, "grad_norm": 17.801403045654297, "learning_rate": 9.847117794486216e-06, "loss": 38.2125, "step": 7267 }, { "epoch": 173.04776119402985, "grad_norm": 16.416452407836914, "learning_rate": 9.845864661654137e-06, "loss": 38.6538, "step": 7268 }, { "epoch": 173.07164179104478, "grad_norm": 16.36857032775879, "learning_rate": 9.844611528822055e-06, "loss": 38.9644, "step": 7269 }, { "epoch": 173.0955223880597, "grad_norm": 16.667531967163086, "learning_rate": 9.843358395989976e-06, "loss": 37.3438, "step": 7270 }, { "epoch": 173.11940298507463, "grad_norm": 20.268720626831055, "learning_rate": 9.842105263157896e-06, "loss": 38.4221, "step": 7271 }, { "epoch": 173.14328358208957, "grad_norm": 15.569287300109863, "learning_rate": 9.840852130325815e-06, "loss": 39.1168, "step": 7272 }, { "epoch": 173.16716417910447, "grad_norm": 21.108577728271484, "learning_rate": 9.839598997493735e-06, "loss": 38.7105, "step": 7273 }, { "epoch": 173.1910447761194, "grad_norm": 17.720117568969727, "learning_rate": 9.838345864661655e-06, "loss": 38.4833, "step": 7274 }, { "epoch": 173.21492537313432, "grad_norm": 23.795623779296875, "learning_rate": 9.837092731829576e-06, "loss": 38.9659, "step": 7275 }, { "epoch": 173.23880597014926, "grad_norm": 22.483427047729492, "learning_rate": 9.835839598997494e-06, "loss": 39.7438, "step": 7276 }, { "epoch": 173.26268656716417, "grad_norm": 19.345884323120117, "learning_rate": 9.834586466165415e-06, "loss": 37.2718, "step": 7277 }, { "epoch": 173.2865671641791, "grad_norm": 16.387704849243164, "learning_rate": 9.833333333333333e-06, "loss": 37.6755, "step": 7278 }, { "epoch": 173.31044776119404, "grad_norm": 20.114343643188477, "learning_rate": 9.832080200501254e-06, "loss": 39.3259, "step": 7279 }, { "epoch": 173.33432835820895, "grad_norm": 17.888080596923828, "learning_rate": 9.830827067669174e-06, "loss": 37.8181, "step": 7280 }, { "epoch": 173.3582089552239, "grad_norm": 14.599053382873535, "learning_rate": 9.829573934837093e-06, "loss": 38.5021, "step": 7281 }, { "epoch": 173.3820895522388, "grad_norm": 23.150272369384766, "learning_rate": 9.828320802005013e-06, "loss": 38.7287, "step": 7282 }, { "epoch": 173.40597014925373, "grad_norm": 16.990703582763672, "learning_rate": 9.827067669172933e-06, "loss": 37.7563, "step": 7283 }, { "epoch": 173.42985074626867, "grad_norm": 21.03927230834961, "learning_rate": 9.825814536340852e-06, "loss": 38.8247, "step": 7284 }, { "epoch": 173.45373134328358, "grad_norm": 18.88947868347168, "learning_rate": 9.824561403508772e-06, "loss": 38.5369, "step": 7285 }, { "epoch": 173.47761194029852, "grad_norm": 19.675981521606445, "learning_rate": 9.823308270676693e-06, "loss": 37.2397, "step": 7286 }, { "epoch": 173.50149253731342, "grad_norm": 15.254344940185547, "learning_rate": 9.822055137844613e-06, "loss": 38.7559, "step": 7287 }, { "epoch": 173.52537313432836, "grad_norm": 17.197786331176758, "learning_rate": 9.820802005012532e-06, "loss": 38.6583, "step": 7288 }, { "epoch": 173.54925373134327, "grad_norm": 13.433090209960938, "learning_rate": 9.819548872180452e-06, "loss": 38.2371, "step": 7289 }, { "epoch": 173.5731343283582, "grad_norm": 16.729307174682617, "learning_rate": 9.818295739348372e-06, "loss": 39.3977, "step": 7290 }, { "epoch": 173.59701492537314, "grad_norm": 15.629776000976562, "learning_rate": 9.817042606516291e-06, "loss": 37.9545, "step": 7291 }, { "epoch": 173.62089552238805, "grad_norm": 20.348583221435547, "learning_rate": 9.815789473684212e-06, "loss": 38.7693, "step": 7292 }, { "epoch": 173.644776119403, "grad_norm": 18.90270233154297, "learning_rate": 9.81453634085213e-06, "loss": 39.0774, "step": 7293 }, { "epoch": 173.6686567164179, "grad_norm": 14.576498031616211, "learning_rate": 9.813283208020052e-06, "loss": 38.4017, "step": 7294 }, { "epoch": 173.69253731343284, "grad_norm": 19.90891456604004, "learning_rate": 9.812030075187971e-06, "loss": 38.2823, "step": 7295 }, { "epoch": 173.71641791044777, "grad_norm": 18.241924285888672, "learning_rate": 9.81077694235589e-06, "loss": 37.5477, "step": 7296 }, { "epoch": 173.74029850746268, "grad_norm": 15.419953346252441, "learning_rate": 9.80952380952381e-06, "loss": 38.0637, "step": 7297 }, { "epoch": 173.76417910447762, "grad_norm": 21.94540023803711, "learning_rate": 9.80827067669173e-06, "loss": 37.1823, "step": 7298 }, { "epoch": 173.78805970149253, "grad_norm": 17.8468017578125, "learning_rate": 9.80701754385965e-06, "loss": 38.3753, "step": 7299 }, { "epoch": 173.81194029850747, "grad_norm": 27.14240074157715, "learning_rate": 9.80576441102757e-06, "loss": 39.1886, "step": 7300 }, { "epoch": 173.83582089552237, "grad_norm": 18.001319885253906, "learning_rate": 9.80451127819549e-06, "loss": 38.0588, "step": 7301 }, { "epoch": 173.8597014925373, "grad_norm": 29.474332809448242, "learning_rate": 9.803258145363408e-06, "loss": 38.8461, "step": 7302 }, { "epoch": 173.88358208955225, "grad_norm": 19.030712127685547, "learning_rate": 9.80200501253133e-06, "loss": 38.0905, "step": 7303 }, { "epoch": 173.90746268656716, "grad_norm": 26.412479400634766, "learning_rate": 9.80075187969925e-06, "loss": 38.2451, "step": 7304 }, { "epoch": 173.9313432835821, "grad_norm": 20.873828887939453, "learning_rate": 9.799498746867169e-06, "loss": 37.2942, "step": 7305 }, { "epoch": 173.955223880597, "grad_norm": 25.46503448486328, "learning_rate": 9.798245614035088e-06, "loss": 39.4271, "step": 7306 }, { "epoch": 173.97910447761194, "grad_norm": 19.72415542602539, "learning_rate": 9.796992481203008e-06, "loss": 39.1495, "step": 7307 }, { "epoch": 174.0, "grad_norm": 22.775169372558594, "learning_rate": 9.795739348370928e-06, "loss": 34.0273, "step": 7308 }, { "epoch": 174.02388059701494, "grad_norm": 19.259878158569336, "learning_rate": 9.794486215538847e-06, "loss": 37.977, "step": 7309 }, { "epoch": 174.04776119402985, "grad_norm": 23.16216468811035, "learning_rate": 9.793233082706769e-06, "loss": 38.2491, "step": 7310 }, { "epoch": 174.07164179104478, "grad_norm": 19.84416389465332, "learning_rate": 9.791979949874686e-06, "loss": 37.6288, "step": 7311 }, { "epoch": 174.0955223880597, "grad_norm": 23.899057388305664, "learning_rate": 9.790726817042608e-06, "loss": 38.2218, "step": 7312 }, { "epoch": 174.11940298507463, "grad_norm": 21.903470993041992, "learning_rate": 9.789473684210527e-06, "loss": 38.4567, "step": 7313 }, { "epoch": 174.14328358208957, "grad_norm": 26.155000686645508, "learning_rate": 9.788220551378447e-06, "loss": 38.7748, "step": 7314 }, { "epoch": 174.16716417910447, "grad_norm": 21.06147575378418, "learning_rate": 9.786967418546367e-06, "loss": 38.2962, "step": 7315 }, { "epoch": 174.1910447761194, "grad_norm": 25.352506637573242, "learning_rate": 9.785714285714286e-06, "loss": 38.2831, "step": 7316 }, { "epoch": 174.21492537313432, "grad_norm": 23.535900115966797, "learning_rate": 9.784461152882206e-06, "loss": 39.9764, "step": 7317 }, { "epoch": 174.23880597014926, "grad_norm": 22.830669403076172, "learning_rate": 9.783208020050125e-06, "loss": 39.1042, "step": 7318 }, { "epoch": 174.26268656716417, "grad_norm": 21.910917282104492, "learning_rate": 9.781954887218047e-06, "loss": 38.5363, "step": 7319 }, { "epoch": 174.2865671641791, "grad_norm": 17.074180603027344, "learning_rate": 9.780701754385966e-06, "loss": 37.051, "step": 7320 }, { "epoch": 174.31044776119404, "grad_norm": 19.93785858154297, "learning_rate": 9.779448621553886e-06, "loss": 38.4798, "step": 7321 }, { "epoch": 174.33432835820895, "grad_norm": 22.12788963317871, "learning_rate": 9.778195488721805e-06, "loss": 38.0934, "step": 7322 }, { "epoch": 174.3582089552239, "grad_norm": 17.97043800354004, "learning_rate": 9.776942355889725e-06, "loss": 38.3819, "step": 7323 }, { "epoch": 174.3820895522388, "grad_norm": 20.136077880859375, "learning_rate": 9.775689223057645e-06, "loss": 38.7023, "step": 7324 }, { "epoch": 174.40597014925373, "grad_norm": 16.51250648498535, "learning_rate": 9.774436090225564e-06, "loss": 37.9475, "step": 7325 }, { "epoch": 174.42985074626867, "grad_norm": 21.541324615478516, "learning_rate": 9.773182957393484e-06, "loss": 39.0212, "step": 7326 }, { "epoch": 174.45373134328358, "grad_norm": 17.423656463623047, "learning_rate": 9.771929824561405e-06, "loss": 37.9269, "step": 7327 }, { "epoch": 174.47761194029852, "grad_norm": 18.572166442871094, "learning_rate": 9.770676691729323e-06, "loss": 38.3929, "step": 7328 }, { "epoch": 174.50149253731342, "grad_norm": 19.709980010986328, "learning_rate": 9.769423558897244e-06, "loss": 37.9021, "step": 7329 }, { "epoch": 174.52537313432836, "grad_norm": 20.803659439086914, "learning_rate": 9.768170426065164e-06, "loss": 39.1917, "step": 7330 }, { "epoch": 174.54925373134327, "grad_norm": 17.603025436401367, "learning_rate": 9.766917293233084e-06, "loss": 38.5951, "step": 7331 }, { "epoch": 174.5731343283582, "grad_norm": 20.333627700805664, "learning_rate": 9.765664160401003e-06, "loss": 37.4959, "step": 7332 }, { "epoch": 174.59701492537314, "grad_norm": 17.328895568847656, "learning_rate": 9.764411027568923e-06, "loss": 39.1875, "step": 7333 }, { "epoch": 174.62089552238805, "grad_norm": 20.204282760620117, "learning_rate": 9.763157894736844e-06, "loss": 38.0766, "step": 7334 }, { "epoch": 174.644776119403, "grad_norm": 15.856727600097656, "learning_rate": 9.761904761904762e-06, "loss": 38.9756, "step": 7335 }, { "epoch": 174.6686567164179, "grad_norm": 18.967605590820312, "learning_rate": 9.760651629072683e-06, "loss": 38.378, "step": 7336 }, { "epoch": 174.69253731343284, "grad_norm": 22.51470375061035, "learning_rate": 9.759398496240601e-06, "loss": 37.9415, "step": 7337 }, { "epoch": 174.71641791044777, "grad_norm": 20.97652244567871, "learning_rate": 9.758145363408522e-06, "loss": 38.0416, "step": 7338 }, { "epoch": 174.74029850746268, "grad_norm": 19.052473068237305, "learning_rate": 9.756892230576442e-06, "loss": 39.084, "step": 7339 }, { "epoch": 174.76417910447762, "grad_norm": 15.750896453857422, "learning_rate": 9.755639097744362e-06, "loss": 39.6359, "step": 7340 }, { "epoch": 174.78805970149253, "grad_norm": 21.774534225463867, "learning_rate": 9.754385964912281e-06, "loss": 38.4529, "step": 7341 }, { "epoch": 174.81194029850747, "grad_norm": 17.55640411376953, "learning_rate": 9.7531328320802e-06, "loss": 37.3946, "step": 7342 }, { "epoch": 174.83582089552237, "grad_norm": 21.838682174682617, "learning_rate": 9.751879699248122e-06, "loss": 37.731, "step": 7343 }, { "epoch": 174.8597014925373, "grad_norm": 18.15571403503418, "learning_rate": 9.75062656641604e-06, "loss": 37.917, "step": 7344 }, { "epoch": 174.88358208955225, "grad_norm": 20.560977935791016, "learning_rate": 9.749373433583961e-06, "loss": 37.7593, "step": 7345 }, { "epoch": 174.90746268656716, "grad_norm": 21.18572998046875, "learning_rate": 9.748120300751881e-06, "loss": 37.8903, "step": 7346 }, { "epoch": 174.9313432835821, "grad_norm": 18.545352935791016, "learning_rate": 9.7468671679198e-06, "loss": 37.6087, "step": 7347 }, { "epoch": 174.955223880597, "grad_norm": 21.975116729736328, "learning_rate": 9.74561403508772e-06, "loss": 38.9785, "step": 7348 }, { "epoch": 174.97910447761194, "grad_norm": 18.184467315673828, "learning_rate": 9.74436090225564e-06, "loss": 37.7652, "step": 7349 }, { "epoch": 175.0, "grad_norm": 17.978364944458008, "learning_rate": 9.74310776942356e-06, "loss": 35.1204, "step": 7350 }, { "epoch": 175.02388059701494, "grad_norm": 21.585533142089844, "learning_rate": 9.741854636591479e-06, "loss": 37.4162, "step": 7351 }, { "epoch": 175.04776119402985, "grad_norm": 21.419065475463867, "learning_rate": 9.740601503759399e-06, "loss": 38.1579, "step": 7352 }, { "epoch": 175.07164179104478, "grad_norm": 17.175764083862305, "learning_rate": 9.73934837092732e-06, "loss": 37.2658, "step": 7353 }, { "epoch": 175.0955223880597, "grad_norm": 20.27353286743164, "learning_rate": 9.73809523809524e-06, "loss": 38.8461, "step": 7354 }, { "epoch": 175.11940298507463, "grad_norm": 18.640180587768555, "learning_rate": 9.736842105263159e-06, "loss": 37.937, "step": 7355 }, { "epoch": 175.14328358208957, "grad_norm": 17.626445770263672, "learning_rate": 9.735588972431079e-06, "loss": 37.8605, "step": 7356 }, { "epoch": 175.16716417910447, "grad_norm": 21.346338272094727, "learning_rate": 9.734335839598998e-06, "loss": 38.8235, "step": 7357 }, { "epoch": 175.1910447761194, "grad_norm": NaN, "learning_rate": 9.733082706766918e-06, "loss": 34.0548, "step": 7358 }, { "epoch": 175.21492537313432, "grad_norm": 15.183211326599121, "learning_rate": 9.733082706766918e-06, "loss": 38.7547, "step": 7359 }, { "epoch": 175.23880597014926, "grad_norm": 20.86262321472168, "learning_rate": 9.731829573934837e-06, "loss": 37.4753, "step": 7360 }, { "epoch": 175.26268656716417, "grad_norm": 15.405557632446289, "learning_rate": 9.730576441102759e-06, "loss": 37.4482, "step": 7361 }, { "epoch": 175.2865671641791, "grad_norm": 18.813549041748047, "learning_rate": 9.729323308270677e-06, "loss": 38.8293, "step": 7362 }, { "epoch": 175.31044776119404, "grad_norm": 19.621522903442383, "learning_rate": 9.728070175438598e-06, "loss": 39.3186, "step": 7363 }, { "epoch": 175.33432835820895, "grad_norm": 21.80621337890625, "learning_rate": 9.726817042606517e-06, "loss": 38.1934, "step": 7364 }, { "epoch": 175.3582089552239, "grad_norm": 21.302892684936523, "learning_rate": 9.725563909774437e-06, "loss": 37.5196, "step": 7365 }, { "epoch": 175.3820895522388, "grad_norm": 17.530221939086914, "learning_rate": 9.724310776942357e-06, "loss": 38.3165, "step": 7366 }, { "epoch": 175.40597014925373, "grad_norm": 15.90662956237793, "learning_rate": 9.723057644110276e-06, "loss": 38.0947, "step": 7367 }, { "epoch": 175.42985074626867, "grad_norm": NaN, "learning_rate": 9.721804511278196e-06, "loss": 65.2164, "step": 7368 }, { "epoch": 175.45373134328358, "grad_norm": 18.875221252441406, "learning_rate": 9.721804511278196e-06, "loss": 38.7631, "step": 7369 }, { "epoch": 175.47761194029852, "grad_norm": 17.658750534057617, "learning_rate": 9.720551378446115e-06, "loss": 39.7298, "step": 7370 }, { "epoch": 175.50149253731342, "grad_norm": 16.8253173828125, "learning_rate": 9.719298245614037e-06, "loss": 38.7608, "step": 7371 }, { "epoch": 175.52537313432836, "grad_norm": 14.109174728393555, "learning_rate": 9.718045112781955e-06, "loss": 38.0865, "step": 7372 }, { "epoch": 175.54925373134327, "grad_norm": 16.604694366455078, "learning_rate": 9.716791979949876e-06, "loss": 39.2844, "step": 7373 }, { "epoch": 175.5731343283582, "grad_norm": 20.231338500976562, "learning_rate": 9.715538847117796e-06, "loss": 37.8806, "step": 7374 }, { "epoch": 175.59701492537314, "grad_norm": 22.997631072998047, "learning_rate": 9.714285714285715e-06, "loss": 38.8939, "step": 7375 }, { "epoch": 175.62089552238805, "grad_norm": 19.7714900970459, "learning_rate": 9.713032581453635e-06, "loss": 38.0255, "step": 7376 }, { "epoch": 175.644776119403, "grad_norm": 15.104757308959961, "learning_rate": 9.711779448621554e-06, "loss": 37.361, "step": 7377 }, { "epoch": 175.6686567164179, "grad_norm": 16.79823112487793, "learning_rate": 9.710526315789474e-06, "loss": 38.6268, "step": 7378 }, { "epoch": 175.69253731343284, "grad_norm": 15.565764427185059, "learning_rate": 9.709273182957394e-06, "loss": 38.7304, "step": 7379 }, { "epoch": 175.71641791044777, "grad_norm": 14.454784393310547, "learning_rate": 9.708020050125315e-06, "loss": 39.039, "step": 7380 }, { "epoch": 175.74029850746268, "grad_norm": 14.47907543182373, "learning_rate": 9.706766917293234e-06, "loss": 38.231, "step": 7381 }, { "epoch": 175.76417910447762, "grad_norm": 15.941643714904785, "learning_rate": 9.705513784461154e-06, "loss": 38.1181, "step": 7382 }, { "epoch": 175.78805970149253, "grad_norm": 19.6253662109375, "learning_rate": 9.704260651629074e-06, "loss": 38.4808, "step": 7383 }, { "epoch": 175.81194029850747, "grad_norm": 16.49032211303711, "learning_rate": 9.703007518796993e-06, "loss": 38.1848, "step": 7384 }, { "epoch": 175.83582089552237, "grad_norm": 14.712738037109375, "learning_rate": 9.701754385964913e-06, "loss": 37.8768, "step": 7385 }, { "epoch": 175.8597014925373, "grad_norm": 12.555728912353516, "learning_rate": 9.700501253132832e-06, "loss": 38.9321, "step": 7386 }, { "epoch": 175.88358208955225, "grad_norm": 15.138301849365234, "learning_rate": 9.699248120300752e-06, "loss": 39.1631, "step": 7387 }, { "epoch": 175.90746268656716, "grad_norm": 14.10248851776123, "learning_rate": 9.697994987468673e-06, "loss": 36.9886, "step": 7388 }, { "epoch": 175.9313432835821, "grad_norm": 15.674737930297852, "learning_rate": 9.696741854636593e-06, "loss": 38.6095, "step": 7389 }, { "epoch": 175.955223880597, "grad_norm": 17.84684944152832, "learning_rate": 9.695488721804513e-06, "loss": 38.2303, "step": 7390 }, { "epoch": 175.97910447761194, "grad_norm": 20.122066497802734, "learning_rate": 9.694235588972432e-06, "loss": 39.0045, "step": 7391 }, { "epoch": 176.0, "grad_norm": 17.95144271850586, "learning_rate": 9.692982456140352e-06, "loss": 32.8977, "step": 7392 }, { "epoch": 176.02388059701494, "grad_norm": 14.381842613220215, "learning_rate": 9.691729323308271e-06, "loss": 38.9414, "step": 7393 }, { "epoch": 176.04776119402985, "grad_norm": 18.826648712158203, "learning_rate": 9.690476190476191e-06, "loss": 38.05, "step": 7394 }, { "epoch": 176.07164179104478, "grad_norm": 18.625883102416992, "learning_rate": 9.689223057644112e-06, "loss": 39.3167, "step": 7395 }, { "epoch": 176.0955223880597, "grad_norm": 19.133636474609375, "learning_rate": 9.68796992481203e-06, "loss": 38.3765, "step": 7396 }, { "epoch": 176.11940298507463, "grad_norm": 16.876758575439453, "learning_rate": 9.686716791979951e-06, "loss": 38.8189, "step": 7397 }, { "epoch": 176.14328358208957, "grad_norm": 20.237958908081055, "learning_rate": 9.68546365914787e-06, "loss": 37.9682, "step": 7398 }, { "epoch": 176.16716417910447, "grad_norm": 19.831436157226562, "learning_rate": 9.68421052631579e-06, "loss": 38.1386, "step": 7399 }, { "epoch": 176.1910447761194, "grad_norm": 17.909395217895508, "learning_rate": 9.68295739348371e-06, "loss": 37.6457, "step": 7400 }, { "epoch": 176.21492537313432, "grad_norm": 15.805506706237793, "learning_rate": 9.68170426065163e-06, "loss": 37.7815, "step": 7401 }, { "epoch": 176.23880597014926, "grad_norm": 16.30780601501465, "learning_rate": 9.68045112781955e-06, "loss": 38.5857, "step": 7402 }, { "epoch": 176.26268656716417, "grad_norm": 13.730635643005371, "learning_rate": 9.679197994987469e-06, "loss": 38.3596, "step": 7403 }, { "epoch": 176.2865671641791, "grad_norm": 16.07013511657715, "learning_rate": 9.67794486215539e-06, "loss": 37.8236, "step": 7404 }, { "epoch": 176.31044776119404, "grad_norm": 13.732840538024902, "learning_rate": 9.676691729323308e-06, "loss": 38.4655, "step": 7405 }, { "epoch": 176.33432835820895, "grad_norm": 13.604117393493652, "learning_rate": 9.67543859649123e-06, "loss": 39.0092, "step": 7406 }, { "epoch": 176.3582089552239, "grad_norm": 17.90340232849121, "learning_rate": 9.674185463659147e-06, "loss": 38.4551, "step": 7407 }, { "epoch": 176.3820895522388, "grad_norm": 19.416580200195312, "learning_rate": 9.672932330827069e-06, "loss": 37.8589, "step": 7408 }, { "epoch": 176.40597014925373, "grad_norm": 18.84051513671875, "learning_rate": 9.671679197994988e-06, "loss": 37.1731, "step": 7409 }, { "epoch": 176.42985074626867, "grad_norm": 15.683023452758789, "learning_rate": 9.670426065162908e-06, "loss": 38.6623, "step": 7410 }, { "epoch": 176.45373134328358, "grad_norm": 15.627781867980957, "learning_rate": 9.669172932330828e-06, "loss": 39.5563, "step": 7411 }, { "epoch": 176.47761194029852, "grad_norm": 18.245759963989258, "learning_rate": 9.667919799498747e-06, "loss": 38.0547, "step": 7412 }, { "epoch": 176.50149253731342, "grad_norm": 28.857357025146484, "learning_rate": 9.666666666666667e-06, "loss": 38.2534, "step": 7413 }, { "epoch": 176.52537313432836, "grad_norm": 16.545024871826172, "learning_rate": 9.665413533834588e-06, "loss": 37.7573, "step": 7414 }, { "epoch": 176.54925373134327, "grad_norm": 32.44770431518555, "learning_rate": 9.664160401002508e-06, "loss": 39.0261, "step": 7415 }, { "epoch": 176.5731343283582, "grad_norm": 22.43410301208496, "learning_rate": 9.662907268170427e-06, "loss": 38.1903, "step": 7416 }, { "epoch": 176.59701492537314, "grad_norm": 29.712522506713867, "learning_rate": 9.661654135338347e-06, "loss": 38.1701, "step": 7417 }, { "epoch": 176.62089552238805, "grad_norm": 20.179025650024414, "learning_rate": 9.660401002506266e-06, "loss": 38.8206, "step": 7418 }, { "epoch": 176.644776119403, "grad_norm": 23.98577308654785, "learning_rate": 9.659147869674186e-06, "loss": 39.189, "step": 7419 }, { "epoch": 176.6686567164179, "grad_norm": 24.150781631469727, "learning_rate": 9.657894736842106e-06, "loss": 38.2772, "step": 7420 }, { "epoch": 176.69253731343284, "grad_norm": 16.749544143676758, "learning_rate": 9.656641604010027e-06, "loss": 38.9518, "step": 7421 }, { "epoch": 176.71641791044777, "grad_norm": 26.16396141052246, "learning_rate": 9.655388471177945e-06, "loss": 37.7769, "step": 7422 }, { "epoch": 176.74029850746268, "grad_norm": 22.416610717773438, "learning_rate": 9.654135338345866e-06, "loss": 38.0703, "step": 7423 }, { "epoch": 176.76417910447762, "grad_norm": 14.045994758605957, "learning_rate": 9.652882205513786e-06, "loss": 39.5412, "step": 7424 }, { "epoch": 176.78805970149253, "grad_norm": 29.801090240478516, "learning_rate": 9.651629072681705e-06, "loss": 36.6879, "step": 7425 }, { "epoch": 176.81194029850747, "grad_norm": 16.378732681274414, "learning_rate": 9.650375939849625e-06, "loss": 36.7957, "step": 7426 }, { "epoch": 176.83582089552237, "grad_norm": 29.72284507751465, "learning_rate": 9.649122807017545e-06, "loss": 38.8986, "step": 7427 }, { "epoch": 176.8597014925373, "grad_norm": NaN, "learning_rate": 9.647869674185464e-06, "loss": 32.0868, "step": 7428 }, { "epoch": 176.88358208955225, "grad_norm": 22.460494995117188, "learning_rate": 9.647869674185464e-06, "loss": 38.4721, "step": 7429 }, { "epoch": 176.90746268656716, "grad_norm": 22.48520851135254, "learning_rate": 9.646616541353384e-06, "loss": 38.0989, "step": 7430 }, { "epoch": 176.9313432835821, "grad_norm": 24.938936233520508, "learning_rate": 9.645363408521305e-06, "loss": 38.0467, "step": 7431 }, { "epoch": 176.955223880597, "grad_norm": 19.816362380981445, "learning_rate": 9.644110275689223e-06, "loss": 38.3439, "step": 7432 }, { "epoch": 176.97910447761194, "grad_norm": 33.925724029541016, "learning_rate": 9.642857142857144e-06, "loss": 37.6431, "step": 7433 }, { "epoch": 177.0, "grad_norm": 20.18031120300293, "learning_rate": 9.641604010025064e-06, "loss": 33.7928, "step": 7434 }, { "epoch": 177.02388059701494, "grad_norm": 40.42418670654297, "learning_rate": 9.640350877192983e-06, "loss": 38.1969, "step": 7435 }, { "epoch": 177.04776119402985, "grad_norm": 32.65384292602539, "learning_rate": 9.639097744360903e-06, "loss": 38.2825, "step": 7436 }, { "epoch": 177.07164179104478, "grad_norm": 40.55938720703125, "learning_rate": 9.637844611528823e-06, "loss": 38.3454, "step": 7437 }, { "epoch": 177.0955223880597, "grad_norm": 40.250762939453125, "learning_rate": 9.636591478696742e-06, "loss": 36.7613, "step": 7438 }, { "epoch": 177.11940298507463, "grad_norm": 22.756441116333008, "learning_rate": 9.635338345864662e-06, "loss": 38.4782, "step": 7439 }, { "epoch": 177.14328358208957, "grad_norm": 25.255971908569336, "learning_rate": 9.634085213032583e-06, "loss": 38.5564, "step": 7440 }, { "epoch": 177.16716417910447, "grad_norm": 32.509010314941406, "learning_rate": 9.632832080200501e-06, "loss": 37.9028, "step": 7441 }, { "epoch": 177.1910447761194, "grad_norm": 26.76149559020996, "learning_rate": 9.631578947368422e-06, "loss": 39.4661, "step": 7442 }, { "epoch": 177.21492537313432, "grad_norm": 35.867462158203125, "learning_rate": 9.630325814536342e-06, "loss": 38.6241, "step": 7443 }, { "epoch": 177.23880597014926, "grad_norm": 31.468015670776367, "learning_rate": 9.629072681704261e-06, "loss": 37.8266, "step": 7444 }, { "epoch": 177.26268656716417, "grad_norm": 35.157798767089844, "learning_rate": 9.627819548872181e-06, "loss": 38.3938, "step": 7445 }, { "epoch": 177.2865671641791, "grad_norm": 33.04148483276367, "learning_rate": 9.6265664160401e-06, "loss": 37.382, "step": 7446 }, { "epoch": 177.31044776119404, "grad_norm": 29.57913589477539, "learning_rate": 9.62531328320802e-06, "loss": 37.5585, "step": 7447 }, { "epoch": 177.33432835820895, "grad_norm": 27.25524139404297, "learning_rate": 9.62406015037594e-06, "loss": 37.4982, "step": 7448 }, { "epoch": 177.3582089552239, "grad_norm": NaN, "learning_rate": 9.622807017543861e-06, "loss": 34.8127, "step": 7449 }, { "epoch": 177.3820895522388, "grad_norm": 33.32447814941406, "learning_rate": 9.622807017543861e-06, "loss": 37.4434, "step": 7450 }, { "epoch": 177.40597014925373, "grad_norm": 29.68785285949707, "learning_rate": 9.62155388471178e-06, "loss": 37.8705, "step": 7451 }, { "epoch": 177.42985074626867, "grad_norm": NaN, "learning_rate": 9.6203007518797e-06, "loss": 41.4018, "step": 7452 }, { "epoch": 177.45373134328358, "grad_norm": 32.368263244628906, "learning_rate": 9.6203007518797e-06, "loss": 38.2835, "step": 7453 }, { "epoch": 177.47761194029852, "grad_norm": 29.269750595092773, "learning_rate": 9.61904761904762e-06, "loss": 37.714, "step": 7454 }, { "epoch": 177.50149253731342, "grad_norm": 30.023723602294922, "learning_rate": 9.61779448621554e-06, "loss": 37.1855, "step": 7455 }, { "epoch": 177.52537313432836, "grad_norm": 25.97041130065918, "learning_rate": 9.61654135338346e-06, "loss": 38.573, "step": 7456 }, { "epoch": 177.54925373134327, "grad_norm": 32.41938018798828, "learning_rate": 9.61528822055138e-06, "loss": 39.1859, "step": 7457 }, { "epoch": 177.5731343283582, "grad_norm": 30.231359481811523, "learning_rate": 9.614035087719298e-06, "loss": 39.3906, "step": 7458 }, { "epoch": 177.59701492537314, "grad_norm": 34.94846725463867, "learning_rate": 9.61278195488722e-06, "loss": 39.9365, "step": 7459 }, { "epoch": 177.62089552238805, "grad_norm": 34.16421127319336, "learning_rate": 9.611528822055138e-06, "loss": 37.2807, "step": 7460 }, { "epoch": 177.644776119403, "grad_norm": 27.481935501098633, "learning_rate": 9.610275689223059e-06, "loss": 38.0588, "step": 7461 }, { "epoch": 177.6686567164179, "grad_norm": 22.71653938293457, "learning_rate": 9.609022556390978e-06, "loss": 37.773, "step": 7462 }, { "epoch": 177.69253731343284, "grad_norm": 33.87922668457031, "learning_rate": 9.607769423558898e-06, "loss": 37.8048, "step": 7463 }, { "epoch": 177.71641791044777, "grad_norm": 24.821271896362305, "learning_rate": 9.606516290726818e-06, "loss": 37.876, "step": 7464 }, { "epoch": 177.74029850746268, "grad_norm": 37.070491790771484, "learning_rate": 9.605263157894737e-06, "loss": 38.6927, "step": 7465 }, { "epoch": 177.76417910447762, "grad_norm": 31.79026222229004, "learning_rate": 9.604010025062659e-06, "loss": 37.8024, "step": 7466 }, { "epoch": 177.78805970149253, "grad_norm": 29.7656307220459, "learning_rate": 9.602756892230576e-06, "loss": 38.6212, "step": 7467 }, { "epoch": 177.81194029850747, "grad_norm": 26.21623992919922, "learning_rate": 9.601503759398498e-06, "loss": 37.9078, "step": 7468 }, { "epoch": 177.83582089552237, "grad_norm": 34.19346618652344, "learning_rate": 9.600250626566416e-06, "loss": 37.8592, "step": 7469 }, { "epoch": 177.8597014925373, "grad_norm": 31.018447875976562, "learning_rate": 9.598997493734337e-06, "loss": 39.7333, "step": 7470 }, { "epoch": 177.88358208955225, "grad_norm": 33.910614013671875, "learning_rate": 9.597744360902257e-06, "loss": 38.2207, "step": 7471 }, { "epoch": 177.90746268656716, "grad_norm": 29.57449722290039, "learning_rate": 9.596491228070176e-06, "loss": 37.8515, "step": 7472 }, { "epoch": 177.9313432835821, "grad_norm": 29.0955810546875, "learning_rate": 9.595238095238096e-06, "loss": 39.3709, "step": 7473 }, { "epoch": 177.955223880597, "grad_norm": 22.823320388793945, "learning_rate": 9.593984962406015e-06, "loss": 38.6859, "step": 7474 }, { "epoch": 177.97910447761194, "grad_norm": 33.68880844116211, "learning_rate": 9.592731829573937e-06, "loss": 37.424, "step": 7475 }, { "epoch": 178.0, "grad_norm": 22.224315643310547, "learning_rate": 9.591478696741855e-06, "loss": 33.0249, "step": 7476 }, { "epoch": 178.02388059701494, "grad_norm": 34.6712646484375, "learning_rate": 9.590225563909776e-06, "loss": 38.8583, "step": 7477 }, { "epoch": 178.04776119402985, "grad_norm": 32.04248809814453, "learning_rate": 9.588972431077695e-06, "loss": 38.3, "step": 7478 }, { "epoch": 178.07164179104478, "grad_norm": 27.30583381652832, "learning_rate": 9.587719298245615e-06, "loss": 37.78, "step": 7479 }, { "epoch": 178.0955223880597, "grad_norm": 27.105405807495117, "learning_rate": 9.586466165413535e-06, "loss": 38.0345, "step": 7480 }, { "epoch": 178.11940298507463, "grad_norm": 26.92739486694336, "learning_rate": 9.585213032581454e-06, "loss": 37.9945, "step": 7481 }, { "epoch": 178.14328358208957, "grad_norm": 24.58989715576172, "learning_rate": 9.583959899749374e-06, "loss": 37.7868, "step": 7482 }, { "epoch": 178.16716417910447, "grad_norm": 35.88637924194336, "learning_rate": 9.582706766917293e-06, "loss": 37.5245, "step": 7483 }, { "epoch": 178.1910447761194, "grad_norm": 30.281505584716797, "learning_rate": 9.581453634085213e-06, "loss": 37.7936, "step": 7484 }, { "epoch": 178.21492537313432, "grad_norm": 28.63441276550293, "learning_rate": 9.580200501253134e-06, "loss": 39.1822, "step": 7485 }, { "epoch": 178.23880597014926, "grad_norm": 27.02237319946289, "learning_rate": 9.578947368421054e-06, "loss": 38.5349, "step": 7486 }, { "epoch": 178.26268656716417, "grad_norm": 32.959190368652344, "learning_rate": 9.577694235588974e-06, "loss": 37.9876, "step": 7487 }, { "epoch": 178.2865671641791, "grad_norm": 25.708955764770508, "learning_rate": 9.576441102756893e-06, "loss": 36.7052, "step": 7488 }, { "epoch": 178.31044776119404, "grad_norm": 33.02278137207031, "learning_rate": 9.575187969924813e-06, "loss": 37.8834, "step": 7489 }, { "epoch": 178.33432835820895, "grad_norm": 30.28676986694336, "learning_rate": 9.573934837092732e-06, "loss": 38.6412, "step": 7490 }, { "epoch": 178.3582089552239, "grad_norm": 28.039459228515625, "learning_rate": 9.572681704260652e-06, "loss": 37.4207, "step": 7491 }, { "epoch": 178.3820895522388, "grad_norm": 20.65064239501953, "learning_rate": 9.571428571428573e-06, "loss": 37.3706, "step": 7492 }, { "epoch": 178.40597014925373, "grad_norm": 30.815134048461914, "learning_rate": 9.570175438596491e-06, "loss": 38.8579, "step": 7493 }, { "epoch": 178.42985074626867, "grad_norm": 27.219388961791992, "learning_rate": 9.568922305764412e-06, "loss": 38.3453, "step": 7494 }, { "epoch": 178.45373134328358, "grad_norm": 33.38025665283203, "learning_rate": 9.567669172932332e-06, "loss": 36.6813, "step": 7495 }, { "epoch": 178.47761194029852, "grad_norm": 30.232894897460938, "learning_rate": 9.566416040100252e-06, "loss": 37.8682, "step": 7496 }, { "epoch": 178.50149253731342, "grad_norm": 29.54288673400879, "learning_rate": 9.565162907268171e-06, "loss": 39.1899, "step": 7497 }, { "epoch": 178.52537313432836, "grad_norm": 29.446496963500977, "learning_rate": 9.56390977443609e-06, "loss": 38.4056, "step": 7498 }, { "epoch": 178.54925373134327, "grad_norm": 30.845216751098633, "learning_rate": 9.56265664160401e-06, "loss": 38.3574, "step": 7499 }, { "epoch": 178.5731343283582, "grad_norm": 26.717031478881836, "learning_rate": 9.56140350877193e-06, "loss": 37.8946, "step": 7500 }, { "epoch": 178.59701492537314, "grad_norm": 31.20941925048828, "learning_rate": 9.560150375939851e-06, "loss": 38.3069, "step": 7501 }, { "epoch": 178.62089552238805, "grad_norm": 25.0770206451416, "learning_rate": 9.55889724310777e-06, "loss": 38.1639, "step": 7502 }, { "epoch": 178.644776119403, "grad_norm": 30.205888748168945, "learning_rate": 9.55764411027569e-06, "loss": 37.514, "step": 7503 }, { "epoch": 178.6686567164179, "grad_norm": 27.877737045288086, "learning_rate": 9.55639097744361e-06, "loss": 37.3937, "step": 7504 }, { "epoch": 178.69253731343284, "grad_norm": 31.21794319152832, "learning_rate": 9.55513784461153e-06, "loss": 38.6557, "step": 7505 }, { "epoch": 178.71641791044777, "grad_norm": 26.74827766418457, "learning_rate": 9.55388471177945e-06, "loss": 39.0042, "step": 7506 }, { "epoch": 178.74029850746268, "grad_norm": 32.50165939331055, "learning_rate": 9.552631578947369e-06, "loss": 38.5628, "step": 7507 }, { "epoch": 178.76417910447762, "grad_norm": 28.316530227661133, "learning_rate": 9.551378446115288e-06, "loss": 39.1192, "step": 7508 }, { "epoch": 178.78805970149253, "grad_norm": 26.695558547973633, "learning_rate": 9.550125313283208e-06, "loss": 37.2427, "step": 7509 }, { "epoch": 178.81194029850747, "grad_norm": 27.85847282409668, "learning_rate": 9.54887218045113e-06, "loss": 38.7848, "step": 7510 }, { "epoch": 178.83582089552237, "grad_norm": 30.937238693237305, "learning_rate": 9.547619047619049e-06, "loss": 37.194, "step": 7511 }, { "epoch": 178.8597014925373, "grad_norm": 26.466461181640625, "learning_rate": 9.546365914786969e-06, "loss": 38.8251, "step": 7512 }, { "epoch": 178.88358208955225, "grad_norm": 32.745391845703125, "learning_rate": 9.545112781954888e-06, "loss": 38.0742, "step": 7513 }, { "epoch": 178.90746268656716, "grad_norm": 29.391193389892578, "learning_rate": 9.543859649122808e-06, "loss": 38.6374, "step": 7514 }, { "epoch": 178.9313432835821, "grad_norm": 24.619367599487305, "learning_rate": 9.542606516290727e-06, "loss": 38.7896, "step": 7515 }, { "epoch": 178.955223880597, "grad_norm": 23.773025512695312, "learning_rate": 9.541353383458647e-06, "loss": 38.2214, "step": 7516 }, { "epoch": 178.97910447761194, "grad_norm": NaN, "learning_rate": 9.540100250626567e-06, "loss": 66.8795, "step": 7517 }, { "epoch": 179.0, "grad_norm": 26.397310256958008, "learning_rate": 9.540100250626567e-06, "loss": 34.0341, "step": 7518 }, { "epoch": 179.02388059701494, "grad_norm": 23.14042091369629, "learning_rate": 9.538847117794488e-06, "loss": 38.0085, "step": 7519 }, { "epoch": 179.04776119402985, "grad_norm": 34.07661437988281, "learning_rate": 9.537593984962407e-06, "loss": 38.3437, "step": 7520 }, { "epoch": 179.07164179104478, "grad_norm": 31.97378921508789, "learning_rate": 9.536340852130327e-06, "loss": 39.0287, "step": 7521 }, { "epoch": 179.0955223880597, "grad_norm": 26.95208740234375, "learning_rate": 9.535087719298247e-06, "loss": 38.9776, "step": 7522 }, { "epoch": 179.11940298507463, "grad_norm": 25.850631713867188, "learning_rate": 9.533834586466166e-06, "loss": 37.6615, "step": 7523 }, { "epoch": 179.14328358208957, "grad_norm": 29.238176345825195, "learning_rate": 9.532581453634086e-06, "loss": 39.0653, "step": 7524 }, { "epoch": 179.16716417910447, "grad_norm": 25.66439437866211, "learning_rate": 9.531328320802005e-06, "loss": 38.5403, "step": 7525 }, { "epoch": 179.1910447761194, "grad_norm": 27.784435272216797, "learning_rate": 9.530075187969927e-06, "loss": 38.501, "step": 7526 }, { "epoch": 179.21492537313432, "grad_norm": 27.187753677368164, "learning_rate": 9.528822055137845e-06, "loss": 38.6573, "step": 7527 }, { "epoch": 179.23880597014926, "grad_norm": 29.330095291137695, "learning_rate": 9.527568922305766e-06, "loss": 38.9352, "step": 7528 }, { "epoch": 179.26268656716417, "grad_norm": 28.7341365814209, "learning_rate": 9.526315789473684e-06, "loss": 37.9328, "step": 7529 }, { "epoch": 179.2865671641791, "grad_norm": 28.210481643676758, "learning_rate": 9.525062656641605e-06, "loss": 37.826, "step": 7530 }, { "epoch": 179.31044776119404, "grad_norm": 25.721118927001953, "learning_rate": 9.523809523809525e-06, "loss": 36.9992, "step": 7531 }, { "epoch": 179.33432835820895, "grad_norm": 29.90156364440918, "learning_rate": 9.522556390977444e-06, "loss": 38.8308, "step": 7532 }, { "epoch": 179.3582089552239, "grad_norm": 23.75611114501953, "learning_rate": 9.521303258145364e-06, "loss": 37.236, "step": 7533 }, { "epoch": 179.3820895522388, "grad_norm": 28.6463565826416, "learning_rate": 9.520050125313284e-06, "loss": 37.9837, "step": 7534 }, { "epoch": 179.40597014925373, "grad_norm": 22.911027908325195, "learning_rate": 9.518796992481205e-06, "loss": 37.6443, "step": 7535 }, { "epoch": 179.42985074626867, "grad_norm": 32.341163635253906, "learning_rate": 9.517543859649123e-06, "loss": 38.261, "step": 7536 }, { "epoch": 179.45373134328358, "grad_norm": 25.994626998901367, "learning_rate": 9.516290726817044e-06, "loss": 37.7336, "step": 7537 }, { "epoch": 179.47761194029852, "grad_norm": 32.042869567871094, "learning_rate": 9.515037593984964e-06, "loss": 38.5627, "step": 7538 }, { "epoch": 179.50149253731342, "grad_norm": 28.385757446289062, "learning_rate": 9.513784461152883e-06, "loss": 38.0611, "step": 7539 }, { "epoch": 179.52537313432836, "grad_norm": 26.522703170776367, "learning_rate": 9.512531328320803e-06, "loss": 38.3019, "step": 7540 }, { "epoch": 179.54925373134327, "grad_norm": 25.935222625732422, "learning_rate": 9.511278195488722e-06, "loss": 37.8566, "step": 7541 }, { "epoch": 179.5731343283582, "grad_norm": 30.307241439819336, "learning_rate": 9.510025062656642e-06, "loss": 38.7433, "step": 7542 }, { "epoch": 179.59701492537314, "grad_norm": 25.107316970825195, "learning_rate": 9.508771929824562e-06, "loss": 36.395, "step": 7543 }, { "epoch": 179.62089552238805, "grad_norm": 32.13312530517578, "learning_rate": 9.507518796992481e-06, "loss": 38.4197, "step": 7544 }, { "epoch": 179.644776119403, "grad_norm": 28.332002639770508, "learning_rate": 9.506265664160403e-06, "loss": 37.7446, "step": 7545 }, { "epoch": 179.6686567164179, "grad_norm": 28.015735626220703, "learning_rate": 9.505012531328322e-06, "loss": 38.1281, "step": 7546 }, { "epoch": 179.69253731343284, "grad_norm": 26.351720809936523, "learning_rate": 9.503759398496242e-06, "loss": 38.4677, "step": 7547 }, { "epoch": 179.71641791044777, "grad_norm": 28.444782257080078, "learning_rate": 9.502506265664161e-06, "loss": 37.1134, "step": 7548 }, { "epoch": 179.74029850746268, "grad_norm": 23.317214965820312, "learning_rate": 9.501253132832081e-06, "loss": 36.6965, "step": 7549 }, { "epoch": 179.76417910447762, "grad_norm": 35.22730255126953, "learning_rate": 9.5e-06, "loss": 38.5933, "step": 7550 }, { "epoch": 179.78805970149253, "grad_norm": 28.624221801757812, "learning_rate": 9.49874686716792e-06, "loss": 38.145, "step": 7551 }, { "epoch": 179.81194029850747, "grad_norm": 34.93153381347656, "learning_rate": 9.497493734335841e-06, "loss": 38.0996, "step": 7552 }, { "epoch": 179.83582089552237, "grad_norm": 30.9583797454834, "learning_rate": 9.49624060150376e-06, "loss": 38.9154, "step": 7553 }, { "epoch": 179.8597014925373, "grad_norm": NaN, "learning_rate": 9.49498746867168e-06, "loss": 51.9797, "step": 7554 }, { "epoch": 179.88358208955225, "grad_norm": 27.24198341369629, "learning_rate": 9.49498746867168e-06, "loss": 38.1387, "step": 7555 }, { "epoch": 179.90746268656716, "grad_norm": 28.509775161743164, "learning_rate": 9.4937343358396e-06, "loss": 38.1909, "step": 7556 }, { "epoch": 179.9313432835821, "grad_norm": 27.539745330810547, "learning_rate": 9.49248120300752e-06, "loss": 37.5785, "step": 7557 }, { "epoch": 179.955223880597, "grad_norm": 23.719282150268555, "learning_rate": 9.49122807017544e-06, "loss": 38.5756, "step": 7558 }, { "epoch": 179.97910447761194, "grad_norm": 32.51416015625, "learning_rate": 9.489974937343359e-06, "loss": 38.3331, "step": 7559 }, { "epoch": 180.0, "grad_norm": 21.694049835205078, "learning_rate": 9.488721804511279e-06, "loss": 34.0386, "step": 7560 }, { "epoch": 180.02388059701494, "grad_norm": 34.182586669921875, "learning_rate": 9.487468671679198e-06, "loss": 38.373, "step": 7561 }, { "epoch": 180.04776119402985, "grad_norm": 28.496118545532227, "learning_rate": 9.48621553884712e-06, "loss": 37.3118, "step": 7562 }, { "epoch": 180.07164179104478, "grad_norm": 30.934417724609375, "learning_rate": 9.484962406015037e-06, "loss": 37.2624, "step": 7563 }, { "epoch": 180.0955223880597, "grad_norm": 26.485118865966797, "learning_rate": 9.483709273182959e-06, "loss": 37.9195, "step": 7564 }, { "epoch": 180.11940298507463, "grad_norm": 27.35857391357422, "learning_rate": 9.482456140350878e-06, "loss": 39.061, "step": 7565 }, { "epoch": 180.14328358208957, "grad_norm": 24.543331146240234, "learning_rate": 9.481203007518798e-06, "loss": 38.2442, "step": 7566 }, { "epoch": 180.16716417910447, "grad_norm": NaN, "learning_rate": 9.479949874686717e-06, "loss": 62.5496, "step": 7567 }, { "epoch": 180.1910447761194, "grad_norm": 31.08120346069336, "learning_rate": 9.479949874686717e-06, "loss": 38.2847, "step": 7568 }, { "epoch": 180.21492537313432, "grad_norm": 24.63750457763672, "learning_rate": 9.478696741854637e-06, "loss": 37.3386, "step": 7569 }, { "epoch": 180.23880597014926, "grad_norm": 31.72042465209961, "learning_rate": 9.477443609022557e-06, "loss": 38.5061, "step": 7570 }, { "epoch": 180.26268656716417, "grad_norm": 28.289594650268555, "learning_rate": 9.476190476190476e-06, "loss": 39.2517, "step": 7571 }, { "epoch": 180.2865671641791, "grad_norm": 28.50221824645996, "learning_rate": 9.474937343358398e-06, "loss": 36.671, "step": 7572 }, { "epoch": 180.31044776119404, "grad_norm": 25.799354553222656, "learning_rate": 9.473684210526315e-06, "loss": 38.8831, "step": 7573 }, { "epoch": 180.33432835820895, "grad_norm": 32.123512268066406, "learning_rate": 9.472431077694237e-06, "loss": 38.4682, "step": 7574 }, { "epoch": 180.3582089552239, "grad_norm": 27.540674209594727, "learning_rate": 9.471177944862156e-06, "loss": 38.3273, "step": 7575 }, { "epoch": 180.3820895522388, "grad_norm": NaN, "learning_rate": 9.469924812030076e-06, "loss": 42.6944, "step": 7576 }, { "epoch": 180.40597014925373, "grad_norm": 31.53324317932129, "learning_rate": 9.469924812030076e-06, "loss": 37.8429, "step": 7577 }, { "epoch": 180.42985074626867, "grad_norm": 28.76616859436035, "learning_rate": 9.468671679197996e-06, "loss": 38.9031, "step": 7578 }, { "epoch": 180.45373134328358, "grad_norm": 27.742734909057617, "learning_rate": 9.467418546365915e-06, "loss": 38.2788, "step": 7579 }, { "epoch": 180.47761194029852, "grad_norm": 23.84189224243164, "learning_rate": 9.466165413533835e-06, "loss": 37.3147, "step": 7580 }, { "epoch": 180.50149253731342, "grad_norm": 27.617036819458008, "learning_rate": 9.464912280701754e-06, "loss": 38.3207, "step": 7581 }, { "epoch": 180.52537313432836, "grad_norm": 23.339120864868164, "learning_rate": 9.463659147869676e-06, "loss": 37.9027, "step": 7582 }, { "epoch": 180.54925373134327, "grad_norm": 28.931547164916992, "learning_rate": 9.462406015037595e-06, "loss": 37.7769, "step": 7583 }, { "epoch": 180.5731343283582, "grad_norm": 24.628896713256836, "learning_rate": 9.461152882205515e-06, "loss": 38.4154, "step": 7584 }, { "epoch": 180.59701492537314, "grad_norm": 26.340177536010742, "learning_rate": 9.459899749373434e-06, "loss": 36.9641, "step": 7585 }, { "epoch": 180.62089552238805, "grad_norm": 22.31771469116211, "learning_rate": 9.458646616541354e-06, "loss": 37.9069, "step": 7586 }, { "epoch": 180.644776119403, "grad_norm": 24.355815887451172, "learning_rate": 9.457393483709274e-06, "loss": 37.5167, "step": 7587 }, { "epoch": 180.6686567164179, "grad_norm": 22.45478057861328, "learning_rate": 9.456140350877195e-06, "loss": 37.0763, "step": 7588 }, { "epoch": 180.69253731343284, "grad_norm": 21.712766647338867, "learning_rate": 9.454887218045113e-06, "loss": 37.7854, "step": 7589 }, { "epoch": 180.71641791044777, "grad_norm": 20.127975463867188, "learning_rate": 9.453634085213034e-06, "loss": 38.1377, "step": 7590 }, { "epoch": 180.74029850746268, "grad_norm": 19.657285690307617, "learning_rate": 9.452380952380952e-06, "loss": 39.2664, "step": 7591 }, { "epoch": 180.76417910447762, "grad_norm": 18.618865966796875, "learning_rate": 9.451127819548873e-06, "loss": 38.2197, "step": 7592 }, { "epoch": 180.78805970149253, "grad_norm": 14.82322883605957, "learning_rate": 9.449874686716793e-06, "loss": 38.8792, "step": 7593 }, { "epoch": 180.81194029850747, "grad_norm": 23.297121047973633, "learning_rate": 9.448621553884713e-06, "loss": 37.819, "step": 7594 }, { "epoch": 180.83582089552237, "grad_norm": 18.612077713012695, "learning_rate": 9.447368421052632e-06, "loss": 37.756, "step": 7595 }, { "epoch": 180.8597014925373, "grad_norm": 22.1790771484375, "learning_rate": 9.446115288220552e-06, "loss": 38.2304, "step": 7596 }, { "epoch": 180.88358208955225, "grad_norm": 18.968181610107422, "learning_rate": 9.444862155388473e-06, "loss": 38.8481, "step": 7597 }, { "epoch": 180.90746268656716, "grad_norm": 19.16139793395996, "learning_rate": 9.443609022556391e-06, "loss": 37.887, "step": 7598 }, { "epoch": 180.9313432835821, "grad_norm": 24.370647430419922, "learning_rate": 9.442355889724312e-06, "loss": 38.9882, "step": 7599 }, { "epoch": 180.955223880597, "grad_norm": 19.582992553710938, "learning_rate": 9.44110275689223e-06, "loss": 39.416, "step": 7600 }, { "epoch": 180.97910447761194, "grad_norm": 32.500946044921875, "learning_rate": 9.439849624060151e-06, "loss": 37.0235, "step": 7601 }, { "epoch": 181.0, "grad_norm": 22.320682525634766, "learning_rate": 9.438596491228071e-06, "loss": 34.5362, "step": 7602 }, { "epoch": 181.02388059701494, "grad_norm": 34.26927947998047, "learning_rate": 9.43734335839599e-06, "loss": 38.3729, "step": 7603 }, { "epoch": 181.04776119402985, "grad_norm": NaN, "learning_rate": 9.43609022556391e-06, "loss": 41.5496, "step": 7604 }, { "epoch": 181.07164179104478, "grad_norm": 33.34910202026367, "learning_rate": 9.43609022556391e-06, "loss": 37.429, "step": 7605 }, { "epoch": 181.0955223880597, "grad_norm": 31.405717849731445, "learning_rate": 9.43483709273183e-06, "loss": 37.9895, "step": 7606 }, { "epoch": 181.11940298507463, "grad_norm": 29.484378814697266, "learning_rate": 9.43358395989975e-06, "loss": 38.1531, "step": 7607 }, { "epoch": 181.14328358208957, "grad_norm": 27.070419311523438, "learning_rate": 9.432330827067669e-06, "loss": 37.4599, "step": 7608 }, { "epoch": 181.16716417910447, "grad_norm": 29.64767837524414, "learning_rate": 9.43107769423559e-06, "loss": 39.2385, "step": 7609 }, { "epoch": 181.1910447761194, "grad_norm": 30.908058166503906, "learning_rate": 9.42982456140351e-06, "loss": 38.8913, "step": 7610 }, { "epoch": 181.21492537313432, "grad_norm": 25.773351669311523, "learning_rate": 9.42857142857143e-06, "loss": 38.6503, "step": 7611 }, { "epoch": 181.23880597014926, "grad_norm": 30.587566375732422, "learning_rate": 9.427318295739349e-06, "loss": 38.9265, "step": 7612 }, { "epoch": 181.26268656716417, "grad_norm": 29.84368324279785, "learning_rate": 9.426065162907269e-06, "loss": 38.2391, "step": 7613 }, { "epoch": 181.2865671641791, "grad_norm": 31.73255157470703, "learning_rate": 9.424812030075188e-06, "loss": 37.3281, "step": 7614 }, { "epoch": 181.31044776119404, "grad_norm": 24.846481323242188, "learning_rate": 9.423558897243108e-06, "loss": 38.7828, "step": 7615 }, { "epoch": 181.33432835820895, "grad_norm": 29.050628662109375, "learning_rate": 9.422305764411028e-06, "loss": 38.2192, "step": 7616 }, { "epoch": 181.3582089552239, "grad_norm": 22.826169967651367, "learning_rate": 9.421052631578949e-06, "loss": 37.6121, "step": 7617 }, { "epoch": 181.3820895522388, "grad_norm": 30.73110008239746, "learning_rate": 9.419799498746868e-06, "loss": 39.2107, "step": 7618 }, { "epoch": 181.40597014925373, "grad_norm": 25.17683219909668, "learning_rate": 9.418546365914788e-06, "loss": 38.7411, "step": 7619 }, { "epoch": 181.42985074626867, "grad_norm": 30.565523147583008, "learning_rate": 9.417293233082708e-06, "loss": 37.3072, "step": 7620 }, { "epoch": 181.45373134328358, "grad_norm": 23.134422302246094, "learning_rate": 9.416040100250627e-06, "loss": 38.6556, "step": 7621 }, { "epoch": 181.47761194029852, "grad_norm": 29.73345375061035, "learning_rate": 9.414786967418547e-06, "loss": 37.9902, "step": 7622 }, { "epoch": 181.50149253731342, "grad_norm": 27.748497009277344, "learning_rate": 9.413533834586466e-06, "loss": 38.4888, "step": 7623 }, { "epoch": 181.52537313432836, "grad_norm": 29.086557388305664, "learning_rate": 9.412280701754388e-06, "loss": 38.0586, "step": 7624 }, { "epoch": 181.54925373134327, "grad_norm": 24.033424377441406, "learning_rate": 9.411027568922306e-06, "loss": 39.1418, "step": 7625 }, { "epoch": 181.5731343283582, "grad_norm": 31.593238830566406, "learning_rate": 9.409774436090227e-06, "loss": 39.2994, "step": 7626 }, { "epoch": 181.59701492537314, "grad_norm": 24.30849838256836, "learning_rate": 9.408521303258147e-06, "loss": 37.4472, "step": 7627 }, { "epoch": 181.62089552238805, "grad_norm": 33.19766616821289, "learning_rate": 9.407268170426066e-06, "loss": 38.1845, "step": 7628 }, { "epoch": 181.644776119403, "grad_norm": 24.544702529907227, "learning_rate": 9.406015037593986e-06, "loss": 37.094, "step": 7629 }, { "epoch": 181.6686567164179, "grad_norm": 31.14695167541504, "learning_rate": 9.404761904761905e-06, "loss": 38.9074, "step": 7630 }, { "epoch": 181.69253731343284, "grad_norm": 28.016338348388672, "learning_rate": 9.403508771929825e-06, "loss": 38.6535, "step": 7631 }, { "epoch": 181.71641791044777, "grad_norm": 31.826282501220703, "learning_rate": 9.402255639097745e-06, "loss": 38.6819, "step": 7632 }, { "epoch": 181.74029850746268, "grad_norm": 24.161087036132812, "learning_rate": 9.401002506265666e-06, "loss": 37.7841, "step": 7633 }, { "epoch": 181.76417910447762, "grad_norm": 27.22321128845215, "learning_rate": 9.399749373433584e-06, "loss": 37.2927, "step": 7634 }, { "epoch": 181.78805970149253, "grad_norm": 23.210390090942383, "learning_rate": 9.398496240601505e-06, "loss": 37.9316, "step": 7635 }, { "epoch": 181.81194029850747, "grad_norm": 23.418764114379883, "learning_rate": 9.397243107769425e-06, "loss": 37.1737, "step": 7636 }, { "epoch": 181.83582089552237, "grad_norm": 20.593555450439453, "learning_rate": 9.395989974937344e-06, "loss": 38.3443, "step": 7637 }, { "epoch": 181.8597014925373, "grad_norm": 26.723350524902344, "learning_rate": 9.394736842105264e-06, "loss": 38.5668, "step": 7638 }, { "epoch": 181.88358208955225, "grad_norm": 21.229936599731445, "learning_rate": 9.393483709273183e-06, "loss": 38.2915, "step": 7639 }, { "epoch": 181.90746268656716, "grad_norm": 25.924835205078125, "learning_rate": 9.392230576441103e-06, "loss": 38.9211, "step": 7640 }, { "epoch": 181.9313432835821, "grad_norm": 19.696718215942383, "learning_rate": 9.390977443609023e-06, "loss": 38.486, "step": 7641 }, { "epoch": 181.955223880597, "grad_norm": 22.874286651611328, "learning_rate": 9.389724310776944e-06, "loss": 37.2433, "step": 7642 }, { "epoch": 181.97910447761194, "grad_norm": 18.583221435546875, "learning_rate": 9.388471177944863e-06, "loss": 37.7967, "step": 7643 }, { "epoch": 182.0, "grad_norm": 20.269168853759766, "learning_rate": 9.387218045112783e-06, "loss": 31.9012, "step": 7644 }, { "epoch": 182.02388059701494, "grad_norm": 19.068851470947266, "learning_rate": 9.385964912280703e-06, "loss": 36.8886, "step": 7645 }, { "epoch": 182.04776119402985, "grad_norm": 19.43038558959961, "learning_rate": 9.384711779448622e-06, "loss": 38.2625, "step": 7646 }, { "epoch": 182.07164179104478, "grad_norm": 18.73162269592285, "learning_rate": 9.383458646616542e-06, "loss": 38.9788, "step": 7647 }, { "epoch": 182.0955223880597, "grad_norm": 21.09528923034668, "learning_rate": 9.382205513784461e-06, "loss": 37.1233, "step": 7648 }, { "epoch": 182.11940298507463, "grad_norm": 20.089250564575195, "learning_rate": 9.380952380952381e-06, "loss": 38.2381, "step": 7649 }, { "epoch": 182.14328358208957, "grad_norm": 17.421754837036133, "learning_rate": 9.379699248120302e-06, "loss": 37.9993, "step": 7650 }, { "epoch": 182.16716417910447, "grad_norm": 18.992149353027344, "learning_rate": 9.378446115288222e-06, "loss": 37.3638, "step": 7651 }, { "epoch": 182.1910447761194, "grad_norm": 15.953680038452148, "learning_rate": 9.377192982456142e-06, "loss": 38.0116, "step": 7652 }, { "epoch": 182.21492537313432, "grad_norm": 20.523019790649414, "learning_rate": 9.375939849624061e-06, "loss": 38.9804, "step": 7653 }, { "epoch": 182.23880597014926, "grad_norm": 15.280717849731445, "learning_rate": 9.37468671679198e-06, "loss": 37.5246, "step": 7654 }, { "epoch": 182.26268656716417, "grad_norm": 20.681921005249023, "learning_rate": 9.3734335839599e-06, "loss": 36.9325, "step": 7655 }, { "epoch": 182.2865671641791, "grad_norm": 17.027320861816406, "learning_rate": 9.37218045112782e-06, "loss": 38.0196, "step": 7656 }, { "epoch": 182.31044776119404, "grad_norm": 20.468412399291992, "learning_rate": 9.370927318295741e-06, "loss": 39.457, "step": 7657 }, { "epoch": 182.33432835820895, "grad_norm": 18.735979080200195, "learning_rate": 9.36967418546366e-06, "loss": 38.0952, "step": 7658 }, { "epoch": 182.3582089552239, "grad_norm": NaN, "learning_rate": 9.36842105263158e-06, "loss": 43.9235, "step": 7659 }, { "epoch": 182.3820895522388, "grad_norm": 26.147253036499023, "learning_rate": 9.36842105263158e-06, "loss": 38.8342, "step": 7660 }, { "epoch": 182.40597014925373, "grad_norm": 30.72784423828125, "learning_rate": 9.367167919799498e-06, "loss": 38.1475, "step": 7661 }, { "epoch": 182.42985074626867, "grad_norm": 23.31533432006836, "learning_rate": 9.36591478696742e-06, "loss": 37.7839, "step": 7662 }, { "epoch": 182.45373134328358, "grad_norm": 28.856985092163086, "learning_rate": 9.36466165413534e-06, "loss": 38.5783, "step": 7663 }, { "epoch": 182.47761194029852, "grad_norm": 24.6004581451416, "learning_rate": 9.363408521303259e-06, "loss": 39.4207, "step": 7664 }, { "epoch": 182.50149253731342, "grad_norm": 24.046371459960938, "learning_rate": 9.362155388471178e-06, "loss": 38.2667, "step": 7665 }, { "epoch": 182.52537313432836, "grad_norm": 26.086563110351562, "learning_rate": 9.360902255639098e-06, "loss": 37.57, "step": 7666 }, { "epoch": 182.54925373134327, "grad_norm": 19.401077270507812, "learning_rate": 9.35964912280702e-06, "loss": 37.6523, "step": 7667 }, { "epoch": 182.5731343283582, "grad_norm": 26.627574920654297, "learning_rate": 9.358395989974937e-06, "loss": 37.9341, "step": 7668 }, { "epoch": 182.59701492537314, "grad_norm": 21.351564407348633, "learning_rate": 9.357142857142859e-06, "loss": 37.7288, "step": 7669 }, { "epoch": 182.62089552238805, "grad_norm": 15.088356018066406, "learning_rate": 9.355889724310778e-06, "loss": 37.764, "step": 7670 }, { "epoch": 182.644776119403, "grad_norm": 19.552310943603516, "learning_rate": 9.354636591478698e-06, "loss": 37.6055, "step": 7671 }, { "epoch": 182.6686567164179, "grad_norm": 17.138029098510742, "learning_rate": 9.353383458646617e-06, "loss": 38.8954, "step": 7672 }, { "epoch": 182.69253731343284, "grad_norm": 20.236984252929688, "learning_rate": 9.352130325814537e-06, "loss": 38.7094, "step": 7673 }, { "epoch": 182.71641791044777, "grad_norm": 17.15636444091797, "learning_rate": 9.350877192982457e-06, "loss": 38.7631, "step": 7674 }, { "epoch": 182.74029850746268, "grad_norm": 14.976155281066895, "learning_rate": 9.349624060150376e-06, "loss": 37.9882, "step": 7675 }, { "epoch": 182.76417910447762, "grad_norm": 17.57347297668457, "learning_rate": 9.348370927318296e-06, "loss": 38.4724, "step": 7676 }, { "epoch": 182.78805970149253, "grad_norm": 19.05532455444336, "learning_rate": 9.347117794486217e-06, "loss": 37.4343, "step": 7677 }, { "epoch": 182.81194029850747, "grad_norm": 20.01117515563965, "learning_rate": 9.345864661654137e-06, "loss": 39.2464, "step": 7678 }, { "epoch": 182.83582089552237, "grad_norm": 15.85883903503418, "learning_rate": 9.344611528822056e-06, "loss": 37.8886, "step": 7679 }, { "epoch": 182.8597014925373, "grad_norm": 16.31858253479004, "learning_rate": 9.343358395989976e-06, "loss": 38.2813, "step": 7680 }, { "epoch": 182.88358208955225, "grad_norm": 21.91824722290039, "learning_rate": 9.342105263157895e-06, "loss": 38.7016, "step": 7681 }, { "epoch": 182.90746268656716, "grad_norm": 13.628788948059082, "learning_rate": 9.340852130325815e-06, "loss": 38.281, "step": 7682 }, { "epoch": 182.9313432835821, "grad_norm": 18.080459594726562, "learning_rate": 9.339598997493735e-06, "loss": 39.22, "step": 7683 }, { "epoch": 182.955223880597, "grad_norm": 22.61056137084961, "learning_rate": 9.338345864661656e-06, "loss": 39.4347, "step": 7684 }, { "epoch": 182.97910447761194, "grad_norm": 18.271526336669922, "learning_rate": 9.337092731829574e-06, "loss": 39.2384, "step": 7685 }, { "epoch": 183.0, "grad_norm": 15.15306568145752, "learning_rate": 9.335839598997495e-06, "loss": 34.1513, "step": 7686 }, { "epoch": 183.02388059701494, "grad_norm": 28.156301498413086, "learning_rate": 9.334586466165415e-06, "loss": 37.1129, "step": 7687 }, { "epoch": 183.04776119402985, "grad_norm": 17.79732894897461, "learning_rate": 9.333333333333334e-06, "loss": 38.5227, "step": 7688 }, { "epoch": 183.07164179104478, "grad_norm": 18.45402717590332, "learning_rate": 9.332080200501254e-06, "loss": 38.492, "step": 7689 }, { "epoch": 183.0955223880597, "grad_norm": 25.375320434570312, "learning_rate": 9.330827067669174e-06, "loss": 38.8671, "step": 7690 }, { "epoch": 183.11940298507463, "grad_norm": 16.20077896118164, "learning_rate": 9.329573934837093e-06, "loss": 38.1921, "step": 7691 }, { "epoch": 183.14328358208957, "grad_norm": 23.60683822631836, "learning_rate": 9.328320802005013e-06, "loss": 38.3588, "step": 7692 }, { "epoch": 183.16716417910447, "grad_norm": 28.53430938720703, "learning_rate": 9.327067669172934e-06, "loss": 38.4831, "step": 7693 }, { "epoch": 183.1910447761194, "grad_norm": 19.33002281188965, "learning_rate": 9.325814536340852e-06, "loss": 37.6731, "step": 7694 }, { "epoch": 183.21492537313432, "grad_norm": 35.12484359741211, "learning_rate": 9.324561403508773e-06, "loss": 38.6068, "step": 7695 }, { "epoch": 183.23880597014926, "grad_norm": 25.615224838256836, "learning_rate": 9.323308270676693e-06, "loss": 40.1274, "step": 7696 }, { "epoch": 183.26268656716417, "grad_norm": 38.18474197387695, "learning_rate": 9.322055137844612e-06, "loss": 38.6643, "step": 7697 }, { "epoch": 183.2865671641791, "grad_norm": 25.419836044311523, "learning_rate": 9.320802005012532e-06, "loss": 39.6969, "step": 7698 }, { "epoch": 183.31044776119404, "grad_norm": 40.90986251831055, "learning_rate": 9.319548872180452e-06, "loss": 37.8611, "step": 7699 }, { "epoch": 183.33432835820895, "grad_norm": 40.589378356933594, "learning_rate": 9.318295739348371e-06, "loss": 39.3673, "step": 7700 }, { "epoch": 183.3582089552239, "grad_norm": 34.87507629394531, "learning_rate": 9.31704260651629e-06, "loss": 39.072, "step": 7701 }, { "epoch": 183.3820895522388, "grad_norm": 35.49257278442383, "learning_rate": 9.315789473684212e-06, "loss": 38.231, "step": 7702 }, { "epoch": 183.40597014925373, "grad_norm": 31.80084991455078, "learning_rate": 9.31453634085213e-06, "loss": 39.5215, "step": 7703 }, { "epoch": 183.42985074626867, "grad_norm": 32.01988983154297, "learning_rate": 9.313283208020051e-06, "loss": 38.3811, "step": 7704 }, { "epoch": 183.45373134328358, "grad_norm": 32.97187042236328, "learning_rate": 9.312030075187971e-06, "loss": 37.529, "step": 7705 }, { "epoch": 183.47761194029852, "grad_norm": 32.333255767822266, "learning_rate": 9.31077694235589e-06, "loss": 38.386, "step": 7706 }, { "epoch": 183.50149253731342, "grad_norm": 31.905254364013672, "learning_rate": 9.30952380952381e-06, "loss": 39.7536, "step": 7707 }, { "epoch": 183.52537313432836, "grad_norm": 26.036340713500977, "learning_rate": 9.30827067669173e-06, "loss": 39.0496, "step": 7708 }, { "epoch": 183.54925373134327, "grad_norm": 31.00899887084961, "learning_rate": 9.30701754385965e-06, "loss": 37.6582, "step": 7709 }, { "epoch": 183.5731343283582, "grad_norm": 23.661453247070312, "learning_rate": 9.30576441102757e-06, "loss": 38.5868, "step": 7710 }, { "epoch": 183.59701492537314, "grad_norm": 35.26527404785156, "learning_rate": 9.30451127819549e-06, "loss": 36.9418, "step": 7711 }, { "epoch": 183.62089552238805, "grad_norm": 30.152225494384766, "learning_rate": 9.30325814536341e-06, "loss": 37.9041, "step": 7712 }, { "epoch": 183.644776119403, "grad_norm": 36.04405212402344, "learning_rate": 9.30200501253133e-06, "loss": 37.8787, "step": 7713 }, { "epoch": 183.6686567164179, "grad_norm": 32.55191421508789, "learning_rate": 9.300751879699249e-06, "loss": 38.5805, "step": 7714 }, { "epoch": 183.69253731343284, "grad_norm": 35.73372268676758, "learning_rate": 9.299498746867169e-06, "loss": 39.6069, "step": 7715 }, { "epoch": 183.71641791044777, "grad_norm": 30.653011322021484, "learning_rate": 9.298245614035088e-06, "loss": 38.2053, "step": 7716 }, { "epoch": 183.74029850746268, "grad_norm": 34.98927688598633, "learning_rate": 9.29699248120301e-06, "loss": 38.7874, "step": 7717 }, { "epoch": 183.76417910447762, "grad_norm": 27.018739700317383, "learning_rate": 9.295739348370927e-06, "loss": 38.1713, "step": 7718 }, { "epoch": 183.78805970149253, "grad_norm": 37.969173431396484, "learning_rate": 9.294486215538849e-06, "loss": 39.1804, "step": 7719 }, { "epoch": 183.81194029850747, "grad_norm": 35.38280487060547, "learning_rate": 9.293233082706767e-06, "loss": 38.5821, "step": 7720 }, { "epoch": 183.83582089552237, "grad_norm": 31.927392959594727, "learning_rate": 9.291979949874688e-06, "loss": 38.0057, "step": 7721 }, { "epoch": 183.8597014925373, "grad_norm": 32.185203552246094, "learning_rate": 9.290726817042607e-06, "loss": 38.5497, "step": 7722 }, { "epoch": 183.88358208955225, "grad_norm": 32.241790771484375, "learning_rate": 9.289473684210527e-06, "loss": 37.8311, "step": 7723 }, { "epoch": 183.90746268656716, "grad_norm": 30.077545166015625, "learning_rate": 9.288220551378447e-06, "loss": 38.8007, "step": 7724 }, { "epoch": 183.9313432835821, "grad_norm": 35.88338088989258, "learning_rate": 9.286967418546366e-06, "loss": 38.5947, "step": 7725 }, { "epoch": 183.955223880597, "grad_norm": 31.73858642578125, "learning_rate": 9.285714285714288e-06, "loss": 37.8341, "step": 7726 }, { "epoch": 183.97910447761194, "grad_norm": 29.405078887939453, "learning_rate": 9.284461152882205e-06, "loss": 37.7245, "step": 7727 }, { "epoch": 184.0, "grad_norm": NaN, "learning_rate": 9.283208020050127e-06, "loss": 56.4104, "step": 7728 }, { "epoch": 184.02388059701494, "grad_norm": 19.570993423461914, "learning_rate": 9.283208020050127e-06, "loss": 38.1128, "step": 7729 }, { "epoch": 184.04776119402985, "grad_norm": NaN, "learning_rate": 9.281954887218045e-06, "loss": 63.7892, "step": 7730 }, { "epoch": 184.07164179104478, "grad_norm": 48.01729965209961, "learning_rate": 9.281954887218045e-06, "loss": 39.5211, "step": 7731 }, { "epoch": 184.0955223880597, "grad_norm": 33.520503997802734, "learning_rate": 9.280701754385966e-06, "loss": 39.2581, "step": 7732 }, { "epoch": 184.11940298507463, "grad_norm": 40.728187561035156, "learning_rate": 9.279448621553886e-06, "loss": 40.5069, "step": 7733 }, { "epoch": 184.14328358208957, "grad_norm": 34.6091194152832, "learning_rate": 9.278195488721805e-06, "loss": 38.6713, "step": 7734 }, { "epoch": 184.16716417910447, "grad_norm": 27.991249084472656, "learning_rate": 9.276942355889725e-06, "loss": 38.9768, "step": 7735 }, { "epoch": 184.1910447761194, "grad_norm": 32.23847198486328, "learning_rate": 9.275689223057644e-06, "loss": 39.503, "step": 7736 }, { "epoch": 184.21492537313432, "grad_norm": 23.983753204345703, "learning_rate": 9.274436090225564e-06, "loss": 39.7654, "step": 7737 }, { "epoch": 184.23880597014926, "grad_norm": 33.7354736328125, "learning_rate": 9.273182957393484e-06, "loss": 40.2666, "step": 7738 }, { "epoch": 184.26268656716417, "grad_norm": 22.54912567138672, "learning_rate": 9.271929824561405e-06, "loss": 40.3684, "step": 7739 }, { "epoch": 184.2865671641791, "grad_norm": 31.727224349975586, "learning_rate": 9.270676691729324e-06, "loss": 39.9529, "step": 7740 }, { "epoch": 184.31044776119404, "grad_norm": 32.118106842041016, "learning_rate": 9.269423558897244e-06, "loss": 41.3572, "step": 7741 }, { "epoch": 184.33432835820895, "grad_norm": 19.489656448364258, "learning_rate": 9.268170426065164e-06, "loss": 39.6626, "step": 7742 }, { "epoch": 184.3582089552239, "grad_norm": 29.95058822631836, "learning_rate": 9.266917293233083e-06, "loss": 40.7756, "step": 7743 }, { "epoch": 184.3820895522388, "grad_norm": 22.743227005004883, "learning_rate": 9.265664160401003e-06, "loss": 40.0637, "step": 7744 }, { "epoch": 184.40597014925373, "grad_norm": 24.127614974975586, "learning_rate": 9.264411027568922e-06, "loss": 39.5272, "step": 7745 }, { "epoch": 184.42985074626867, "grad_norm": 32.89726257324219, "learning_rate": 9.263157894736842e-06, "loss": 40.244, "step": 7746 }, { "epoch": 184.45373134328358, "grad_norm": 20.5611629486084, "learning_rate": 9.261904761904763e-06, "loss": 39.9755, "step": 7747 }, { "epoch": 184.47761194029852, "grad_norm": 36.67335510253906, "learning_rate": 9.260651629072683e-06, "loss": 40.6738, "step": 7748 }, { "epoch": 184.50149253731342, "grad_norm": 27.706262588500977, "learning_rate": 9.259398496240603e-06, "loss": 40.3502, "step": 7749 }, { "epoch": 184.52537313432836, "grad_norm": 22.725189208984375, "learning_rate": 9.258145363408522e-06, "loss": 37.5332, "step": 7750 }, { "epoch": 184.54925373134327, "grad_norm": 40.575313568115234, "learning_rate": 9.256892230576442e-06, "loss": 40.0921, "step": 7751 }, { "epoch": 184.5731343283582, "grad_norm": 27.19171142578125, "learning_rate": 9.255639097744363e-06, "loss": 38.1854, "step": 7752 }, { "epoch": 184.59701492537314, "grad_norm": 30.067363739013672, "learning_rate": 9.254385964912281e-06, "loss": 40.1989, "step": 7753 }, { "epoch": 184.62089552238805, "grad_norm": 25.565664291381836, "learning_rate": 9.253132832080202e-06, "loss": 40.3723, "step": 7754 }, { "epoch": 184.644776119403, "grad_norm": 28.75983428955078, "learning_rate": 9.25187969924812e-06, "loss": 40.6146, "step": 7755 }, { "epoch": 184.6686567164179, "grad_norm": 20.13669776916504, "learning_rate": 9.250626566416041e-06, "loss": 41.4179, "step": 7756 }, { "epoch": 184.69253731343284, "grad_norm": 34.65123748779297, "learning_rate": 9.249373433583961e-06, "loss": 41.4522, "step": 7757 }, { "epoch": 184.71641791044777, "grad_norm": 29.887758255004883, "learning_rate": 9.24812030075188e-06, "loss": 41.7257, "step": 7758 }, { "epoch": 184.74029850746268, "grad_norm": 22.708446502685547, "learning_rate": 9.2468671679198e-06, "loss": 39.5245, "step": 7759 }, { "epoch": 184.76417910447762, "grad_norm": 27.760478973388672, "learning_rate": 9.24561403508772e-06, "loss": 40.1309, "step": 7760 }, { "epoch": 184.78805970149253, "grad_norm": 35.92546844482422, "learning_rate": 9.24436090225564e-06, "loss": 39.6421, "step": 7761 }, { "epoch": 184.81194029850747, "grad_norm": 18.31737518310547, "learning_rate": 9.243107769423559e-06, "loss": 39.7503, "step": 7762 }, { "epoch": 184.83582089552237, "grad_norm": 26.146255493164062, "learning_rate": 9.24185463659148e-06, "loss": 39.6416, "step": 7763 }, { "epoch": 184.8597014925373, "grad_norm": 21.460485458374023, "learning_rate": 9.240601503759398e-06, "loss": 40.1663, "step": 7764 }, { "epoch": 184.88358208955225, "grad_norm": 22.847776412963867, "learning_rate": 9.23934837092732e-06, "loss": 41.4605, "step": 7765 }, { "epoch": 184.90746268656716, "grad_norm": 21.51983070373535, "learning_rate": 9.238095238095239e-06, "loss": 38.5327, "step": 7766 }, { "epoch": 184.9313432835821, "grad_norm": 23.702680587768555, "learning_rate": 9.236842105263159e-06, "loss": 40.1406, "step": 7767 }, { "epoch": 184.955223880597, "grad_norm": 25.279239654541016, "learning_rate": 9.235588972431078e-06, "loss": 40.8743, "step": 7768 }, { "epoch": 184.97910447761194, "grad_norm": 33.16035842895508, "learning_rate": 9.234335839598998e-06, "loss": 39.785, "step": 7769 }, { "epoch": 185.0, "grad_norm": 22.09147834777832, "learning_rate": 9.233082706766918e-06, "loss": 34.7394, "step": 7770 }, { "epoch": 185.02388059701494, "grad_norm": 30.796735763549805, "learning_rate": 9.231829573934837e-06, "loss": 40.6943, "step": 7771 }, { "epoch": 185.04776119402985, "grad_norm": 32.15016555786133, "learning_rate": 9.230576441102758e-06, "loss": 40.4672, "step": 7772 }, { "epoch": 185.07164179104478, "grad_norm": 22.353782653808594, "learning_rate": 9.229323308270678e-06, "loss": 41.4305, "step": 7773 }, { "epoch": 185.0955223880597, "grad_norm": 53.881473541259766, "learning_rate": 9.228070175438598e-06, "loss": 38.3879, "step": 7774 }, { "epoch": 185.11940298507463, "grad_norm": 39.897361755371094, "learning_rate": 9.226817042606517e-06, "loss": 41.0108, "step": 7775 }, { "epoch": 185.14328358208957, "grad_norm": NaN, "learning_rate": 9.225563909774437e-06, "loss": 51.0282, "step": 7776 }, { "epoch": 185.16716417910447, "grad_norm": 33.82575988769531, "learning_rate": 9.225563909774437e-06, "loss": 40.394, "step": 7777 }, { "epoch": 185.1910447761194, "grad_norm": 82.26239776611328, "learning_rate": 9.224310776942356e-06, "loss": 43.2198, "step": 7778 }, { "epoch": 185.21492537313432, "grad_norm": 73.5898208618164, "learning_rate": 9.223057644110276e-06, "loss": 41.9718, "step": 7779 }, { "epoch": 185.23880597014926, "grad_norm": 54.194740295410156, "learning_rate": 9.221804511278196e-06, "loss": 42.3892, "step": 7780 }, { "epoch": 185.26268656716417, "grad_norm": 64.35474395751953, "learning_rate": 9.220551378446117e-06, "loss": 41.4895, "step": 7781 }, { "epoch": 185.2865671641791, "grad_norm": 52.19340515136719, "learning_rate": 9.219298245614035e-06, "loss": 41.6825, "step": 7782 }, { "epoch": 185.31044776119404, "grad_norm": 49.811134338378906, "learning_rate": 9.218045112781956e-06, "loss": 42.6462, "step": 7783 }, { "epoch": 185.33432835820895, "grad_norm": 40.66097640991211, "learning_rate": 9.216791979949876e-06, "loss": 41.6908, "step": 7784 }, { "epoch": 185.3582089552239, "grad_norm": 37.429107666015625, "learning_rate": 9.215538847117795e-06, "loss": 42.5027, "step": 7785 }, { "epoch": 185.3820895522388, "grad_norm": 42.99324035644531, "learning_rate": 9.214285714285715e-06, "loss": 41.92, "step": 7786 }, { "epoch": 185.40597014925373, "grad_norm": 33.43999099731445, "learning_rate": 9.213032581453634e-06, "loss": 40.9467, "step": 7787 }, { "epoch": 185.42985074626867, "grad_norm": 49.963966369628906, "learning_rate": 9.211779448621556e-06, "loss": 42.4665, "step": 7788 }, { "epoch": 185.45373134328358, "grad_norm": 34.8093147277832, "learning_rate": 9.210526315789474e-06, "loss": 41.5139, "step": 7789 }, { "epoch": 185.47761194029852, "grad_norm": 34.77069854736328, "learning_rate": 9.209273182957395e-06, "loss": 41.5734, "step": 7790 }, { "epoch": 185.50149253731342, "grad_norm": 47.45319747924805, "learning_rate": 9.208020050125313e-06, "loss": 41.3032, "step": 7791 }, { "epoch": 185.52537313432836, "grad_norm": 24.358047485351562, "learning_rate": 9.206766917293234e-06, "loss": 41.1637, "step": 7792 }, { "epoch": 185.54925373134327, "grad_norm": 46.77471923828125, "learning_rate": 9.205513784461154e-06, "loss": 44.595, "step": 7793 }, { "epoch": 185.5731343283582, "grad_norm": 36.80247116088867, "learning_rate": 9.204260651629073e-06, "loss": 42.6577, "step": 7794 }, { "epoch": 185.59701492537314, "grad_norm": 30.75225830078125, "learning_rate": 9.203007518796993e-06, "loss": 41.0479, "step": 7795 }, { "epoch": 185.62089552238805, "grad_norm": 55.35914611816406, "learning_rate": 9.201754385964913e-06, "loss": 43.4212, "step": 7796 }, { "epoch": 185.644776119403, "grad_norm": 34.891109466552734, "learning_rate": 9.200501253132834e-06, "loss": 42.6526, "step": 7797 }, { "epoch": 185.6686567164179, "grad_norm": 60.54024887084961, "learning_rate": 9.199248120300752e-06, "loss": 43.1204, "step": 7798 }, { "epoch": 185.69253731343284, "grad_norm": 48.87995529174805, "learning_rate": 9.197994987468673e-06, "loss": 43.8232, "step": 7799 }, { "epoch": 185.71641791044777, "grad_norm": 43.41633224487305, "learning_rate": 9.196741854636593e-06, "loss": 40.743, "step": 7800 }, { "epoch": 185.74029850746268, "grad_norm": 43.59025955200195, "learning_rate": 9.195488721804512e-06, "loss": 42.746, "step": 7801 }, { "epoch": 185.76417910447762, "grad_norm": 45.43309020996094, "learning_rate": 9.194235588972432e-06, "loss": 42.921, "step": 7802 }, { "epoch": 185.78805970149253, "grad_norm": 28.127649307250977, "learning_rate": 9.192982456140351e-06, "loss": 42.1366, "step": 7803 }, { "epoch": 185.81194029850747, "grad_norm": 61.12681579589844, "learning_rate": 9.191729323308271e-06, "loss": 42.4191, "step": 7804 }, { "epoch": 185.83582089552237, "grad_norm": 48.988365173339844, "learning_rate": 9.19047619047619e-06, "loss": 43.5104, "step": 7805 }, { "epoch": 185.8597014925373, "grad_norm": 56.43540954589844, "learning_rate": 9.18922305764411e-06, "loss": 43.6525, "step": 7806 }, { "epoch": 185.88358208955225, "grad_norm": 46.069435119628906, "learning_rate": 9.187969924812032e-06, "loss": 43.4069, "step": 7807 }, { "epoch": 185.90746268656716, "grad_norm": 39.751705169677734, "learning_rate": 9.186716791979951e-06, "loss": 43.334, "step": 7808 }, { "epoch": 185.9313432835821, "grad_norm": 46.80768966674805, "learning_rate": 9.18546365914787e-06, "loss": 41.914, "step": 7809 }, { "epoch": 185.955223880597, "grad_norm": 38.70549392700195, "learning_rate": 9.18421052631579e-06, "loss": 43.7965, "step": 7810 }, { "epoch": 185.97910447761194, "grad_norm": 33.33662414550781, "learning_rate": 9.18295739348371e-06, "loss": 42.2128, "step": 7811 }, { "epoch": 186.0, "grad_norm": 42.501155853271484, "learning_rate": 9.18170426065163e-06, "loss": 36.0084, "step": 7812 }, { "epoch": 186.02388059701494, "grad_norm": 40.92805862426758, "learning_rate": 9.180451127819549e-06, "loss": 43.0312, "step": 7813 }, { "epoch": 186.04776119402985, "grad_norm": 34.84563446044922, "learning_rate": 9.17919799498747e-06, "loss": 42.8822, "step": 7814 }, { "epoch": 186.07164179104478, "grad_norm": 27.465234756469727, "learning_rate": 9.177944862155388e-06, "loss": 43.2051, "step": 7815 }, { "epoch": 186.0955223880597, "grad_norm": 36.75338363647461, "learning_rate": 9.17669172932331e-06, "loss": 41.1784, "step": 7816 }, { "epoch": 186.11940298507463, "grad_norm": 47.202701568603516, "learning_rate": 9.17543859649123e-06, "loss": 42.8061, "step": 7817 }, { "epoch": 186.14328358208957, "grad_norm": 35.21329116821289, "learning_rate": 9.174185463659149e-06, "loss": 43.1316, "step": 7818 }, { "epoch": 186.16716417910447, "grad_norm": 27.600418090820312, "learning_rate": 9.172932330827068e-06, "loss": 42.7668, "step": 7819 }, { "epoch": 186.1910447761194, "grad_norm": 46.3722038269043, "learning_rate": 9.171679197994988e-06, "loss": 44.4291, "step": 7820 }, { "epoch": 186.21492537313432, "grad_norm": 37.76526641845703, "learning_rate": 9.170426065162908e-06, "loss": 42.964, "step": 7821 }, { "epoch": 186.23880597014926, "grad_norm": 27.865131378173828, "learning_rate": 9.169172932330827e-06, "loss": 42.4656, "step": 7822 }, { "epoch": 186.26268656716417, "grad_norm": 31.589683532714844, "learning_rate": 9.167919799498749e-06, "loss": 42.4538, "step": 7823 }, { "epoch": 186.2865671641791, "grad_norm": 41.379058837890625, "learning_rate": 9.166666666666666e-06, "loss": 43.2337, "step": 7824 }, { "epoch": 186.31044776119404, "grad_norm": 35.93637466430664, "learning_rate": 9.165413533834588e-06, "loss": 41.7727, "step": 7825 }, { "epoch": 186.33432835820895, "grad_norm": 29.648672103881836, "learning_rate": 9.164160401002507e-06, "loss": 43.7275, "step": 7826 }, { "epoch": 186.3582089552239, "grad_norm": 22.539348602294922, "learning_rate": 9.162907268170427e-06, "loss": 43.0448, "step": 7827 }, { "epoch": 186.3820895522388, "grad_norm": 31.496742248535156, "learning_rate": 9.161654135338347e-06, "loss": 43.3235, "step": 7828 }, { "epoch": 186.40597014925373, "grad_norm": 27.494714736938477, "learning_rate": 9.160401002506266e-06, "loss": 42.2111, "step": 7829 }, { "epoch": 186.42985074626867, "grad_norm": 39.6995735168457, "learning_rate": 9.159147869674186e-06, "loss": 43.8583, "step": 7830 }, { "epoch": 186.45373134328358, "grad_norm": 44.616390228271484, "learning_rate": 9.157894736842105e-06, "loss": 43.1443, "step": 7831 }, { "epoch": 186.47761194029852, "grad_norm": 37.15000534057617, "learning_rate": 9.156641604010027e-06, "loss": 43.7389, "step": 7832 }, { "epoch": 186.50149253731342, "grad_norm": 32.24622344970703, "learning_rate": 9.155388471177946e-06, "loss": 42.566, "step": 7833 }, { "epoch": 186.52537313432836, "grad_norm": 28.09488868713379, "learning_rate": 9.154135338345866e-06, "loss": 43.3224, "step": 7834 }, { "epoch": 186.54925373134327, "grad_norm": 43.34132385253906, "learning_rate": 9.152882205513785e-06, "loss": 42.2653, "step": 7835 }, { "epoch": 186.5731343283582, "grad_norm": 37.8883056640625, "learning_rate": 9.151629072681705e-06, "loss": 42.4098, "step": 7836 }, { "epoch": 186.59701492537314, "grad_norm": 26.457744598388672, "learning_rate": 9.150375939849625e-06, "loss": 44.2239, "step": 7837 }, { "epoch": 186.62089552238805, "grad_norm": 23.77232551574707, "learning_rate": 9.149122807017544e-06, "loss": 43.7133, "step": 7838 }, { "epoch": 186.644776119403, "grad_norm": 32.34585189819336, "learning_rate": 9.147869674185464e-06, "loss": 43.1097, "step": 7839 }, { "epoch": 186.6686567164179, "grad_norm": 40.74631881713867, "learning_rate": 9.146616541353385e-06, "loss": 43.3843, "step": 7840 }, { "epoch": 186.69253731343284, "grad_norm": 31.526451110839844, "learning_rate": 9.145363408521305e-06, "loss": 42.1462, "step": 7841 }, { "epoch": 186.71641791044777, "grad_norm": 29.849029541015625, "learning_rate": 9.144110275689224e-06, "loss": 41.4517, "step": 7842 }, { "epoch": 186.74029850746268, "grad_norm": 46.14763641357422, "learning_rate": 9.142857142857144e-06, "loss": 43.6828, "step": 7843 }, { "epoch": 186.76417910447762, "grad_norm": 29.060964584350586, "learning_rate": 9.141604010025063e-06, "loss": 42.8401, "step": 7844 }, { "epoch": 186.78805970149253, "grad_norm": 29.804529190063477, "learning_rate": 9.140350877192983e-06, "loss": 42.2589, "step": 7845 }, { "epoch": 186.81194029850747, "grad_norm": 24.214675903320312, "learning_rate": 9.139097744360903e-06, "loss": 44.5865, "step": 7846 }, { "epoch": 186.83582089552237, "grad_norm": 40.95576858520508, "learning_rate": 9.137844611528824e-06, "loss": 43.1832, "step": 7847 }, { "epoch": 186.8597014925373, "grad_norm": 45.6334228515625, "learning_rate": 9.136591478696742e-06, "loss": 42.2239, "step": 7848 }, { "epoch": 186.88358208955225, "grad_norm": 22.144073486328125, "learning_rate": 9.135338345864663e-06, "loss": 42.834, "step": 7849 }, { "epoch": 186.90746268656716, "grad_norm": 41.78306579589844, "learning_rate": 9.134085213032581e-06, "loss": 41.8102, "step": 7850 }, { "epoch": 186.9313432835821, "grad_norm": 60.63203811645508, "learning_rate": 9.132832080200502e-06, "loss": 41.8373, "step": 7851 }, { "epoch": 186.955223880597, "grad_norm": 32.649009704589844, "learning_rate": 9.131578947368422e-06, "loss": 42.7243, "step": 7852 }, { "epoch": 186.97910447761194, "grad_norm": 60.176368713378906, "learning_rate": 9.130325814536342e-06, "loss": 43.7076, "step": 7853 }, { "epoch": 187.0, "grad_norm": 34.826778411865234, "learning_rate": 9.129072681704261e-06, "loss": 36.2024, "step": 7854 }, { "epoch": 187.02388059701494, "grad_norm": 77.55919647216797, "learning_rate": 9.12781954887218e-06, "loss": 42.4281, "step": 7855 }, { "epoch": 187.04776119402985, "grad_norm": 38.300228118896484, "learning_rate": 9.126566416040102e-06, "loss": 42.0331, "step": 7856 }, { "epoch": 187.07164179104478, "grad_norm": 94.77706146240234, "learning_rate": 9.12531328320802e-06, "loss": 42.6255, "step": 7857 }, { "epoch": 187.0955223880597, "grad_norm": 75.2891616821289, "learning_rate": 9.124060150375941e-06, "loss": 42.5707, "step": 7858 }, { "epoch": 187.11940298507463, "grad_norm": 69.42463684082031, "learning_rate": 9.12280701754386e-06, "loss": 43.4412, "step": 7859 }, { "epoch": 187.14328358208957, "grad_norm": 71.69522857666016, "learning_rate": 9.12155388471178e-06, "loss": 42.0621, "step": 7860 }, { "epoch": 187.16716417910447, "grad_norm": 60.53205490112305, "learning_rate": 9.1203007518797e-06, "loss": 42.2368, "step": 7861 }, { "epoch": 187.1910447761194, "grad_norm": 62.059078216552734, "learning_rate": 9.11904761904762e-06, "loss": 43.9762, "step": 7862 }, { "epoch": 187.21492537313432, "grad_norm": 60.76016616821289, "learning_rate": 9.11779448621554e-06, "loss": 42.2956, "step": 7863 }, { "epoch": 187.23880597014926, "grad_norm": NaN, "learning_rate": 9.116541353383459e-06, "loss": 59.8308, "step": 7864 }, { "epoch": 187.26268656716417, "grad_norm": 117.1172103881836, "learning_rate": 9.116541353383459e-06, "loss": 42.8994, "step": 7865 }, { "epoch": 187.2865671641791, "grad_norm": 36.338539123535156, "learning_rate": 9.115288220551378e-06, "loss": 44.6887, "step": 7866 }, { "epoch": 187.31044776119404, "grad_norm": 117.24983978271484, "learning_rate": 9.114035087719298e-06, "loss": 45.3314, "step": 7867 }, { "epoch": 187.33432835820895, "grad_norm": 105.75101470947266, "learning_rate": 9.11278195488722e-06, "loss": 47.1684, "step": 7868 }, { "epoch": 187.3582089552239, "grad_norm": 94.0297622680664, "learning_rate": 9.111528822055139e-06, "loss": 46.3184, "step": 7869 }, { "epoch": 187.3820895522388, "grad_norm": 128.11460876464844, "learning_rate": 9.110275689223059e-06, "loss": 45.4642, "step": 7870 }, { "epoch": 187.40597014925373, "grad_norm": 68.23126220703125, "learning_rate": 9.109022556390978e-06, "loss": 46.0183, "step": 7871 }, { "epoch": 187.42985074626867, "grad_norm": 127.36836242675781, "learning_rate": 9.107769423558898e-06, "loss": 48.2838, "step": 7872 }, { "epoch": 187.45373134328358, "grad_norm": 94.4326171875, "learning_rate": 9.106516290726817e-06, "loss": 45.5114, "step": 7873 }, { "epoch": 187.47761194029852, "grad_norm": 80.82902526855469, "learning_rate": 9.105263157894739e-06, "loss": 45.2363, "step": 7874 }, { "epoch": 187.50149253731342, "grad_norm": 88.6158447265625, "learning_rate": 9.104010025062657e-06, "loss": 45.7324, "step": 7875 }, { "epoch": 187.52537313432836, "grad_norm": 71.13653564453125, "learning_rate": 9.102756892230578e-06, "loss": 44.9489, "step": 7876 }, { "epoch": 187.54925373134327, "grad_norm": 86.65029907226562, "learning_rate": 9.101503759398497e-06, "loss": 47.3925, "step": 7877 }, { "epoch": 187.5731343283582, "grad_norm": 74.06425476074219, "learning_rate": 9.100250626566417e-06, "loss": 47.0441, "step": 7878 }, { "epoch": 187.59701492537314, "grad_norm": 73.37405395507812, "learning_rate": 9.098997493734337e-06, "loss": 46.7387, "step": 7879 }, { "epoch": 187.62089552238805, "grad_norm": 65.9063491821289, "learning_rate": 9.097744360902256e-06, "loss": 46.0156, "step": 7880 }, { "epoch": 187.644776119403, "grad_norm": 64.36770629882812, "learning_rate": 9.096491228070178e-06, "loss": 47.2289, "step": 7881 }, { "epoch": 187.6686567164179, "grad_norm": 77.95767974853516, "learning_rate": 9.095238095238095e-06, "loss": 46.719, "step": 7882 }, { "epoch": 187.69253731343284, "grad_norm": 63.57596969604492, "learning_rate": 9.093984962406017e-06, "loss": 47.6364, "step": 7883 }, { "epoch": 187.71641791044777, "grad_norm": 70.62786102294922, "learning_rate": 9.092731829573935e-06, "loss": 45.9877, "step": 7884 }, { "epoch": 187.74029850746268, "grad_norm": 66.49168395996094, "learning_rate": 9.091478696741856e-06, "loss": 46.4989, "step": 7885 }, { "epoch": 187.76417910447762, "grad_norm": 67.90526580810547, "learning_rate": 9.090225563909776e-06, "loss": 46.4051, "step": 7886 }, { "epoch": 187.78805970149253, "grad_norm": 66.38755798339844, "learning_rate": 9.088972431077695e-06, "loss": 48.2885, "step": 7887 }, { "epoch": 187.81194029850747, "grad_norm": 45.47574234008789, "learning_rate": 9.087719298245615e-06, "loss": 48.894, "step": 7888 }, { "epoch": 187.83582089552237, "grad_norm": 99.2913589477539, "learning_rate": 9.086466165413534e-06, "loss": 44.985, "step": 7889 }, { "epoch": 187.8597014925373, "grad_norm": 69.80074310302734, "learning_rate": 9.085213032581454e-06, "loss": 46.3278, "step": 7890 }, { "epoch": 187.88358208955225, "grad_norm": 112.97005462646484, "learning_rate": 9.083959899749374e-06, "loss": 47.703, "step": 7891 }, { "epoch": 187.90746268656716, "grad_norm": 107.71286010742188, "learning_rate": 9.082706766917295e-06, "loss": 48.1937, "step": 7892 }, { "epoch": 187.9313432835821, "grad_norm": 89.39302825927734, "learning_rate": 9.081453634085213e-06, "loss": 45.5158, "step": 7893 }, { "epoch": 187.955223880597, "grad_norm": 92.1109848022461, "learning_rate": 9.080200501253134e-06, "loss": 46.6717, "step": 7894 }, { "epoch": 187.97910447761194, "grad_norm": 102.06716918945312, "learning_rate": 9.078947368421054e-06, "loss": 47.8201, "step": 7895 }, { "epoch": 188.0, "grad_norm": 93.22721862792969, "learning_rate": 9.077694235588973e-06, "loss": 42.6335, "step": 7896 }, { "epoch": 188.02388059701494, "grad_norm": 88.64990234375, "learning_rate": 9.076441102756893e-06, "loss": 46.5677, "step": 7897 }, { "epoch": 188.04776119402985, "grad_norm": 75.4064712524414, "learning_rate": 9.075187969924812e-06, "loss": 45.7132, "step": 7898 }, { "epoch": 188.07164179104478, "grad_norm": 90.9974594116211, "learning_rate": 9.073934837092732e-06, "loss": 48.1919, "step": 7899 }, { "epoch": 188.0955223880597, "grad_norm": 73.4759521484375, "learning_rate": 9.072681704260652e-06, "loss": 46.6734, "step": 7900 }, { "epoch": 188.11940298507463, "grad_norm": 84.57060241699219, "learning_rate": 9.071428571428573e-06, "loss": 44.9733, "step": 7901 }, { "epoch": 188.14328358208957, "grad_norm": 83.07115936279297, "learning_rate": 9.070175438596493e-06, "loss": 46.51, "step": 7902 }, { "epoch": 188.16716417910447, "grad_norm": 102.78768920898438, "learning_rate": 9.068922305764412e-06, "loss": 48.7774, "step": 7903 }, { "epoch": 188.1910447761194, "grad_norm": 85.22908020019531, "learning_rate": 9.067669172932332e-06, "loss": 47.7442, "step": 7904 }, { "epoch": 188.21492537313432, "grad_norm": 94.43974304199219, "learning_rate": 9.066416040100251e-06, "loss": 45.0615, "step": 7905 }, { "epoch": 188.23880597014926, "grad_norm": 76.01085662841797, "learning_rate": 9.065162907268171e-06, "loss": 45.716, "step": 7906 }, { "epoch": 188.26268656716417, "grad_norm": 114.36836242675781, "learning_rate": 9.06390977443609e-06, "loss": 50.8167, "step": 7907 }, { "epoch": 188.2865671641791, "grad_norm": 103.42585754394531, "learning_rate": 9.06265664160401e-06, "loss": 47.1027, "step": 7908 }, { "epoch": 188.31044776119404, "grad_norm": NaN, "learning_rate": 9.061403508771931e-06, "loss": 63.108, "step": 7909 }, { "epoch": 188.33432835820895, "grad_norm": 164.3147735595703, "learning_rate": 9.061403508771931e-06, "loss": 45.6579, "step": 7910 }, { "epoch": 188.3582089552239, "grad_norm": 169.74057006835938, "learning_rate": 9.06015037593985e-06, "loss": 47.956, "step": 7911 }, { "epoch": 188.3820895522388, "grad_norm": 40.49880599975586, "learning_rate": 9.05889724310777e-06, "loss": 47.9248, "step": 7912 }, { "epoch": 188.40597014925373, "grad_norm": 126.68148803710938, "learning_rate": 9.05764411027569e-06, "loss": 51.7, "step": 7913 }, { "epoch": 188.42985074626867, "grad_norm": 78.71920013427734, "learning_rate": 9.05639097744361e-06, "loss": 51.6456, "step": 7914 }, { "epoch": 188.45373134328358, "grad_norm": 148.0843963623047, "learning_rate": 9.05513784461153e-06, "loss": 49.7085, "step": 7915 }, { "epoch": 188.47761194029852, "grad_norm": 133.39694213867188, "learning_rate": 9.053884711779449e-06, "loss": 50.2062, "step": 7916 }, { "epoch": 188.50149253731342, "grad_norm": 101.5084457397461, "learning_rate": 9.05263157894737e-06, "loss": 50.2694, "step": 7917 }, { "epoch": 188.52537313432836, "grad_norm": 114.27274322509766, "learning_rate": 9.051378446115288e-06, "loss": 52.0106, "step": 7918 }, { "epoch": 188.54925373134327, "grad_norm": 76.4062728881836, "learning_rate": 9.05012531328321e-06, "loss": 49.0237, "step": 7919 }, { "epoch": 188.5731343283582, "grad_norm": 122.54005432128906, "learning_rate": 9.048872180451127e-06, "loss": 51.1967, "step": 7920 }, { "epoch": 188.59701492537314, "grad_norm": 78.65968322753906, "learning_rate": 9.047619047619049e-06, "loss": 49.6068, "step": 7921 }, { "epoch": 188.62089552238805, "grad_norm": 107.99015045166016, "learning_rate": 9.046365914786968e-06, "loss": 52.1905, "step": 7922 }, { "epoch": 188.644776119403, "grad_norm": 67.97981262207031, "learning_rate": 9.045112781954888e-06, "loss": 52.1235, "step": 7923 }, { "epoch": 188.6686567164179, "grad_norm": 61.18889617919922, "learning_rate": 9.043859649122807e-06, "loss": 52.9808, "step": 7924 }, { "epoch": 188.69253731343284, "grad_norm": 74.68904876708984, "learning_rate": 9.042606516290727e-06, "loss": 50.4739, "step": 7925 }, { "epoch": 188.71641791044777, "grad_norm": 64.90814971923828, "learning_rate": 9.041353383458648e-06, "loss": 52.8017, "step": 7926 }, { "epoch": 188.74029850746268, "grad_norm": 90.75438690185547, "learning_rate": 9.040100250626566e-06, "loss": 52.6495, "step": 7927 }, { "epoch": 188.76417910447762, "grad_norm": 70.7194595336914, "learning_rate": 9.038847117794488e-06, "loss": 51.2997, "step": 7928 }, { "epoch": 188.78805970149253, "grad_norm": 68.57101440429688, "learning_rate": 9.037593984962407e-06, "loss": 53.6467, "step": 7929 }, { "epoch": 188.81194029850747, "grad_norm": 70.22977447509766, "learning_rate": 9.036340852130327e-06, "loss": 50.1726, "step": 7930 }, { "epoch": 188.83582089552237, "grad_norm": 45.77416229248047, "learning_rate": 9.035087719298246e-06, "loss": 50.1196, "step": 7931 }, { "epoch": 188.8597014925373, "grad_norm": 84.61479187011719, "learning_rate": 9.033834586466166e-06, "loss": 55.4963, "step": 7932 }, { "epoch": 188.88358208955225, "grad_norm": 68.9653091430664, "learning_rate": 9.032581453634086e-06, "loss": 53.8675, "step": 7933 }, { "epoch": 188.90746268656716, "grad_norm": 70.38067626953125, "learning_rate": 9.031328320802005e-06, "loss": 50.8352, "step": 7934 }, { "epoch": 188.9313432835821, "grad_norm": 92.68718719482422, "learning_rate": 9.030075187969925e-06, "loss": 50.6091, "step": 7935 }, { "epoch": 188.955223880597, "grad_norm": 64.3499984741211, "learning_rate": 9.028822055137846e-06, "loss": 54.2168, "step": 7936 }, { "epoch": 188.97910447761194, "grad_norm": 144.99208068847656, "learning_rate": 9.027568922305766e-06, "loss": 52.3189, "step": 7937 }, { "epoch": 189.0, "grad_norm": 94.81573486328125, "learning_rate": 9.026315789473685e-06, "loss": 45.4228, "step": 7938 }, { "epoch": 189.02388059701494, "grad_norm": 82.54339599609375, "learning_rate": 9.025062656641605e-06, "loss": 51.6774, "step": 7939 }, { "epoch": 189.04776119402985, "grad_norm": 89.21769714355469, "learning_rate": 9.023809523809524e-06, "loss": 51.129, "step": 7940 }, { "epoch": 189.07164179104478, "grad_norm": 99.30877685546875, "learning_rate": 9.022556390977444e-06, "loss": 53.2338, "step": 7941 }, { "epoch": 189.0955223880597, "grad_norm": 80.18331909179688, "learning_rate": 9.021303258145364e-06, "loss": 51.5768, "step": 7942 }, { "epoch": 189.11940298507463, "grad_norm": 104.38993835449219, "learning_rate": 9.020050125313285e-06, "loss": 52.6943, "step": 7943 }, { "epoch": 189.14328358208957, "grad_norm": 65.29129791259766, "learning_rate": 9.018796992481203e-06, "loss": 52.3297, "step": 7944 }, { "epoch": 189.16716417910447, "grad_norm": 157.73797607421875, "learning_rate": 9.017543859649124e-06, "loss": 52.3911, "step": 7945 }, { "epoch": 189.1910447761194, "grad_norm": 123.55492401123047, "learning_rate": 9.016290726817044e-06, "loss": 51.8406, "step": 7946 }, { "epoch": 189.21492537313432, "grad_norm": 92.67152404785156, "learning_rate": 9.015037593984963e-06, "loss": 53.1862, "step": 7947 }, { "epoch": 189.23880597014926, "grad_norm": 89.75629425048828, "learning_rate": 9.013784461152883e-06, "loss": 53.6459, "step": 7948 }, { "epoch": 189.26268656716417, "grad_norm": 104.82584381103516, "learning_rate": 9.012531328320803e-06, "loss": 51.5277, "step": 7949 }, { "epoch": 189.2865671641791, "grad_norm": 84.31354522705078, "learning_rate": 9.011278195488722e-06, "loss": 51.0429, "step": 7950 }, { "epoch": 189.31044776119404, "grad_norm": 129.83505249023438, "learning_rate": 9.010025062656642e-06, "loss": 51.312, "step": 7951 }, { "epoch": 189.33432835820895, "grad_norm": 111.79363250732422, "learning_rate": 9.008771929824563e-06, "loss": 52.9559, "step": 7952 }, { "epoch": 189.3582089552239, "grad_norm": 102.4739990234375, "learning_rate": 9.007518796992481e-06, "loss": 53.0077, "step": 7953 }, { "epoch": 189.3820895522388, "grad_norm": 102.056640625, "learning_rate": 9.006265664160402e-06, "loss": 51.0904, "step": 7954 }, { "epoch": 189.40597014925373, "grad_norm": 112.38508605957031, "learning_rate": 9.005012531328322e-06, "loss": 56.6613, "step": 7955 }, { "epoch": 189.42985074626867, "grad_norm": 96.45929718017578, "learning_rate": 9.003759398496241e-06, "loss": 53.1833, "step": 7956 }, { "epoch": 189.45373134328358, "grad_norm": 108.03256225585938, "learning_rate": 9.002506265664161e-06, "loss": 52.3652, "step": 7957 }, { "epoch": 189.47761194029852, "grad_norm": 99.21728515625, "learning_rate": 9.00125313283208e-06, "loss": 52.947, "step": 7958 }, { "epoch": 189.50149253731342, "grad_norm": 97.00907897949219, "learning_rate": 9e-06, "loss": 53.861, "step": 7959 }, { "epoch": 189.52537313432836, "grad_norm": 88.09246063232422, "learning_rate": 8.99874686716792e-06, "loss": 51.9903, "step": 7960 }, { "epoch": 189.54925373134327, "grad_norm": 144.51100158691406, "learning_rate": 8.997493734335841e-06, "loss": 51.6353, "step": 7961 }, { "epoch": 189.5731343283582, "grad_norm": 137.4646453857422, "learning_rate": 8.99624060150376e-06, "loss": 52.1263, "step": 7962 }, { "epoch": 189.59701492537314, "grad_norm": 82.87004852294922, "learning_rate": 8.99498746867168e-06, "loss": 53.4347, "step": 7963 }, { "epoch": 189.62089552238805, "grad_norm": 73.51374053955078, "learning_rate": 8.9937343358396e-06, "loss": 54.3519, "step": 7964 }, { "epoch": 189.644776119403, "grad_norm": 124.1756362915039, "learning_rate": 8.99248120300752e-06, "loss": 51.7761, "step": 7965 }, { "epoch": 189.6686567164179, "grad_norm": 103.279052734375, "learning_rate": 8.991228070175439e-06, "loss": 52.0611, "step": 7966 }, { "epoch": 189.69253731343284, "grad_norm": 114.74515533447266, "learning_rate": 8.989974937343359e-06, "loss": 52.4413, "step": 7967 }, { "epoch": 189.71641791044777, "grad_norm": 117.58612823486328, "learning_rate": 8.988721804511278e-06, "loss": 52.8869, "step": 7968 }, { "epoch": 189.74029850746268, "grad_norm": 97.4476089477539, "learning_rate": 8.9874686716792e-06, "loss": 54.2015, "step": 7969 }, { "epoch": 189.76417910447762, "grad_norm": 84.25241088867188, "learning_rate": 8.98621553884712e-06, "loss": 50.7358, "step": 7970 }, { "epoch": 189.78805970149253, "grad_norm": 120.82125854492188, "learning_rate": 8.984962406015039e-06, "loss": 55.0902, "step": 7971 }, { "epoch": 189.81194029850747, "grad_norm": 109.24608612060547, "learning_rate": 8.983709273182958e-06, "loss": 52.6568, "step": 7972 }, { "epoch": 189.83582089552237, "grad_norm": 94.20904541015625, "learning_rate": 8.982456140350878e-06, "loss": 51.7781, "step": 7973 }, { "epoch": 189.8597014925373, "grad_norm": 90.83358764648438, "learning_rate": 8.981203007518798e-06, "loss": 51.6348, "step": 7974 }, { "epoch": 189.88358208955225, "grad_norm": 103.75609588623047, "learning_rate": 8.979949874686717e-06, "loss": 52.4758, "step": 7975 }, { "epoch": 189.90746268656716, "grad_norm": 81.22749328613281, "learning_rate": 8.978696741854638e-06, "loss": 53.4991, "step": 7976 }, { "epoch": 189.9313432835821, "grad_norm": 122.41837310791016, "learning_rate": 8.977443609022556e-06, "loss": 53.3596, "step": 7977 }, { "epoch": 189.955223880597, "grad_norm": 93.01902770996094, "learning_rate": 8.976190476190478e-06, "loss": 53.743, "step": 7978 }, { "epoch": 189.97910447761194, "grad_norm": 110.40689086914062, "learning_rate": 8.974937343358396e-06, "loss": 54.3228, "step": 7979 }, { "epoch": 190.0, "grad_norm": 109.52339172363281, "learning_rate": 8.973684210526317e-06, "loss": 47.9354, "step": 7980 }, { "epoch": 190.0, "step": 7980, "total_flos": 3.92332713507634e+17, "train_loss": 4.278731836591448, "train_runtime": 25654.6059, "train_samples_per_second": 39.637, "train_steps_per_second": 0.311 }, { "epoch": 190.02388059701494, "grad_norm": Infinity, "learning_rate": 1e-05, "loss": 52.4803, "step": 7981 }, { "epoch": 190.04776119402985, "grad_norm": Infinity, "learning_rate": 1e-05, "loss": 52.896, "step": 7982 }, { "epoch": 190.07164179104478, "grad_norm": 441.7268371582031, "learning_rate": 1e-05, "loss": 51.0126, "step": 7983 }, { "epoch": 190.0955223880597, "grad_norm": 493.6844482421875, "learning_rate": 9.998809523809524e-06, "loss": 55.3637, "step": 7984 }, { "epoch": 190.11940298507463, "grad_norm": 251.88214111328125, "learning_rate": 9.997619047619048e-06, "loss": 48.297, "step": 7985 }, { "epoch": 190.14328358208957, "grad_norm": 171.21153259277344, "learning_rate": 9.996428571428572e-06, "loss": 43.8059, "step": 7986 }, { "epoch": 190.16716417910447, "grad_norm": 102.77701568603516, "learning_rate": 9.995238095238095e-06, "loss": 43.0885, "step": 7987 }, { "epoch": 190.1910447761194, "grad_norm": 99.9967041015625, "learning_rate": 9.99404761904762e-06, "loss": 43.7896, "step": 7988 }, { "epoch": 190.21492537313432, "grad_norm": 66.79180145263672, "learning_rate": 9.992857142857144e-06, "loss": 40.7604, "step": 7989 }, { "epoch": 190.23880597014926, "grad_norm": 49.20097732543945, "learning_rate": 9.991666666666668e-06, "loss": 43.0224, "step": 7990 }, { "epoch": 190.26268656716417, "grad_norm": 66.32890319824219, "learning_rate": 9.990476190476191e-06, "loss": 40.7674, "step": 7991 }, { "epoch": 190.2865671641791, "grad_norm": 44.98344421386719, "learning_rate": 9.989285714285715e-06, "loss": 41.5453, "step": 7992 }, { "epoch": 190.31044776119404, "grad_norm": 32.713680267333984, "learning_rate": 9.988095238095239e-06, "loss": 40.4575, "step": 7993 }, { "epoch": 190.33432835820895, "grad_norm": 28.200599670410156, "learning_rate": 9.986904761904764e-06, "loss": 40.2662, "step": 7994 }, { "epoch": 190.3582089552239, "grad_norm": 26.295700073242188, "learning_rate": 9.985714285714286e-06, "loss": 41.0945, "step": 7995 }, { "epoch": 190.3820895522388, "grad_norm": 23.81634521484375, "learning_rate": 9.984523809523811e-06, "loss": 39.8172, "step": 7996 }, { "epoch": 190.40597014925373, "grad_norm": 23.616661071777344, "learning_rate": 9.983333333333333e-06, "loss": 39.8794, "step": 7997 }, { "epoch": 190.42985074626867, "grad_norm": 25.093244552612305, "learning_rate": 9.982142857142858e-06, "loss": 40.4312, "step": 7998 }, { "epoch": 190.45373134328358, "grad_norm": 20.444486618041992, "learning_rate": 9.980952380952382e-06, "loss": 40.2981, "step": 7999 }, { "epoch": 190.47761194029852, "grad_norm": 32.319217681884766, "learning_rate": 9.979761904761906e-06, "loss": 40.1029, "step": 8000 }, { "epoch": 190.50149253731342, "grad_norm": 27.1708984375, "learning_rate": 9.97857142857143e-06, "loss": 40.2249, "step": 8001 }, { "epoch": 190.52537313432836, "grad_norm": NaN, "learning_rate": 9.977380952380953e-06, "loss": 37.4824, "step": 8002 }, { "epoch": 190.54925373134327, "grad_norm": 17.54867935180664, "learning_rate": 9.977380952380953e-06, "loss": 38.6291, "step": 8003 }, { "epoch": 190.5731343283582, "grad_norm": 16.21549415588379, "learning_rate": 9.976190476190477e-06, "loss": 38.1713, "step": 8004 }, { "epoch": 190.59701492537314, "grad_norm": 18.15376853942871, "learning_rate": 9.975000000000002e-06, "loss": 38.2825, "step": 8005 }, { "epoch": 190.62089552238805, "grad_norm": 23.3570613861084, "learning_rate": 9.973809523809524e-06, "loss": 39.1566, "step": 8006 }, { "epoch": 190.644776119403, "grad_norm": 26.723541259765625, "learning_rate": 9.972619047619049e-06, "loss": 39.1849, "step": 8007 }, { "epoch": 190.6686567164179, "grad_norm": 16.985681533813477, "learning_rate": 9.971428571428571e-06, "loss": 37.7583, "step": 8008 }, { "epoch": 190.69253731343284, "grad_norm": 30.02682876586914, "learning_rate": 9.970238095238096e-06, "loss": 40.4904, "step": 8009 }, { "epoch": 190.71641791044777, "grad_norm": 20.605270385742188, "learning_rate": 9.96904761904762e-06, "loss": 38.376, "step": 8010 }, { "epoch": 190.74029850746268, "grad_norm": 16.961578369140625, "learning_rate": 9.967857142857144e-06, "loss": 39.2046, "step": 8011 }, { "epoch": 190.76417910447762, "grad_norm": 17.805517196655273, "learning_rate": 9.966666666666667e-06, "loss": 40.0389, "step": 8012 }, { "epoch": 190.78805970149253, "grad_norm": 19.87175750732422, "learning_rate": 9.965476190476191e-06, "loss": 40.2082, "step": 8013 }, { "epoch": 190.81194029850747, "grad_norm": 14.827139854431152, "learning_rate": 9.964285714285714e-06, "loss": 38.3013, "step": 8014 }, { "epoch": 190.83582089552237, "grad_norm": 21.773862838745117, "learning_rate": 9.963095238095238e-06, "loss": 38.1243, "step": 8015 }, { "epoch": 190.8597014925373, "grad_norm": 17.08941078186035, "learning_rate": 9.961904761904763e-06, "loss": 38.4414, "step": 8016 }, { "epoch": 190.88358208955225, "grad_norm": 21.989667892456055, "learning_rate": 9.960714285714287e-06, "loss": 37.9022, "step": 8017 }, { "epoch": 190.90746268656716, "grad_norm": 20.80973243713379, "learning_rate": 9.95952380952381e-06, "loss": 39.3492, "step": 8018 }, { "epoch": 190.9313432835821, "grad_norm": 16.177169799804688, "learning_rate": 9.958333333333334e-06, "loss": 37.8216, "step": 8019 }, { "epoch": 190.955223880597, "grad_norm": 15.633136749267578, "learning_rate": 9.957142857142858e-06, "loss": 39.4773, "step": 8020 }, { "epoch": 190.97910447761194, "grad_norm": 16.583450317382812, "learning_rate": 9.955952380952382e-06, "loss": 38.428, "step": 8021 }, { "epoch": 191.0, "grad_norm": 17.35057830810547, "learning_rate": 9.954761904761905e-06, "loss": 32.5048, "step": 8022 }, { "epoch": 191.02388059701494, "grad_norm": 15.571488380432129, "learning_rate": 9.953571428571429e-06, "loss": 38.6245, "step": 8023 }, { "epoch": 191.04776119402985, "grad_norm": 16.987716674804688, "learning_rate": 9.952380952380954e-06, "loss": 38.1887, "step": 8024 }, { "epoch": 191.07164179104478, "grad_norm": 15.907127380371094, "learning_rate": 9.951190476190476e-06, "loss": 38.5188, "step": 8025 }, { "epoch": 191.0955223880597, "grad_norm": 14.632792472839355, "learning_rate": 9.950000000000001e-06, "loss": 39.141, "step": 8026 }, { "epoch": 191.11940298507463, "grad_norm": 17.629247665405273, "learning_rate": 9.948809523809525e-06, "loss": 38.007, "step": 8027 }, { "epoch": 191.14328358208957, "grad_norm": NaN, "learning_rate": 9.947619047619049e-06, "loss": 69.9276, "step": 8028 }, { "epoch": 191.16716417910447, "grad_norm": 18.94240951538086, "learning_rate": 9.947619047619049e-06, "loss": 37.8418, "step": 8029 }, { "epoch": 191.1910447761194, "grad_norm": 18.839948654174805, "learning_rate": 9.946428571428572e-06, "loss": 37.8526, "step": 8030 }, { "epoch": 191.21492537313432, "grad_norm": 14.232295989990234, "learning_rate": 9.945238095238096e-06, "loss": 38.1558, "step": 8031 }, { "epoch": 191.23880597014926, "grad_norm": 15.381818771362305, "learning_rate": 9.94404761904762e-06, "loss": 37.9595, "step": 8032 }, { "epoch": 191.26268656716417, "grad_norm": 17.140670776367188, "learning_rate": 9.942857142857145e-06, "loss": 38.8779, "step": 8033 }, { "epoch": 191.2865671641791, "grad_norm": 18.122289657592773, "learning_rate": 9.941666666666667e-06, "loss": 38.0931, "step": 8034 }, { "epoch": 191.31044776119404, "grad_norm": 14.274928092956543, "learning_rate": 9.940476190476192e-06, "loss": 38.2908, "step": 8035 }, { "epoch": 191.33432835820895, "grad_norm": 18.1863956451416, "learning_rate": 9.939285714285714e-06, "loss": 37.1521, "step": 8036 }, { "epoch": 191.3582089552239, "grad_norm": 24.29243278503418, "learning_rate": 9.93809523809524e-06, "loss": 35.9136, "step": 8037 }, { "epoch": 191.3820895522388, "grad_norm": 18.86484146118164, "learning_rate": 9.936904761904763e-06, "loss": 38.4593, "step": 8038 }, { "epoch": 191.40597014925373, "grad_norm": 13.947208404541016, "learning_rate": 9.935714285714286e-06, "loss": 37.8925, "step": 8039 }, { "epoch": 191.42985074626867, "grad_norm": 23.31012535095215, "learning_rate": 9.93452380952381e-06, "loss": 37.635, "step": 8040 }, { "epoch": 191.45373134328358, "grad_norm": 16.41149139404297, "learning_rate": 9.933333333333334e-06, "loss": 39.8144, "step": 8041 }, { "epoch": 191.47761194029852, "grad_norm": 18.565690994262695, "learning_rate": 9.932142857142857e-06, "loss": 38.132, "step": 8042 }, { "epoch": 191.50149253731342, "grad_norm": 26.473716735839844, "learning_rate": 9.930952380952383e-06, "loss": 38.2176, "step": 8043 }, { "epoch": 191.52537313432836, "grad_norm": 18.78125762939453, "learning_rate": 9.929761904761906e-06, "loss": 38.9379, "step": 8044 }, { "epoch": 191.54925373134327, "grad_norm": 34.09467315673828, "learning_rate": 9.92857142857143e-06, "loss": 38.1285, "step": 8045 }, { "epoch": 191.5731343283582, "grad_norm": 27.37683868408203, "learning_rate": 9.927380952380953e-06, "loss": 38.5859, "step": 8046 }, { "epoch": 191.59701492537314, "grad_norm": 38.51087188720703, "learning_rate": 9.926190476190477e-06, "loss": 38.3501, "step": 8047 }, { "epoch": 191.62089552238805, "grad_norm": NaN, "learning_rate": 9.925e-06, "loss": 44.5791, "step": 8048 }, { "epoch": 191.644776119403, "grad_norm": 29.464855194091797, "learning_rate": 9.925e-06, "loss": 39.7041, "step": 8049 }, { "epoch": 191.6686567164179, "grad_norm": 32.26063919067383, "learning_rate": 9.923809523809524e-06, "loss": 38.6149, "step": 8050 }, { "epoch": 191.69253731343284, "grad_norm": 27.70380401611328, "learning_rate": 9.922619047619048e-06, "loss": 38.1828, "step": 8051 }, { "epoch": 191.71641791044777, "grad_norm": 33.598445892333984, "learning_rate": 9.921428571428572e-06, "loss": 38.5361, "step": 8052 }, { "epoch": 191.74029850746268, "grad_norm": 30.742996215820312, "learning_rate": 9.920238095238097e-06, "loss": 38.2798, "step": 8053 }, { "epoch": 191.76417910447762, "grad_norm": 26.842199325561523, "learning_rate": 9.91904761904762e-06, "loss": 38.3534, "step": 8054 }, { "epoch": 191.78805970149253, "grad_norm": 25.476842880249023, "learning_rate": 9.917857142857144e-06, "loss": 38.7694, "step": 8055 }, { "epoch": 191.81194029850747, "grad_norm": 33.05502700805664, "learning_rate": 9.916666666666668e-06, "loss": 38.1734, "step": 8056 }, { "epoch": 191.83582089552237, "grad_norm": 27.849227905273438, "learning_rate": 9.915476190476191e-06, "loss": 36.4576, "step": 8057 }, { "epoch": 191.8597014925373, "grad_norm": 29.301816940307617, "learning_rate": 9.914285714285715e-06, "loss": 38.1189, "step": 8058 }, { "epoch": 191.88358208955225, "grad_norm": 31.922199249267578, "learning_rate": 9.91309523809524e-06, "loss": 38.8657, "step": 8059 }, { "epoch": 191.90746268656716, "grad_norm": 26.626665115356445, "learning_rate": 9.911904761904762e-06, "loss": 37.9762, "step": 8060 }, { "epoch": 191.9313432835821, "grad_norm": 23.473674774169922, "learning_rate": 9.910714285714288e-06, "loss": 37.5389, "step": 8061 }, { "epoch": 191.955223880597, "grad_norm": 32.28257751464844, "learning_rate": 9.90952380952381e-06, "loss": 38.1126, "step": 8062 }, { "epoch": 191.97910447761194, "grad_norm": 25.231307983398438, "learning_rate": 9.908333333333335e-06, "loss": 38.2216, "step": 8063 }, { "epoch": 192.0, "grad_norm": NaN, "learning_rate": 9.907142857142858e-06, "loss": 54.0147, "step": 8064 }, { "epoch": 192.02388059701494, "grad_norm": 31.353717803955078, "learning_rate": 9.907142857142858e-06, "loss": 38.4074, "step": 8065 }, { "epoch": 192.04776119402985, "grad_norm": 30.8924503326416, "learning_rate": 9.905952380952382e-06, "loss": 37.7876, "step": 8066 }, { "epoch": 192.07164179104478, "grad_norm": 28.787822723388672, "learning_rate": 9.904761904761906e-06, "loss": 38.7704, "step": 8067 }, { "epoch": 192.0955223880597, "grad_norm": 29.935930252075195, "learning_rate": 9.90357142857143e-06, "loss": 37.1704, "step": 8068 }, { "epoch": 192.11940298507463, "grad_norm": 30.520862579345703, "learning_rate": 9.902380952380953e-06, "loss": 38.6613, "step": 8069 }, { "epoch": 192.14328358208957, "grad_norm": 22.295175552368164, "learning_rate": 9.901190476190476e-06, "loss": 38.7869, "step": 8070 }, { "epoch": 192.16716417910447, "grad_norm": NaN, "learning_rate": 9.9e-06, "loss": 41.744, "step": 8071 }, { "epoch": 192.1910447761194, "grad_norm": 32.97742462158203, "learning_rate": 9.9e-06, "loss": 39.0328, "step": 8072 }, { "epoch": 192.21492537313432, "grad_norm": 28.674301147460938, "learning_rate": 9.898809523809525e-06, "loss": 38.5525, "step": 8073 }, { "epoch": 192.23880597014926, "grad_norm": 31.001100540161133, "learning_rate": 9.897619047619047e-06, "loss": 38.7256, "step": 8074 }, { "epoch": 192.26268656716417, "grad_norm": 27.50682258605957, "learning_rate": 9.896428571428573e-06, "loss": 38.6324, "step": 8075 }, { "epoch": 192.2865671641791, "grad_norm": 29.152971267700195, "learning_rate": 9.895238095238096e-06, "loss": 37.3363, "step": 8076 }, { "epoch": 192.31044776119404, "grad_norm": 23.952505111694336, "learning_rate": 9.89404761904762e-06, "loss": 38.3122, "step": 8077 }, { "epoch": 192.33432835820895, "grad_norm": 34.11610412597656, "learning_rate": 9.892857142857143e-06, "loss": 38.5167, "step": 8078 }, { "epoch": 192.3582089552239, "grad_norm": 25.457550048828125, "learning_rate": 9.891666666666667e-06, "loss": 37.1989, "step": 8079 }, { "epoch": 192.3820895522388, "grad_norm": 28.14068603515625, "learning_rate": 9.89047619047619e-06, "loss": 38.4137, "step": 8080 }, { "epoch": 192.40597014925373, "grad_norm": 29.55773162841797, "learning_rate": 9.889285714285714e-06, "loss": 37.9116, "step": 8081 }, { "epoch": 192.42985074626867, "grad_norm": NaN, "learning_rate": 9.88809523809524e-06, "loss": 33.8244, "step": 8082 }, { "epoch": 192.45373134328358, "grad_norm": 24.517562866210938, "learning_rate": 9.88809523809524e-06, "loss": 37.5466, "step": 8083 }, { "epoch": 192.47761194029852, "grad_norm": 24.37813949584961, "learning_rate": 9.886904761904763e-06, "loss": 37.6051, "step": 8084 }, { "epoch": 192.50149253731342, "grad_norm": 28.54468536376953, "learning_rate": 9.885714285714287e-06, "loss": 38.4495, "step": 8085 }, { "epoch": 192.52537313432836, "grad_norm": 22.6004581451416, "learning_rate": 9.88452380952381e-06, "loss": 37.1362, "step": 8086 }, { "epoch": 192.54925373134327, "grad_norm": 30.039899826049805, "learning_rate": 9.883333333333334e-06, "loss": 37.9443, "step": 8087 }, { "epoch": 192.5731343283582, "grad_norm": 28.983667373657227, "learning_rate": 9.882142857142858e-06, "loss": 37.8541, "step": 8088 }, { "epoch": 192.59701492537314, "grad_norm": 26.619503021240234, "learning_rate": 9.880952380952381e-06, "loss": 37.3685, "step": 8089 }, { "epoch": 192.62089552238805, "grad_norm": 25.137807846069336, "learning_rate": 9.879761904761905e-06, "loss": 36.9982, "step": 8090 }, { "epoch": 192.644776119403, "grad_norm": 29.406036376953125, "learning_rate": 9.87857142857143e-06, "loss": 38.4477, "step": 8091 }, { "epoch": 192.6686567164179, "grad_norm": 25.904155731201172, "learning_rate": 9.877380952380952e-06, "loss": 36.9548, "step": 8092 }, { "epoch": 192.69253731343284, "grad_norm": 29.080020904541016, "learning_rate": 9.876190476190478e-06, "loss": 38.5147, "step": 8093 }, { "epoch": 192.71641791044777, "grad_norm": 24.259777069091797, "learning_rate": 9.875000000000001e-06, "loss": 38.4572, "step": 8094 }, { "epoch": 192.74029850746268, "grad_norm": 30.02660369873047, "learning_rate": 9.873809523809525e-06, "loss": 38.2938, "step": 8095 }, { "epoch": 192.76417910447762, "grad_norm": 26.388124465942383, "learning_rate": 9.872619047619048e-06, "loss": 38.2691, "step": 8096 }, { "epoch": 192.78805970149253, "grad_norm": 29.877897262573242, "learning_rate": 9.871428571428572e-06, "loss": 37.2446, "step": 8097 }, { "epoch": 192.81194029850747, "grad_norm": 24.555816650390625, "learning_rate": 9.870238095238096e-06, "loss": 37.7283, "step": 8098 }, { "epoch": 192.83582089552237, "grad_norm": 29.00787925720215, "learning_rate": 9.869047619047621e-06, "loss": 37.5237, "step": 8099 }, { "epoch": 192.8597014925373, "grad_norm": 26.88437271118164, "learning_rate": 9.867857142857143e-06, "loss": 38.1217, "step": 8100 }, { "epoch": 192.88358208955225, "grad_norm": 27.196168899536133, "learning_rate": 9.866666666666668e-06, "loss": 37.9326, "step": 8101 }, { "epoch": 192.90746268656716, "grad_norm": 25.58395004272461, "learning_rate": 9.86547619047619e-06, "loss": 38.3009, "step": 8102 }, { "epoch": 192.9313432835821, "grad_norm": 30.2125186920166, "learning_rate": 9.864285714285715e-06, "loss": 38.0861, "step": 8103 }, { "epoch": 192.955223880597, "grad_norm": 22.175670623779297, "learning_rate": 9.863095238095239e-06, "loss": 38.6503, "step": 8104 }, { "epoch": 192.97910447761194, "grad_norm": 30.515262603759766, "learning_rate": 9.861904761904763e-06, "loss": 38.1392, "step": 8105 }, { "epoch": 193.0, "grad_norm": 22.113122940063477, "learning_rate": 9.860714285714286e-06, "loss": 34.0352, "step": 8106 }, { "epoch": 193.02388059701494, "grad_norm": 32.851016998291016, "learning_rate": 9.85952380952381e-06, "loss": 37.6278, "step": 8107 }, { "epoch": 193.04776119402985, "grad_norm": 29.324411392211914, "learning_rate": 9.858333333333334e-06, "loss": 39.0323, "step": 8108 }, { "epoch": 193.07164179104478, "grad_norm": 26.576251983642578, "learning_rate": 9.857142857142859e-06, "loss": 38.2389, "step": 8109 }, { "epoch": 193.0955223880597, "grad_norm": 25.70130157470703, "learning_rate": 9.85595238095238e-06, "loss": 37.5488, "step": 8110 }, { "epoch": 193.11940298507463, "grad_norm": 28.945816040039062, "learning_rate": 9.854761904761906e-06, "loss": 38.3826, "step": 8111 }, { "epoch": 193.14328358208957, "grad_norm": 26.942113876342773, "learning_rate": 9.85357142857143e-06, "loss": 37.8922, "step": 8112 }, { "epoch": 193.16716417910447, "grad_norm": 28.610998153686523, "learning_rate": 9.852380952380953e-06, "loss": 37.7559, "step": 8113 }, { "epoch": 193.1910447761194, "grad_norm": 27.649139404296875, "learning_rate": 9.851190476190477e-06, "loss": 38.1339, "step": 8114 }, { "epoch": 193.21492537313432, "grad_norm": 24.258901596069336, "learning_rate": 9.85e-06, "loss": 38.9597, "step": 8115 }, { "epoch": 193.23880597014926, "grad_norm": 21.32745361328125, "learning_rate": 9.848809523809524e-06, "loss": 37.4101, "step": 8116 }, { "epoch": 193.26268656716417, "grad_norm": 25.53805923461914, "learning_rate": 9.847619047619048e-06, "loss": 35.1791, "step": 8117 }, { "epoch": 193.2865671641791, "grad_norm": 21.969554901123047, "learning_rate": 9.846428571428573e-06, "loss": 39.1762, "step": 8118 }, { "epoch": 193.31044776119404, "grad_norm": 37.39038848876953, "learning_rate": 9.845238095238097e-06, "loss": 37.2427, "step": 8119 }, { "epoch": 193.33432835820895, "grad_norm": 33.77116394042969, "learning_rate": 9.84404761904762e-06, "loss": 37.6819, "step": 8120 }, { "epoch": 193.3582089552239, "grad_norm": 23.498018264770508, "learning_rate": 9.842857142857144e-06, "loss": 38.6812, "step": 8121 }, { "epoch": 193.3820895522388, "grad_norm": 25.90577507019043, "learning_rate": 9.841666666666668e-06, "loss": 38.1744, "step": 8122 }, { "epoch": 193.40597014925373, "grad_norm": 25.708383560180664, "learning_rate": 9.840476190476191e-06, "loss": 37.9103, "step": 8123 }, { "epoch": 193.42985074626867, "grad_norm": 19.413223266601562, "learning_rate": 9.839285714285715e-06, "loss": 37.3252, "step": 8124 }, { "epoch": 193.45373134328358, "grad_norm": 30.142765045166016, "learning_rate": 9.838095238095238e-06, "loss": 38.1061, "step": 8125 }, { "epoch": 193.47761194029852, "grad_norm": 24.21076774597168, "learning_rate": 9.836904761904764e-06, "loss": 37.9563, "step": 8126 }, { "epoch": 193.50149253731342, "grad_norm": 30.14665985107422, "learning_rate": 9.835714285714286e-06, "loss": 37.8519, "step": 8127 }, { "epoch": 193.52537313432836, "grad_norm": 27.567338943481445, "learning_rate": 9.834523809523811e-06, "loss": 37.6953, "step": 8128 }, { "epoch": 193.54925373134327, "grad_norm": 29.56963348388672, "learning_rate": 9.833333333333333e-06, "loss": 38.6179, "step": 8129 }, { "epoch": 193.5731343283582, "grad_norm": 25.726116180419922, "learning_rate": 9.832142857142858e-06, "loss": 39.0633, "step": 8130 }, { "epoch": 193.59701492537314, "grad_norm": 28.918392181396484, "learning_rate": 9.830952380952382e-06, "loss": 37.2939, "step": 8131 }, { "epoch": 193.62089552238805, "grad_norm": 26.439655303955078, "learning_rate": 9.829761904761905e-06, "loss": 36.7785, "step": 8132 }, { "epoch": 193.644776119403, "grad_norm": 30.66209602355957, "learning_rate": 9.828571428571429e-06, "loss": 38.4436, "step": 8133 }, { "epoch": 193.6686567164179, "grad_norm": 28.186386108398438, "learning_rate": 9.827380952380953e-06, "loss": 37.3892, "step": 8134 }, { "epoch": 193.69253731343284, "grad_norm": 29.687488555908203, "learning_rate": 9.826190476190476e-06, "loss": 36.881, "step": 8135 }, { "epoch": 193.71641791044777, "grad_norm": NaN, "learning_rate": 9.825000000000002e-06, "loss": 61.7147, "step": 8136 }, { "epoch": 193.74029850746268, "grad_norm": 27.555301666259766, "learning_rate": 9.825000000000002e-06, "loss": 38.5232, "step": 8137 }, { "epoch": 193.76417910447762, "grad_norm": 24.061349868774414, "learning_rate": 9.823809523809524e-06, "loss": 38.7169, "step": 8138 }, { "epoch": 193.78805970149253, "grad_norm": 23.157058715820312, "learning_rate": 9.822619047619049e-06, "loss": 37.1344, "step": 8139 }, { "epoch": 193.81194029850747, "grad_norm": 26.823884963989258, "learning_rate": 9.821428571428573e-06, "loss": 37.9303, "step": 8140 }, { "epoch": 193.83582089552237, "grad_norm": 23.348529815673828, "learning_rate": 9.820238095238096e-06, "loss": 36.8772, "step": 8141 }, { "epoch": 193.8597014925373, "grad_norm": 29.376649856567383, "learning_rate": 9.81904761904762e-06, "loss": 38.3394, "step": 8142 }, { "epoch": 193.88358208955225, "grad_norm": 26.600069046020508, "learning_rate": 9.817857142857143e-06, "loss": 38.7027, "step": 8143 }, { "epoch": 193.90746268656716, "grad_norm": 27.550716400146484, "learning_rate": 9.816666666666667e-06, "loss": 38.6352, "step": 8144 }, { "epoch": 193.9313432835821, "grad_norm": 24.643022537231445, "learning_rate": 9.81547619047619e-06, "loss": 38.334, "step": 8145 }, { "epoch": 193.955223880597, "grad_norm": 25.975337982177734, "learning_rate": 9.814285714285716e-06, "loss": 38.192, "step": 8146 }, { "epoch": 193.97910447761194, "grad_norm": 20.999217987060547, "learning_rate": 9.81309523809524e-06, "loss": 37.1881, "step": 8147 }, { "epoch": 194.0, "grad_norm": 23.38616943359375, "learning_rate": 9.811904761904763e-06, "loss": 34.2557, "step": 8148 }, { "epoch": 194.02388059701494, "grad_norm": 19.22756576538086, "learning_rate": 9.810714285714287e-06, "loss": 36.5685, "step": 8149 }, { "epoch": 194.04776119402985, "grad_norm": 29.112741470336914, "learning_rate": 9.80952380952381e-06, "loss": 38.4037, "step": 8150 }, { "epoch": 194.07164179104478, "grad_norm": 23.135292053222656, "learning_rate": 9.808333333333334e-06, "loss": 38.7293, "step": 8151 }, { "epoch": 194.0955223880597, "grad_norm": 29.674514770507812, "learning_rate": 9.807142857142858e-06, "loss": 36.4426, "step": 8152 }, { "epoch": 194.11940298507463, "grad_norm": 26.233339309692383, "learning_rate": 9.805952380952381e-06, "loss": 38.8275, "step": 8153 }, { "epoch": 194.14328358208957, "grad_norm": 29.903133392333984, "learning_rate": 9.804761904761907e-06, "loss": 38.7159, "step": 8154 }, { "epoch": 194.16716417910447, "grad_norm": 23.831342697143555, "learning_rate": 9.803571428571428e-06, "loss": 38.1488, "step": 8155 }, { "epoch": 194.1910447761194, "grad_norm": 27.747787475585938, "learning_rate": 9.802380952380954e-06, "loss": 37.2102, "step": 8156 }, { "epoch": 194.21492537313432, "grad_norm": 22.538318634033203, "learning_rate": 9.801190476190477e-06, "loss": 39.4014, "step": 8157 }, { "epoch": 194.23880597014926, "grad_norm": 31.50472068786621, "learning_rate": 9.800000000000001e-06, "loss": 37.8116, "step": 8158 }, { "epoch": 194.26268656716417, "grad_norm": 26.331830978393555, "learning_rate": 9.798809523809525e-06, "loss": 37.7444, "step": 8159 }, { "epoch": 194.2865671641791, "grad_norm": 29.495019912719727, "learning_rate": 9.797619047619048e-06, "loss": 36.9883, "step": 8160 }, { "epoch": 194.31044776119404, "grad_norm": 28.174238204956055, "learning_rate": 9.796428571428572e-06, "loss": 37.2823, "step": 8161 }, { "epoch": 194.33432835820895, "grad_norm": 28.516429901123047, "learning_rate": 9.795238095238097e-06, "loss": 37.691, "step": 8162 }, { "epoch": 194.3582089552239, "grad_norm": 25.835765838623047, "learning_rate": 9.794047619047619e-06, "loss": 38.1477, "step": 8163 }, { "epoch": 194.3820895522388, "grad_norm": 25.793895721435547, "learning_rate": 9.792857142857144e-06, "loss": 36.5988, "step": 8164 }, { "epoch": 194.40597014925373, "grad_norm": 20.387521743774414, "learning_rate": 9.791666666666666e-06, "loss": 38.1124, "step": 8165 }, { "epoch": 194.42985074626867, "grad_norm": 24.153411865234375, "learning_rate": 9.790476190476192e-06, "loss": 38.2235, "step": 8166 }, { "epoch": 194.45373134328358, "grad_norm": 17.82645034790039, "learning_rate": 9.789285714285715e-06, "loss": 38.8839, "step": 8167 }, { "epoch": 194.47761194029852, "grad_norm": 26.742172241210938, "learning_rate": 9.788095238095239e-06, "loss": 37.5795, "step": 8168 }, { "epoch": 194.50149253731342, "grad_norm": 19.46932601928711, "learning_rate": 9.786904761904763e-06, "loss": 38.3185, "step": 8169 }, { "epoch": 194.52537313432836, "grad_norm": 27.765174865722656, "learning_rate": 9.785714285714286e-06, "loss": 37.7904, "step": 8170 }, { "epoch": 194.54925373134327, "grad_norm": 23.08241081237793, "learning_rate": 9.78452380952381e-06, "loss": 37.9733, "step": 8171 }, { "epoch": 194.5731343283582, "grad_norm": 23.362966537475586, "learning_rate": 9.783333333333335e-06, "loss": 38.658, "step": 8172 }, { "epoch": 194.59701492537314, "grad_norm": 20.630313873291016, "learning_rate": 9.782142857142857e-06, "loss": 38.6314, "step": 8173 }, { "epoch": 194.62089552238805, "grad_norm": 22.159696578979492, "learning_rate": 9.780952380952382e-06, "loss": 37.8746, "step": 8174 }, { "epoch": 194.644776119403, "grad_norm": 17.65380859375, "learning_rate": 9.779761904761906e-06, "loss": 37.3745, "step": 8175 }, { "epoch": 194.6686567164179, "grad_norm": 24.66594886779785, "learning_rate": 9.77857142857143e-06, "loss": 38.3225, "step": 8176 }, { "epoch": 194.69253731343284, "grad_norm": 18.938581466674805, "learning_rate": 9.777380952380953e-06, "loss": 38.4467, "step": 8177 }, { "epoch": 194.71641791044777, "grad_norm": 26.19101905822754, "learning_rate": 9.776190476190477e-06, "loss": 37.6916, "step": 8178 }, { "epoch": 194.74029850746268, "grad_norm": 23.304584503173828, "learning_rate": 9.775e-06, "loss": 37.0147, "step": 8179 }, { "epoch": 194.76417910447762, "grad_norm": 22.964975357055664, "learning_rate": 9.773809523809524e-06, "loss": 37.2688, "step": 8180 }, { "epoch": 194.78805970149253, "grad_norm": 20.74350929260254, "learning_rate": 9.77261904761905e-06, "loss": 38.9556, "step": 8181 }, { "epoch": 194.81194029850747, "grad_norm": 20.30811309814453, "learning_rate": 9.771428571428571e-06, "loss": 36.9859, "step": 8182 }, { "epoch": 194.83582089552237, "grad_norm": 19.99317741394043, "learning_rate": 9.770238095238097e-06, "loss": 38.7915, "step": 8183 }, { "epoch": 194.8597014925373, "grad_norm": 19.990209579467773, "learning_rate": 9.76904761904762e-06, "loss": 36.9583, "step": 8184 }, { "epoch": 194.88358208955225, "grad_norm": 18.58809471130371, "learning_rate": 9.767857142857144e-06, "loss": 37.8493, "step": 8185 }, { "epoch": 194.90746268656716, "grad_norm": 19.796863555908203, "learning_rate": 9.766666666666667e-06, "loss": 37.5244, "step": 8186 }, { "epoch": 194.9313432835821, "grad_norm": 16.461299896240234, "learning_rate": 9.765476190476191e-06, "loss": 38.1746, "step": 8187 }, { "epoch": 194.955223880597, "grad_norm": 22.582000732421875, "learning_rate": 9.764285714285715e-06, "loss": 38.2809, "step": 8188 }, { "epoch": 194.97910447761194, "grad_norm": 18.955257415771484, "learning_rate": 9.76309523809524e-06, "loss": 36.9266, "step": 8189 }, { "epoch": 195.0, "grad_norm": 17.642061233520508, "learning_rate": 9.761904761904762e-06, "loss": 32.7614, "step": 8190 }, { "epoch": 195.02388059701494, "grad_norm": 19.75026512145996, "learning_rate": 9.760714285714287e-06, "loss": 38.3124, "step": 8191 }, { "epoch": 195.04776119402985, "grad_norm": 17.81207275390625, "learning_rate": 9.75952380952381e-06, "loss": 38.404, "step": 8192 }, { "epoch": 195.07164179104478, "grad_norm": 17.180660247802734, "learning_rate": 9.758333333333334e-06, "loss": 37.2074, "step": 8193 }, { "epoch": 195.0955223880597, "grad_norm": 18.23331069946289, "learning_rate": 9.757142857142858e-06, "loss": 37.6715, "step": 8194 }, { "epoch": 195.11940298507463, "grad_norm": 15.751823425292969, "learning_rate": 9.755952380952382e-06, "loss": 37.7539, "step": 8195 }, { "epoch": 195.14328358208957, "grad_norm": 20.383882522583008, "learning_rate": 9.754761904761905e-06, "loss": 37.5887, "step": 8196 }, { "epoch": 195.16716417910447, "grad_norm": 16.265350341796875, "learning_rate": 9.753571428571429e-06, "loss": 38.5134, "step": 8197 }, { "epoch": 195.1910447761194, "grad_norm": 18.961380004882812, "learning_rate": 9.752380952380953e-06, "loss": 37.9139, "step": 8198 }, { "epoch": 195.21492537313432, "grad_norm": 17.0926513671875, "learning_rate": 9.751190476190478e-06, "loss": 37.3768, "step": 8199 }, { "epoch": 195.23880597014926, "grad_norm": 17.138843536376953, "learning_rate": 9.75e-06, "loss": 37.9512, "step": 8200 }, { "epoch": 195.26268656716417, "grad_norm": 17.132709503173828, "learning_rate": 9.748809523809525e-06, "loss": 38.0403, "step": 8201 }, { "epoch": 195.2865671641791, "grad_norm": 16.376869201660156, "learning_rate": 9.747619047619049e-06, "loss": 37.8101, "step": 8202 }, { "epoch": 195.31044776119404, "grad_norm": 14.971624374389648, "learning_rate": 9.746428571428572e-06, "loss": 38.0216, "step": 8203 }, { "epoch": 195.33432835820895, "grad_norm": 16.75035285949707, "learning_rate": 9.745238095238096e-06, "loss": 37.994, "step": 8204 }, { "epoch": 195.3582089552239, "grad_norm": 15.411774635314941, "learning_rate": 9.74404761904762e-06, "loss": 38.9679, "step": 8205 }, { "epoch": 195.3820895522388, "grad_norm": 14.596837997436523, "learning_rate": 9.742857142857143e-06, "loss": 36.7789, "step": 8206 }, { "epoch": 195.40597014925373, "grad_norm": 20.539121627807617, "learning_rate": 9.741666666666667e-06, "loss": 37.5518, "step": 8207 }, { "epoch": 195.42985074626867, "grad_norm": 16.56817626953125, "learning_rate": 9.74047619047619e-06, "loss": 37.3836, "step": 8208 }, { "epoch": 195.45373134328358, "grad_norm": 17.15846061706543, "learning_rate": 9.739285714285716e-06, "loss": 37.1393, "step": 8209 }, { "epoch": 195.47761194029852, "grad_norm": 17.65296745300293, "learning_rate": 9.73809523809524e-06, "loss": 38.287, "step": 8210 }, { "epoch": 195.50149253731342, "grad_norm": 14.050993919372559, "learning_rate": 9.736904761904763e-06, "loss": 37.2698, "step": 8211 }, { "epoch": 195.52537313432836, "grad_norm": 16.587993621826172, "learning_rate": 9.735714285714287e-06, "loss": 38.2786, "step": 8212 }, { "epoch": 195.54925373134327, "grad_norm": 15.469551086425781, "learning_rate": 9.73452380952381e-06, "loss": 37.2241, "step": 8213 }, { "epoch": 195.5731343283582, "grad_norm": 15.310493469238281, "learning_rate": 9.733333333333334e-06, "loss": 37.7574, "step": 8214 }, { "epoch": 195.59701492537314, "grad_norm": 12.98195743560791, "learning_rate": 9.732142857142858e-06, "loss": 37.173, "step": 8215 }, { "epoch": 195.62089552238805, "grad_norm": 18.167980194091797, "learning_rate": 9.730952380952383e-06, "loss": 37.4605, "step": 8216 }, { "epoch": 195.644776119403, "grad_norm": 15.992667198181152, "learning_rate": 9.729761904761905e-06, "loss": 36.284, "step": 8217 }, { "epoch": 195.6686567164179, "grad_norm": 11.325750350952148, "learning_rate": 9.72857142857143e-06, "loss": 37.5665, "step": 8218 }, { "epoch": 195.69253731343284, "grad_norm": 17.86125373840332, "learning_rate": 9.727380952380954e-06, "loss": 38.5734, "step": 8219 }, { "epoch": 195.71641791044777, "grad_norm": 15.634565353393555, "learning_rate": 9.726190476190477e-06, "loss": 37.0224, "step": 8220 }, { "epoch": 195.74029850746268, "grad_norm": 15.09487247467041, "learning_rate": 9.725000000000001e-06, "loss": 37.3904, "step": 8221 }, { "epoch": 195.76417910447762, "grad_norm": 18.640703201293945, "learning_rate": 9.723809523809525e-06, "loss": 38.1708, "step": 8222 }, { "epoch": 195.78805970149253, "grad_norm": 20.45812225341797, "learning_rate": 9.722619047619048e-06, "loss": 38.8948, "step": 8223 }, { "epoch": 195.81194029850747, "grad_norm": 15.778278350830078, "learning_rate": 9.721428571428573e-06, "loss": 38.3331, "step": 8224 }, { "epoch": 195.83582089552237, "grad_norm": 19.648868560791016, "learning_rate": 9.720238095238095e-06, "loss": 36.9344, "step": 8225 }, { "epoch": 195.8597014925373, "grad_norm": 18.820405960083008, "learning_rate": 9.71904761904762e-06, "loss": 37.0866, "step": 8226 }, { "epoch": 195.88358208955225, "grad_norm": 16.977508544921875, "learning_rate": 9.717857142857143e-06, "loss": 39.2188, "step": 8227 }, { "epoch": 195.90746268656716, "grad_norm": 18.5307559967041, "learning_rate": 9.716666666666668e-06, "loss": 38.635, "step": 8228 }, { "epoch": 195.9313432835821, "grad_norm": 16.042844772338867, "learning_rate": 9.715476190476192e-06, "loss": 38.3158, "step": 8229 }, { "epoch": 195.955223880597, "grad_norm": 18.010839462280273, "learning_rate": 9.714285714285715e-06, "loss": 38.3673, "step": 8230 }, { "epoch": 195.97910447761194, "grad_norm": 19.28598976135254, "learning_rate": 9.713095238095239e-06, "loss": 37.6234, "step": 8231 }, { "epoch": 196.0, "grad_norm": 12.07419490814209, "learning_rate": 9.711904761904762e-06, "loss": 33.0793, "step": 8232 }, { "epoch": 196.02388059701494, "grad_norm": 18.44976234436035, "learning_rate": 9.710714285714286e-06, "loss": 37.1636, "step": 8233 }, { "epoch": 196.04776119402985, "grad_norm": 15.406002044677734, "learning_rate": 9.70952380952381e-06, "loss": 36.0832, "step": 8234 }, { "epoch": 196.07164179104478, "grad_norm": 16.38665008544922, "learning_rate": 9.708333333333333e-06, "loss": 37.709, "step": 8235 }, { "epoch": 196.0955223880597, "grad_norm": 16.234838485717773, "learning_rate": 9.707142857142859e-06, "loss": 37.3668, "step": 8236 }, { "epoch": 196.11940298507463, "grad_norm": 15.862046241760254, "learning_rate": 9.705952380952382e-06, "loss": 37.2736, "step": 8237 }, { "epoch": 196.14328358208957, "grad_norm": 14.635769844055176, "learning_rate": 9.704761904761906e-06, "loss": 37.643, "step": 8238 }, { "epoch": 196.16716417910447, "grad_norm": 20.509693145751953, "learning_rate": 9.70357142857143e-06, "loss": 37.8172, "step": 8239 }, { "epoch": 196.1910447761194, "grad_norm": 17.599340438842773, "learning_rate": 9.702380952380953e-06, "loss": 37.2197, "step": 8240 }, { "epoch": 196.21492537313432, "grad_norm": 22.59776496887207, "learning_rate": 9.701190476190477e-06, "loss": 38.7183, "step": 8241 }, { "epoch": 196.23880597014926, "grad_norm": 16.052112579345703, "learning_rate": 9.7e-06, "loss": 39.4656, "step": 8242 }, { "epoch": 196.26268656716417, "grad_norm": 15.938725471496582, "learning_rate": 9.698809523809526e-06, "loss": 36.0209, "step": 8243 }, { "epoch": 196.2865671641791, "grad_norm": 18.32191276550293, "learning_rate": 9.697619047619048e-06, "loss": 37.7406, "step": 8244 }, { "epoch": 196.31044776119404, "grad_norm": 17.4626407623291, "learning_rate": 9.696428571428573e-06, "loss": 37.5677, "step": 8245 }, { "epoch": 196.33432835820895, "grad_norm": 16.99214744567871, "learning_rate": 9.695238095238096e-06, "loss": 38.1292, "step": 8246 }, { "epoch": 196.3582089552239, "grad_norm": 20.447288513183594, "learning_rate": 9.69404761904762e-06, "loss": 36.6892, "step": 8247 }, { "epoch": 196.3820895522388, "grad_norm": 16.08639907836914, "learning_rate": 9.692857142857144e-06, "loss": 38.0742, "step": 8248 }, { "epoch": 196.40597014925373, "grad_norm": 20.349925994873047, "learning_rate": 9.691666666666667e-06, "loss": 36.1244, "step": 8249 }, { "epoch": 196.42985074626867, "grad_norm": 18.42910385131836, "learning_rate": 9.690476190476191e-06, "loss": 38.6168, "step": 8250 }, { "epoch": 196.45373134328358, "grad_norm": 19.19205665588379, "learning_rate": 9.689285714285716e-06, "loss": 38.4484, "step": 8251 }, { "epoch": 196.47761194029852, "grad_norm": 15.112396240234375, "learning_rate": 9.688095238095238e-06, "loss": 38.2654, "step": 8252 }, { "epoch": 196.50149253731342, "grad_norm": 22.324058532714844, "learning_rate": 9.686904761904764e-06, "loss": 39.2476, "step": 8253 }, { "epoch": 196.52537313432836, "grad_norm": 19.882596969604492, "learning_rate": 9.685714285714285e-06, "loss": 37.9678, "step": 8254 }, { "epoch": 196.54925373134327, "grad_norm": 20.174253463745117, "learning_rate": 9.68452380952381e-06, "loss": 38.0835, "step": 8255 }, { "epoch": 196.5731343283582, "grad_norm": 15.697175979614258, "learning_rate": 9.683333333333334e-06, "loss": 37.7158, "step": 8256 }, { "epoch": 196.59701492537314, "grad_norm": 17.792743682861328, "learning_rate": 9.682142857142858e-06, "loss": 38.0134, "step": 8257 }, { "epoch": 196.62089552238805, "grad_norm": 19.55414390563965, "learning_rate": 9.680952380952382e-06, "loss": 38.0596, "step": 8258 }, { "epoch": 196.644776119403, "grad_norm": 16.656089782714844, "learning_rate": 9.679761904761905e-06, "loss": 37.9196, "step": 8259 }, { "epoch": 196.6686567164179, "grad_norm": 15.824597358703613, "learning_rate": 9.678571428571429e-06, "loss": 37.6505, "step": 8260 }, { "epoch": 196.69253731343284, "grad_norm": 16.158740997314453, "learning_rate": 9.677380952380954e-06, "loss": 37.6144, "step": 8261 }, { "epoch": 196.71641791044777, "grad_norm": 15.026473045349121, "learning_rate": 9.676190476190476e-06, "loss": 38.89, "step": 8262 }, { "epoch": 196.74029850746268, "grad_norm": 17.907974243164062, "learning_rate": 9.675000000000001e-06, "loss": 37.778, "step": 8263 }, { "epoch": 196.76417910447762, "grad_norm": 18.452228546142578, "learning_rate": 9.673809523809525e-06, "loss": 36.5277, "step": 8264 }, { "epoch": 196.78805970149253, "grad_norm": 13.613883972167969, "learning_rate": 9.672619047619049e-06, "loss": 37.7714, "step": 8265 }, { "epoch": 196.81194029850747, "grad_norm": 15.803605079650879, "learning_rate": 9.671428571428572e-06, "loss": 37.4731, "step": 8266 }, { "epoch": 196.83582089552237, "grad_norm": 15.423070907592773, "learning_rate": 9.670238095238096e-06, "loss": 38.3607, "step": 8267 }, { "epoch": 196.8597014925373, "grad_norm": 14.29737663269043, "learning_rate": 9.66904761904762e-06, "loss": 37.8142, "step": 8268 }, { "epoch": 196.88358208955225, "grad_norm": 12.915861129760742, "learning_rate": 9.667857142857143e-06, "loss": 38.1578, "step": 8269 }, { "epoch": 196.90746268656716, "grad_norm": 17.85457420349121, "learning_rate": 9.666666666666667e-06, "loss": 37.3879, "step": 8270 }, { "epoch": 196.9313432835821, "grad_norm": 17.17386817932129, "learning_rate": 9.665476190476192e-06, "loss": 38.6943, "step": 8271 }, { "epoch": 196.955223880597, "grad_norm": 13.46798038482666, "learning_rate": 9.664285714285716e-06, "loss": 36.7541, "step": 8272 }, { "epoch": 196.97910447761194, "grad_norm": 18.20702362060547, "learning_rate": 9.66309523809524e-06, "loss": 37.5384, "step": 8273 }, { "epoch": 197.0, "grad_norm": 12.276162147521973, "learning_rate": 9.661904761904763e-06, "loss": 32.1864, "step": 8274 }, { "epoch": 197.02388059701494, "grad_norm": 24.191837310791016, "learning_rate": 9.660714285714287e-06, "loss": 37.9378, "step": 8275 }, { "epoch": 197.04776119402985, "grad_norm": 16.841337203979492, "learning_rate": 9.65952380952381e-06, "loss": 37.5576, "step": 8276 }, { "epoch": 197.07164179104478, "grad_norm": 18.5413761138916, "learning_rate": 9.658333333333334e-06, "loss": 36.504, "step": 8277 }, { "epoch": 197.0955223880597, "grad_norm": 22.87081527709961, "learning_rate": 9.657142857142859e-06, "loss": 38.4459, "step": 8278 }, { "epoch": 197.11940298507463, "grad_norm": 16.935012817382812, "learning_rate": 9.655952380952381e-06, "loss": 36.5404, "step": 8279 }, { "epoch": 197.14328358208957, "grad_norm": 15.1573486328125, "learning_rate": 9.654761904761906e-06, "loss": 38.72, "step": 8280 }, { "epoch": 197.16716417910447, "grad_norm": 14.638708114624023, "learning_rate": 9.653571428571428e-06, "loss": 37.1901, "step": 8281 }, { "epoch": 197.1910447761194, "grad_norm": 16.643617630004883, "learning_rate": 9.652380952380954e-06, "loss": 36.4447, "step": 8282 }, { "epoch": 197.21492537313432, "grad_norm": 17.821044921875, "learning_rate": 9.651190476190477e-06, "loss": 36.5, "step": 8283 }, { "epoch": 197.23880597014926, "grad_norm": 15.265460014343262, "learning_rate": 9.65e-06, "loss": 37.3383, "step": 8284 }, { "epoch": 197.26268656716417, "grad_norm": 17.732948303222656, "learning_rate": 9.648809523809524e-06, "loss": 36.9182, "step": 8285 }, { "epoch": 197.2865671641791, "grad_norm": 13.246759414672852, "learning_rate": 9.647619047619048e-06, "loss": 38.1302, "step": 8286 }, { "epoch": 197.31044776119404, "grad_norm": 16.793123245239258, "learning_rate": 9.646428571428572e-06, "loss": 38.0947, "step": 8287 }, { "epoch": 197.33432835820895, "grad_norm": 15.542506217956543, "learning_rate": 9.645238095238097e-06, "loss": 36.9537, "step": 8288 }, { "epoch": 197.3582089552239, "grad_norm": 20.053983688354492, "learning_rate": 9.644047619047619e-06, "loss": 38.043, "step": 8289 }, { "epoch": 197.3820895522388, "grad_norm": 21.54052734375, "learning_rate": 9.642857142857144e-06, "loss": 37.6954, "step": 8290 }, { "epoch": 197.40597014925373, "grad_norm": 14.850491523742676, "learning_rate": 9.641666666666666e-06, "loss": 37.2277, "step": 8291 }, { "epoch": 197.42985074626867, "grad_norm": 18.214035034179688, "learning_rate": 9.640476190476191e-06, "loss": 37.2554, "step": 8292 }, { "epoch": 197.45373134328358, "grad_norm": 14.643274307250977, "learning_rate": 9.639285714285715e-06, "loss": 38.0327, "step": 8293 }, { "epoch": 197.47761194029852, "grad_norm": 16.960464477539062, "learning_rate": 9.638095238095239e-06, "loss": 38.2719, "step": 8294 }, { "epoch": 197.50149253731342, "grad_norm": 16.77781105041504, "learning_rate": 9.636904761904762e-06, "loss": 37.8132, "step": 8295 }, { "epoch": 197.52537313432836, "grad_norm": 16.169910430908203, "learning_rate": 9.635714285714286e-06, "loss": 36.9185, "step": 8296 }, { "epoch": 197.54925373134327, "grad_norm": 16.040151596069336, "learning_rate": 9.63452380952381e-06, "loss": 37.9674, "step": 8297 }, { "epoch": 197.5731343283582, "grad_norm": 18.088455200195312, "learning_rate": 9.633333333333335e-06, "loss": 38.4402, "step": 8298 }, { "epoch": 197.59701492537314, "grad_norm": 17.952877044677734, "learning_rate": 9.632142857142858e-06, "loss": 38.6546, "step": 8299 }, { "epoch": 197.62089552238805, "grad_norm": NaN, "learning_rate": 9.630952380952382e-06, "loss": 60.4204, "step": 8300 }, { "epoch": 197.644776119403, "grad_norm": 20.842395782470703, "learning_rate": 9.630952380952382e-06, "loss": 38.1086, "step": 8301 }, { "epoch": 197.6686567164179, "grad_norm": 16.893850326538086, "learning_rate": 9.629761904761906e-06, "loss": 37.6307, "step": 8302 }, { "epoch": 197.69253731343284, "grad_norm": 21.6165714263916, "learning_rate": 9.62857142857143e-06, "loss": 38.3061, "step": 8303 }, { "epoch": 197.71641791044777, "grad_norm": 22.543777465820312, "learning_rate": 9.627380952380953e-06, "loss": 37.4894, "step": 8304 }, { "epoch": 197.74029850746268, "grad_norm": 15.899398803710938, "learning_rate": 9.626190476190477e-06, "loss": 39.1086, "step": 8305 }, { "epoch": 197.76417910447762, "grad_norm": 33.675968170166016, "learning_rate": 9.625e-06, "loss": 38.5525, "step": 8306 }, { "epoch": 197.78805970149253, "grad_norm": 23.818912506103516, "learning_rate": 9.623809523809524e-06, "loss": 37.7922, "step": 8307 }, { "epoch": 197.81194029850747, "grad_norm": 31.32359504699707, "learning_rate": 9.622619047619049e-06, "loss": 37.6434, "step": 8308 }, { "epoch": 197.83582089552237, "grad_norm": 21.7730712890625, "learning_rate": 9.621428571428573e-06, "loss": 38.4835, "step": 8309 }, { "epoch": 197.8597014925373, "grad_norm": 32.48667907714844, "learning_rate": 9.620238095238096e-06, "loss": 38.0251, "step": 8310 }, { "epoch": 197.88358208955225, "grad_norm": 16.73311424255371, "learning_rate": 9.61904761904762e-06, "loss": 38.1521, "step": 8311 }, { "epoch": 197.90746268656716, "grad_norm": 36.096553802490234, "learning_rate": 9.617857142857144e-06, "loss": 36.5312, "step": 8312 }, { "epoch": 197.9313432835821, "grad_norm": 24.470571517944336, "learning_rate": 9.616666666666667e-06, "loss": 36.2796, "step": 8313 }, { "epoch": 197.955223880597, "grad_norm": 37.85520553588867, "learning_rate": 9.615476190476193e-06, "loss": 37.2368, "step": 8314 }, { "epoch": 197.97910447761194, "grad_norm": 35.127803802490234, "learning_rate": 9.614285714285714e-06, "loss": 38.4903, "step": 8315 }, { "epoch": 198.0, "grad_norm": 26.169633865356445, "learning_rate": 9.61309523809524e-06, "loss": 32.8102, "step": 8316 }, { "epoch": 198.02388059701494, "grad_norm": 27.632892608642578, "learning_rate": 9.611904761904762e-06, "loss": 37.6685, "step": 8317 }, { "epoch": 198.04776119402985, "grad_norm": 26.571184158325195, "learning_rate": 9.610714285714287e-06, "loss": 37.188, "step": 8318 }, { "epoch": 198.07164179104478, "grad_norm": 21.513154983520508, "learning_rate": 9.60952380952381e-06, "loss": 38.8865, "step": 8319 }, { "epoch": 198.0955223880597, "grad_norm": 32.81885528564453, "learning_rate": 9.608333333333334e-06, "loss": 37.3792, "step": 8320 }, { "epoch": 198.11940298507463, "grad_norm": 27.440208435058594, "learning_rate": 9.607142857142858e-06, "loss": 37.5087, "step": 8321 }, { "epoch": 198.14328358208957, "grad_norm": 33.64108657836914, "learning_rate": 9.605952380952381e-06, "loss": 39.1434, "step": 8322 }, { "epoch": 198.16716417910447, "grad_norm": 32.416358947753906, "learning_rate": 9.604761904761905e-06, "loss": 38.1156, "step": 8323 }, { "epoch": 198.1910447761194, "grad_norm": 29.13149070739746, "learning_rate": 9.60357142857143e-06, "loss": 36.4538, "step": 8324 }, { "epoch": 198.21492537313432, "grad_norm": 29.666540145874023, "learning_rate": 9.602380952380952e-06, "loss": 38.0201, "step": 8325 }, { "epoch": 198.23880597014926, "grad_norm": 29.291536331176758, "learning_rate": 9.601190476190478e-06, "loss": 36.4241, "step": 8326 }, { "epoch": 198.26268656716417, "grad_norm": 22.8216609954834, "learning_rate": 9.600000000000001e-06, "loss": 37.9777, "step": 8327 }, { "epoch": 198.2865671641791, "grad_norm": 34.89872360229492, "learning_rate": 9.598809523809525e-06, "loss": 38.2485, "step": 8328 }, { "epoch": 198.31044776119404, "grad_norm": 30.85089874267578, "learning_rate": 9.597619047619048e-06, "loss": 37.071, "step": 8329 }, { "epoch": 198.33432835820895, "grad_norm": 30.49010467529297, "learning_rate": 9.596428571428572e-06, "loss": 37.4761, "step": 8330 }, { "epoch": 198.3582089552239, "grad_norm": 28.778947830200195, "learning_rate": 9.595238095238096e-06, "loss": 36.5581, "step": 8331 }, { "epoch": 198.3820895522388, "grad_norm": 26.166597366333008, "learning_rate": 9.59404761904762e-06, "loss": 37.0474, "step": 8332 }, { "epoch": 198.40597014925373, "grad_norm": 22.517654418945312, "learning_rate": 9.592857142857143e-06, "loss": 35.7226, "step": 8333 }, { "epoch": 198.42985074626867, "grad_norm": 33.42197036743164, "learning_rate": 9.591666666666667e-06, "loss": 38.6154, "step": 8334 }, { "epoch": 198.45373134328358, "grad_norm": 25.83587074279785, "learning_rate": 9.590476190476192e-06, "loss": 37.2725, "step": 8335 }, { "epoch": 198.47761194029852, "grad_norm": 34.74106979370117, "learning_rate": 9.589285714285716e-06, "loss": 38.0993, "step": 8336 }, { "epoch": 198.50149253731342, "grad_norm": 33.30450439453125, "learning_rate": 9.588095238095239e-06, "loss": 37.8493, "step": 8337 }, { "epoch": 198.52537313432836, "grad_norm": 26.693960189819336, "learning_rate": 9.586904761904763e-06, "loss": 38.3353, "step": 8338 }, { "epoch": 198.54925373134327, "grad_norm": 26.671049118041992, "learning_rate": 9.585714285714286e-06, "loss": 37.1855, "step": 8339 }, { "epoch": 198.5731343283582, "grad_norm": 29.15550994873047, "learning_rate": 9.58452380952381e-06, "loss": 38.1117, "step": 8340 }, { "epoch": 198.59701492537314, "grad_norm": 25.527177810668945, "learning_rate": 9.583333333333335e-06, "loss": 37.4152, "step": 8341 }, { "epoch": 198.62089552238805, "grad_norm": 30.142030715942383, "learning_rate": 9.582142857142857e-06, "loss": 38.7658, "step": 8342 }, { "epoch": 198.644776119403, "grad_norm": 26.978553771972656, "learning_rate": 9.580952380952383e-06, "loss": 36.8429, "step": 8343 }, { "epoch": 198.6686567164179, "grad_norm": 29.713537216186523, "learning_rate": 9.579761904761904e-06, "loss": 36.5465, "step": 8344 }, { "epoch": 198.69253731343284, "grad_norm": 27.22649383544922, "learning_rate": 9.57857142857143e-06, "loss": 38.3485, "step": 8345 }, { "epoch": 198.71641791044777, "grad_norm": 29.604351043701172, "learning_rate": 9.577380952380953e-06, "loss": 37.0354, "step": 8346 }, { "epoch": 198.74029850746268, "grad_norm": 26.05364227294922, "learning_rate": 9.576190476190477e-06, "loss": 38.0035, "step": 8347 }, { "epoch": 198.76417910447762, "grad_norm": 30.7071533203125, "learning_rate": 9.575e-06, "loss": 37.6573, "step": 8348 }, { "epoch": 198.78805970149253, "grad_norm": 27.888521194458008, "learning_rate": 9.573809523809524e-06, "loss": 37.6404, "step": 8349 }, { "epoch": 198.81194029850747, "grad_norm": 28.2436466217041, "learning_rate": 9.572619047619048e-06, "loss": 38.7478, "step": 8350 }, { "epoch": 198.83582089552237, "grad_norm": 27.264436721801758, "learning_rate": 9.571428571428573e-06, "loss": 36.9871, "step": 8351 }, { "epoch": 198.8597014925373, "grad_norm": 27.143701553344727, "learning_rate": 9.570238095238095e-06, "loss": 37.7301, "step": 8352 }, { "epoch": 198.88358208955225, "grad_norm": 25.06464385986328, "learning_rate": 9.56904761904762e-06, "loss": 38.1237, "step": 8353 }, { "epoch": 198.90746268656716, "grad_norm": 30.2600040435791, "learning_rate": 9.567857142857142e-06, "loss": 37.5021, "step": 8354 }, { "epoch": 198.9313432835821, "grad_norm": 28.089567184448242, "learning_rate": 9.566666666666668e-06, "loss": 36.0255, "step": 8355 }, { "epoch": 198.955223880597, "grad_norm": 28.468385696411133, "learning_rate": 9.565476190476191e-06, "loss": 37.7092, "step": 8356 }, { "epoch": 198.97910447761194, "grad_norm": 24.718828201293945, "learning_rate": 9.564285714285715e-06, "loss": 38.1477, "step": 8357 }, { "epoch": 199.0, "grad_norm": 25.489274978637695, "learning_rate": 9.563095238095239e-06, "loss": 33.5207, "step": 8358 }, { "epoch": 199.02388059701494, "grad_norm": 23.228055953979492, "learning_rate": 9.561904761904762e-06, "loss": 36.6224, "step": 8359 }, { "epoch": 199.04776119402985, "grad_norm": 29.2338924407959, "learning_rate": 9.560714285714286e-06, "loss": 37.4582, "step": 8360 }, { "epoch": 199.07164179104478, "grad_norm": 25.881567001342773, "learning_rate": 9.559523809523811e-06, "loss": 37.7522, "step": 8361 }, { "epoch": 199.0955223880597, "grad_norm": 32.1788444519043, "learning_rate": 9.558333333333335e-06, "loss": 38.0786, "step": 8362 }, { "epoch": 199.11940298507463, "grad_norm": 29.03343391418457, "learning_rate": 9.557142857142858e-06, "loss": 37.7836, "step": 8363 }, { "epoch": 199.14328358208957, "grad_norm": 26.068822860717773, "learning_rate": 9.555952380952382e-06, "loss": 37.4076, "step": 8364 }, { "epoch": 199.16716417910447, "grad_norm": 25.541522979736328, "learning_rate": 9.554761904761906e-06, "loss": 38.1175, "step": 8365 }, { "epoch": 199.1910447761194, "grad_norm": 28.156938552856445, "learning_rate": 9.55357142857143e-06, "loss": 38.3294, "step": 8366 }, { "epoch": 199.21492537313432, "grad_norm": 24.11278533935547, "learning_rate": 9.552380952380953e-06, "loss": 37.0253, "step": 8367 }, { "epoch": 199.23880597014926, "grad_norm": 31.672948837280273, "learning_rate": 9.551190476190476e-06, "loss": 37.647, "step": 8368 }, { "epoch": 199.26268656716417, "grad_norm": 31.267433166503906, "learning_rate": 9.55e-06, "loss": 38.0205, "step": 8369 }, { "epoch": 199.2865671641791, "grad_norm": 24.666099548339844, "learning_rate": 9.548809523809525e-06, "loss": 37.0519, "step": 8370 }, { "epoch": 199.31044776119404, "grad_norm": 24.412540435791016, "learning_rate": 9.547619047619049e-06, "loss": 36.7093, "step": 8371 }, { "epoch": 199.33432835820895, "grad_norm": 25.304214477539062, "learning_rate": 9.546428571428573e-06, "loss": 37.3666, "step": 8372 }, { "epoch": 199.3582089552239, "grad_norm": 18.673847198486328, "learning_rate": 9.545238095238096e-06, "loss": 36.7329, "step": 8373 }, { "epoch": 199.3820895522388, "grad_norm": 31.444181442260742, "learning_rate": 9.54404761904762e-06, "loss": 37.595, "step": 8374 }, { "epoch": 199.40597014925373, "grad_norm": 28.37276268005371, "learning_rate": 9.542857142857143e-06, "loss": 36.5752, "step": 8375 }, { "epoch": 199.42985074626867, "grad_norm": 31.71356773376465, "learning_rate": 9.541666666666669e-06, "loss": 37.6783, "step": 8376 }, { "epoch": 199.45373134328358, "grad_norm": 29.976390838623047, "learning_rate": 9.54047619047619e-06, "loss": 36.7449, "step": 8377 }, { "epoch": 199.47761194029852, "grad_norm": 24.911964416503906, "learning_rate": 9.539285714285716e-06, "loss": 37.8768, "step": 8378 }, { "epoch": 199.50149253731342, "grad_norm": 18.54723358154297, "learning_rate": 9.538095238095238e-06, "loss": 36.9114, "step": 8379 }, { "epoch": 199.52537313432836, "grad_norm": 31.114011764526367, "learning_rate": 9.536904761904763e-06, "loss": 38.7622, "step": 8380 }, { "epoch": 199.54925373134327, "grad_norm": 20.96200942993164, "learning_rate": 9.535714285714287e-06, "loss": 37.7349, "step": 8381 }, { "epoch": 199.5731343283582, "grad_norm": 30.523534774780273, "learning_rate": 9.53452380952381e-06, "loss": 37.2072, "step": 8382 }, { "epoch": 199.59701492537314, "grad_norm": 26.291833877563477, "learning_rate": 9.533333333333334e-06, "loss": 37.9218, "step": 8383 }, { "epoch": 199.62089552238805, "grad_norm": 30.261066436767578, "learning_rate": 9.532142857142858e-06, "loss": 36.9925, "step": 8384 }, { "epoch": 199.644776119403, "grad_norm": 30.21729850769043, "learning_rate": 9.530952380952381e-06, "loss": 38.014, "step": 8385 }, { "epoch": 199.6686567164179, "grad_norm": 26.573165893554688, "learning_rate": 9.529761904761905e-06, "loss": 38.287, "step": 8386 }, { "epoch": 199.69253731343284, "grad_norm": 24.21065330505371, "learning_rate": 9.528571428571429e-06, "loss": 38.5124, "step": 8387 }, { "epoch": 199.71641791044777, "grad_norm": 26.77363395690918, "learning_rate": 9.527380952380954e-06, "loss": 38.4566, "step": 8388 }, { "epoch": 199.74029850746268, "grad_norm": 23.666229248046875, "learning_rate": 9.526190476190476e-06, "loss": 38.0738, "step": 8389 }, { "epoch": 199.76417910447762, "grad_norm": 26.105844497680664, "learning_rate": 9.525000000000001e-06, "loss": 35.2158, "step": 8390 }, { "epoch": 199.78805970149253, "grad_norm": 23.236692428588867, "learning_rate": 9.523809523809525e-06, "loss": 37.9765, "step": 8391 }, { "epoch": 199.81194029850747, "grad_norm": 28.990320205688477, "learning_rate": 9.522619047619048e-06, "loss": 39.4678, "step": 8392 }, { "epoch": 199.83582089552237, "grad_norm": 26.152000427246094, "learning_rate": 9.521428571428572e-06, "loss": 38.2819, "step": 8393 }, { "epoch": 199.8597014925373, "grad_norm": 27.403459548950195, "learning_rate": 9.520238095238096e-06, "loss": 37.7756, "step": 8394 }, { "epoch": 199.88358208955225, "grad_norm": 25.223661422729492, "learning_rate": 9.51904761904762e-06, "loss": 38.0363, "step": 8395 }, { "epoch": 199.90746268656716, "grad_norm": 28.14125633239746, "learning_rate": 9.517857142857143e-06, "loss": 37.2465, "step": 8396 }, { "epoch": 199.9313432835821, "grad_norm": NaN, "learning_rate": 9.516666666666668e-06, "loss": 60.1323, "step": 8397 }, { "epoch": 199.955223880597, "grad_norm": 25.910566329956055, "learning_rate": 9.516666666666668e-06, "loss": 37.1015, "step": 8398 }, { "epoch": 199.97910447761194, "grad_norm": 30.221599578857422, "learning_rate": 9.515476190476192e-06, "loss": 37.6695, "step": 8399 }, { "epoch": 200.0, "grad_norm": 22.35158348083496, "learning_rate": 9.514285714285715e-06, "loss": 32.3938, "step": 8400 }, { "epoch": 200.0, "step": 8400, "total_flos": 4.129605818803725e+17, "train_loss": 1.9183280272710892, "train_runtime": 12838.6174, "train_samples_per_second": 83.373, "train_steps_per_second": 0.654 }, { "epoch": 200.02388059701494, "grad_norm": 23.81245994567871, "learning_rate": 1e-05, "loss": 37.5421, "step": 8401 }, { "epoch": 200.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998866213151928e-06, "loss": 45.3604, "step": 8402 }, { "epoch": 200.07164179104478, "grad_norm": Infinity, "learning_rate": 9.998866213151928e-06, "loss": 46.7324, "step": 8403 }, { "epoch": 200.0955223880597, "grad_norm": 504.5543212890625, "learning_rate": 9.998866213151928e-06, "loss": 46.4227, "step": 8404 }, { "epoch": 200.11940298507463, "grad_norm": 304.8294372558594, "learning_rate": 9.997732426303856e-06, "loss": 42.1724, "step": 8405 }, { "epoch": 200.14328358208957, "grad_norm": 102.3563232421875, "learning_rate": 9.996598639455783e-06, "loss": 39.149, "step": 8406 }, { "epoch": 200.16716417910447, "grad_norm": 111.65090942382812, "learning_rate": 9.99546485260771e-06, "loss": 39.3382, "step": 8407 }, { "epoch": 200.1910447761194, "grad_norm": 107.08712005615234, "learning_rate": 9.994331065759638e-06, "loss": 39.5328, "step": 8408 }, { "epoch": 200.21492537313432, "grad_norm": 47.29191589355469, "learning_rate": 9.993197278911566e-06, "loss": 38.9979, "step": 8409 }, { "epoch": 200.23880597014926, "grad_norm": 46.862098693847656, "learning_rate": 9.992063492063493e-06, "loss": 37.9798, "step": 8410 }, { "epoch": 200.26268656716417, "grad_norm": 37.27891159057617, "learning_rate": 9.99092970521542e-06, "loss": 38.5795, "step": 8411 }, { "epoch": 200.2865671641791, "grad_norm": 28.95697021484375, "learning_rate": 9.989795918367348e-06, "loss": 39.2989, "step": 8412 }, { "epoch": 200.31044776119404, "grad_norm": 31.485437393188477, "learning_rate": 9.988662131519276e-06, "loss": 37.808, "step": 8413 }, { "epoch": 200.33432835820895, "grad_norm": 25.093996047973633, "learning_rate": 9.987528344671202e-06, "loss": 37.9118, "step": 8414 }, { "epoch": 200.3582089552239, "grad_norm": 18.148664474487305, "learning_rate": 9.98639455782313e-06, "loss": 37.8239, "step": 8415 }, { "epoch": 200.3820895522388, "grad_norm": 24.032161712646484, "learning_rate": 9.985260770975057e-06, "loss": 37.5528, "step": 8416 }, { "epoch": 200.40597014925373, "grad_norm": 19.333480834960938, "learning_rate": 9.984126984126986e-06, "loss": 37.2521, "step": 8417 }, { "epoch": 200.42985074626867, "grad_norm": 26.17083740234375, "learning_rate": 9.982993197278913e-06, "loss": 37.8435, "step": 8418 }, { "epoch": 200.45373134328358, "grad_norm": 21.792404174804688, "learning_rate": 9.981859410430839e-06, "loss": 38.5489, "step": 8419 }, { "epoch": 200.47761194029852, "grad_norm": 18.229753494262695, "learning_rate": 9.980725623582768e-06, "loss": 37.0981, "step": 8420 }, { "epoch": 200.50149253731342, "grad_norm": 23.669044494628906, "learning_rate": 9.979591836734694e-06, "loss": 37.6026, "step": 8421 }, { "epoch": 200.52537313432836, "grad_norm": 24.45855712890625, "learning_rate": 9.978458049886622e-06, "loss": 37.5576, "step": 8422 }, { "epoch": 200.54925373134327, "grad_norm": 17.91193199157715, "learning_rate": 9.977324263038549e-06, "loss": 38.3311, "step": 8423 }, { "epoch": 200.5731343283582, "grad_norm": 19.74493408203125, "learning_rate": 9.976190476190477e-06, "loss": 37.4511, "step": 8424 }, { "epoch": 200.59701492537314, "grad_norm": 15.995309829711914, "learning_rate": 9.975056689342404e-06, "loss": 38.1505, "step": 8425 }, { "epoch": 200.62089552238805, "grad_norm": 21.274045944213867, "learning_rate": 9.973922902494332e-06, "loss": 36.8368, "step": 8426 }, { "epoch": 200.644776119403, "grad_norm": 16.583993911743164, "learning_rate": 9.972789115646259e-06, "loss": 37.2429, "step": 8427 }, { "epoch": 200.6686567164179, "grad_norm": 24.277111053466797, "learning_rate": 9.971655328798187e-06, "loss": 38.7733, "step": 8428 }, { "epoch": 200.69253731343284, "grad_norm": 17.2474308013916, "learning_rate": 9.970521541950114e-06, "loss": 38.1177, "step": 8429 }, { "epoch": 200.71641791044777, "grad_norm": 19.051393508911133, "learning_rate": 9.969387755102042e-06, "loss": 37.7148, "step": 8430 }, { "epoch": 200.74029850746268, "grad_norm": NaN, "learning_rate": 9.968253968253969e-06, "loss": 65.8134, "step": 8431 }, { "epoch": 200.76417910447762, "grad_norm": 22.762027740478516, "learning_rate": 9.968253968253969e-06, "loss": 38.2388, "step": 8432 }, { "epoch": 200.78805970149253, "grad_norm": 15.53449821472168, "learning_rate": 9.967120181405897e-06, "loss": 37.2729, "step": 8433 }, { "epoch": 200.81194029850747, "grad_norm": 23.350296020507812, "learning_rate": 9.965986394557824e-06, "loss": 36.1391, "step": 8434 }, { "epoch": 200.83582089552237, "grad_norm": 20.47188949584961, "learning_rate": 9.964852607709752e-06, "loss": 36.8894, "step": 8435 }, { "epoch": 200.8597014925373, "grad_norm": 20.669742584228516, "learning_rate": 9.963718820861679e-06, "loss": 36.2852, "step": 8436 }, { "epoch": 200.88358208955225, "grad_norm": 28.16716957092285, "learning_rate": 9.962585034013607e-06, "loss": 38.1568, "step": 8437 }, { "epoch": 200.90746268656716, "grad_norm": 20.141733169555664, "learning_rate": 9.961451247165534e-06, "loss": 38.1183, "step": 8438 }, { "epoch": 200.9313432835821, "grad_norm": 31.327373504638672, "learning_rate": 9.960317460317462e-06, "loss": 38.2826, "step": 8439 }, { "epoch": 200.955223880597, "grad_norm": 23.03270721435547, "learning_rate": 9.959183673469387e-06, "loss": 37.7775, "step": 8440 }, { "epoch": 200.97910447761194, "grad_norm": 30.23394775390625, "learning_rate": 9.958049886621317e-06, "loss": 36.9908, "step": 8441 }, { "epoch": 201.0, "grad_norm": 19.29242515563965, "learning_rate": 9.956916099773244e-06, "loss": 32.7172, "step": 8442 }, { "epoch": 201.02388059701494, "grad_norm": 33.536903381347656, "learning_rate": 9.955782312925172e-06, "loss": 36.1264, "step": 8443 }, { "epoch": 201.04776119402985, "grad_norm": 30.620258331298828, "learning_rate": 9.954648526077099e-06, "loss": 37.4299, "step": 8444 }, { "epoch": 201.07164179104478, "grad_norm": 29.916500091552734, "learning_rate": 9.953514739229025e-06, "loss": 38.3549, "step": 8445 }, { "epoch": 201.0955223880597, "grad_norm": 26.54745864868164, "learning_rate": 9.952380952380954e-06, "loss": 38.2229, "step": 8446 }, { "epoch": 201.11940298507463, "grad_norm": 29.324121475219727, "learning_rate": 9.95124716553288e-06, "loss": 37.9866, "step": 8447 }, { "epoch": 201.14328358208957, "grad_norm": 22.872337341308594, "learning_rate": 9.950113378684807e-06, "loss": 37.0148, "step": 8448 }, { "epoch": 201.16716417910447, "grad_norm": 28.70613670349121, "learning_rate": 9.948979591836737e-06, "loss": 36.8655, "step": 8449 }, { "epoch": 201.1910447761194, "grad_norm": 22.34271240234375, "learning_rate": 9.947845804988662e-06, "loss": 36.3357, "step": 8450 }, { "epoch": 201.21492537313432, "grad_norm": 31.665597915649414, "learning_rate": 9.946712018140592e-06, "loss": 37.386, "step": 8451 }, { "epoch": 201.23880597014926, "grad_norm": 23.60234260559082, "learning_rate": 9.945578231292517e-06, "loss": 37.0227, "step": 8452 }, { "epoch": 201.26268656716417, "grad_norm": 31.781818389892578, "learning_rate": 9.944444444444445e-06, "loss": 38.0446, "step": 8453 }, { "epoch": 201.2865671641791, "grad_norm": 24.861406326293945, "learning_rate": 9.943310657596372e-06, "loss": 38.2091, "step": 8454 }, { "epoch": 201.31044776119404, "grad_norm": 28.904706954956055, "learning_rate": 9.9421768707483e-06, "loss": 36.8503, "step": 8455 }, { "epoch": 201.33432835820895, "grad_norm": 24.053878784179688, "learning_rate": 9.941043083900227e-06, "loss": 38.1396, "step": 8456 }, { "epoch": 201.3582089552239, "grad_norm": 29.818387985229492, "learning_rate": 9.939909297052155e-06, "loss": 36.598, "step": 8457 }, { "epoch": 201.3820895522388, "grad_norm": 22.70626449584961, "learning_rate": 9.938775510204082e-06, "loss": 36.83, "step": 8458 }, { "epoch": 201.40597014925373, "grad_norm": 30.062395095825195, "learning_rate": 9.93764172335601e-06, "loss": 37.1404, "step": 8459 }, { "epoch": 201.42985074626867, "grad_norm": 24.94289207458496, "learning_rate": 9.936507936507937e-06, "loss": 37.4896, "step": 8460 }, { "epoch": 201.45373134328358, "grad_norm": 34.83537673950195, "learning_rate": 9.935374149659865e-06, "loss": 37.4165, "step": 8461 }, { "epoch": 201.47761194029852, "grad_norm": 30.582931518554688, "learning_rate": 9.934240362811792e-06, "loss": 36.9354, "step": 8462 }, { "epoch": 201.50149253731342, "grad_norm": 28.539152145385742, "learning_rate": 9.93310657596372e-06, "loss": 37.6099, "step": 8463 }, { "epoch": 201.52537313432836, "grad_norm": 28.249475479125977, "learning_rate": 9.931972789115647e-06, "loss": 36.0266, "step": 8464 }, { "epoch": 201.54925373134327, "grad_norm": 26.3381290435791, "learning_rate": 9.930839002267575e-06, "loss": 37.0545, "step": 8465 }, { "epoch": 201.5731343283582, "grad_norm": 23.9443359375, "learning_rate": 9.929705215419502e-06, "loss": 37.6213, "step": 8466 }, { "epoch": 201.59701492537314, "grad_norm": 32.69965744018555, "learning_rate": 9.92857142857143e-06, "loss": 39.0624, "step": 8467 }, { "epoch": 201.62089552238805, "grad_norm": 26.842876434326172, "learning_rate": 9.927437641723356e-06, "loss": 37.8574, "step": 8468 }, { "epoch": 201.644776119403, "grad_norm": 28.573163986206055, "learning_rate": 9.926303854875285e-06, "loss": 38.1716, "step": 8469 }, { "epoch": 201.6686567164179, "grad_norm": 28.380313873291016, "learning_rate": 9.92517006802721e-06, "loss": 37.498, "step": 8470 }, { "epoch": 201.69253731343284, "grad_norm": 25.541645050048828, "learning_rate": 9.92403628117914e-06, "loss": 37.255, "step": 8471 }, { "epoch": 201.71641791044777, "grad_norm": 25.202747344970703, "learning_rate": 9.922902494331067e-06, "loss": 38.1171, "step": 8472 }, { "epoch": 201.74029850746268, "grad_norm": 32.81104278564453, "learning_rate": 9.921768707482993e-06, "loss": 37.3737, "step": 8473 }, { "epoch": 201.76417910447762, "grad_norm": 26.47867774963379, "learning_rate": 9.920634920634922e-06, "loss": 38.4246, "step": 8474 }, { "epoch": 201.78805970149253, "grad_norm": 30.32525062561035, "learning_rate": 9.919501133786848e-06, "loss": 37.4488, "step": 8475 }, { "epoch": 201.81194029850747, "grad_norm": 26.98731231689453, "learning_rate": 9.918367346938776e-06, "loss": 37.9646, "step": 8476 }, { "epoch": 201.83582089552237, "grad_norm": 26.333789825439453, "learning_rate": 9.917233560090703e-06, "loss": 37.9806, "step": 8477 }, { "epoch": 201.8597014925373, "grad_norm": 21.36071014404297, "learning_rate": 9.91609977324263e-06, "loss": 35.9579, "step": 8478 }, { "epoch": 201.88358208955225, "grad_norm": 25.346942901611328, "learning_rate": 9.91496598639456e-06, "loss": 37.2274, "step": 8479 }, { "epoch": 201.90746268656716, "grad_norm": 22.294967651367188, "learning_rate": 9.913832199546486e-06, "loss": 37.1912, "step": 8480 }, { "epoch": 201.9313432835821, "grad_norm": 28.8373966217041, "learning_rate": 9.912698412698413e-06, "loss": 38.0725, "step": 8481 }, { "epoch": 201.955223880597, "grad_norm": 25.565916061401367, "learning_rate": 9.91156462585034e-06, "loss": 38.5633, "step": 8482 }, { "epoch": 201.97910447761194, "grad_norm": 29.3934268951416, "learning_rate": 9.910430839002268e-06, "loss": 39.6998, "step": 8483 }, { "epoch": 202.0, "grad_norm": 20.555400848388672, "learning_rate": 9.909297052154196e-06, "loss": 32.8394, "step": 8484 }, { "epoch": 202.02388059701494, "grad_norm": 31.105358123779297, "learning_rate": 9.908163265306123e-06, "loss": 38.0253, "step": 8485 }, { "epoch": 202.04776119402985, "grad_norm": 28.788240432739258, "learning_rate": 9.90702947845805e-06, "loss": 38.106, "step": 8486 }, { "epoch": 202.07164179104478, "grad_norm": 30.641401290893555, "learning_rate": 9.905895691609978e-06, "loss": 37.1781, "step": 8487 }, { "epoch": 202.0955223880597, "grad_norm": 29.78472900390625, "learning_rate": 9.904761904761906e-06, "loss": 37.2946, "step": 8488 }, { "epoch": 202.11940298507463, "grad_norm": 24.245201110839844, "learning_rate": 9.903628117913833e-06, "loss": 37.3295, "step": 8489 }, { "epoch": 202.14328358208957, "grad_norm": 27.062395095825195, "learning_rate": 9.90249433106576e-06, "loss": 37.7033, "step": 8490 }, { "epoch": 202.16716417910447, "grad_norm": 29.303415298461914, "learning_rate": 9.901360544217688e-06, "loss": 36.5479, "step": 8491 }, { "epoch": 202.1910447761194, "grad_norm": 23.434593200683594, "learning_rate": 9.900226757369616e-06, "loss": 36.5344, "step": 8492 }, { "epoch": 202.21492537313432, "grad_norm": 27.116130828857422, "learning_rate": 9.899092970521543e-06, "loss": 36.8862, "step": 8493 }, { "epoch": 202.23880597014926, "grad_norm": 23.95987892150879, "learning_rate": 9.89795918367347e-06, "loss": 37.2771, "step": 8494 }, { "epoch": 202.26268656716417, "grad_norm": NaN, "learning_rate": 9.896825396825398e-06, "loss": 64.9095, "step": 8495 }, { "epoch": 202.2865671641791, "grad_norm": 26.921449661254883, "learning_rate": 9.896825396825398e-06, "loss": 36.0847, "step": 8496 }, { "epoch": 202.31044776119404, "grad_norm": 25.219202041625977, "learning_rate": 9.895691609977326e-06, "loss": 36.8732, "step": 8497 }, { "epoch": 202.33432835820895, "grad_norm": 29.823617935180664, "learning_rate": 9.894557823129253e-06, "loss": 38.1664, "step": 8498 }, { "epoch": 202.3582089552239, "grad_norm": 24.486186981201172, "learning_rate": 9.893424036281179e-06, "loss": 38.4029, "step": 8499 }, { "epoch": 202.3820895522388, "grad_norm": 28.144065856933594, "learning_rate": 9.892290249433108e-06, "loss": 37.321, "step": 8500 }, { "epoch": 202.40597014925373, "grad_norm": 22.483613967895508, "learning_rate": 9.891156462585036e-06, "loss": 37.651, "step": 8501 }, { "epoch": 202.42985074626867, "grad_norm": 25.333362579345703, "learning_rate": 9.890022675736961e-06, "loss": 37.4532, "step": 8502 }, { "epoch": 202.45373134328358, "grad_norm": 19.66573143005371, "learning_rate": 9.88888888888889e-06, "loss": 37.0419, "step": 8503 }, { "epoch": 202.47761194029852, "grad_norm": 23.879840850830078, "learning_rate": 9.887755102040816e-06, "loss": 37.5707, "step": 8504 }, { "epoch": 202.50149253731342, "grad_norm": 17.775848388671875, "learning_rate": 9.886621315192746e-06, "loss": 37.9697, "step": 8505 }, { "epoch": 202.52537313432836, "grad_norm": 24.975574493408203, "learning_rate": 9.885487528344671e-06, "loss": 36.9127, "step": 8506 }, { "epoch": 202.54925373134327, "grad_norm": 18.6658935546875, "learning_rate": 9.884353741496599e-06, "loss": 37.4472, "step": 8507 }, { "epoch": 202.5731343283582, "grad_norm": 23.0604248046875, "learning_rate": 9.883219954648526e-06, "loss": 37.623, "step": 8508 }, { "epoch": 202.59701492537314, "grad_norm": 20.533077239990234, "learning_rate": 9.882086167800454e-06, "loss": 38.4347, "step": 8509 }, { "epoch": 202.62089552238805, "grad_norm": 19.784536361694336, "learning_rate": 9.880952380952381e-06, "loss": 37.1416, "step": 8510 }, { "epoch": 202.644776119403, "grad_norm": 21.132734298706055, "learning_rate": 9.879818594104309e-06, "loss": 38.3573, "step": 8511 }, { "epoch": 202.6686567164179, "grad_norm": 16.398435592651367, "learning_rate": 9.878684807256236e-06, "loss": 36.9268, "step": 8512 }, { "epoch": 202.69253731343284, "grad_norm": 29.87149429321289, "learning_rate": 9.877551020408164e-06, "loss": 38.7687, "step": 8513 }, { "epoch": 202.71641791044777, "grad_norm": 21.406925201416016, "learning_rate": 9.876417233560091e-06, "loss": 37.5118, "step": 8514 }, { "epoch": 202.74029850746268, "grad_norm": 33.05040740966797, "learning_rate": 9.875283446712019e-06, "loss": 37.2421, "step": 8515 }, { "epoch": 202.76417910447762, "grad_norm": 28.560482025146484, "learning_rate": 9.874149659863946e-06, "loss": 37.2285, "step": 8516 }, { "epoch": 202.78805970149253, "grad_norm": 32.275150299072266, "learning_rate": 9.873015873015874e-06, "loss": 37.5002, "step": 8517 }, { "epoch": 202.81194029850747, "grad_norm": 26.057050704956055, "learning_rate": 9.871882086167801e-06, "loss": 38.5121, "step": 8518 }, { "epoch": 202.83582089552237, "grad_norm": 26.400562286376953, "learning_rate": 9.870748299319729e-06, "loss": 37.2058, "step": 8519 }, { "epoch": 202.8597014925373, "grad_norm": 19.530229568481445, "learning_rate": 9.869614512471656e-06, "loss": 37.6345, "step": 8520 }, { "epoch": 202.88358208955225, "grad_norm": 28.110204696655273, "learning_rate": 9.868480725623584e-06, "loss": 37.4057, "step": 8521 }, { "epoch": 202.90746268656716, "grad_norm": 24.72870445251465, "learning_rate": 9.867346938775511e-06, "loss": 37.214, "step": 8522 }, { "epoch": 202.9313432835821, "grad_norm": 27.58170509338379, "learning_rate": 9.866213151927439e-06, "loss": 36.9991, "step": 8523 }, { "epoch": 202.955223880597, "grad_norm": 24.87717056274414, "learning_rate": 9.865079365079366e-06, "loss": 38.3773, "step": 8524 }, { "epoch": 202.97910447761194, "grad_norm": 27.23594856262207, "learning_rate": 9.863945578231294e-06, "loss": 37.8239, "step": 8525 }, { "epoch": 203.0, "grad_norm": 21.036359786987305, "learning_rate": 9.862811791383221e-06, "loss": 32.8495, "step": 8526 }, { "epoch": 203.02388059701494, "grad_norm": 25.65850257873535, "learning_rate": 9.861678004535147e-06, "loss": 37.4606, "step": 8527 }, { "epoch": 203.04776119402985, "grad_norm": 21.02652359008789, "learning_rate": 9.860544217687076e-06, "loss": 37.9006, "step": 8528 }, { "epoch": 203.07164179104478, "grad_norm": 27.834203720092773, "learning_rate": 9.859410430839002e-06, "loss": 37.1006, "step": 8529 }, { "epoch": 203.0955223880597, "grad_norm": 22.7817440032959, "learning_rate": 9.858276643990931e-06, "loss": 36.8413, "step": 8530 }, { "epoch": 203.11940298507463, "grad_norm": 32.14323425292969, "learning_rate": 9.857142857142859e-06, "loss": 37.8483, "step": 8531 }, { "epoch": 203.14328358208957, "grad_norm": 26.14910316467285, "learning_rate": 9.856009070294785e-06, "loss": 38.3764, "step": 8532 }, { "epoch": 203.16716417910447, "grad_norm": 27.673919677734375, "learning_rate": 9.854875283446714e-06, "loss": 36.9376, "step": 8533 }, { "epoch": 203.1910447761194, "grad_norm": 24.009227752685547, "learning_rate": 9.85374149659864e-06, "loss": 36.506, "step": 8534 }, { "epoch": 203.21492537313432, "grad_norm": 29.15199089050293, "learning_rate": 9.852607709750567e-06, "loss": 37.311, "step": 8535 }, { "epoch": 203.23880597014926, "grad_norm": 21.224517822265625, "learning_rate": 9.851473922902495e-06, "loss": 36.0035, "step": 8536 }, { "epoch": 203.26268656716417, "grad_norm": 34.10138702392578, "learning_rate": 9.850340136054422e-06, "loss": 38.4266, "step": 8537 }, { "epoch": 203.2865671641791, "grad_norm": 30.168445587158203, "learning_rate": 9.849206349206351e-06, "loss": 37.3233, "step": 8538 }, { "epoch": 203.31044776119404, "grad_norm": 24.34166145324707, "learning_rate": 9.848072562358277e-06, "loss": 37.2346, "step": 8539 }, { "epoch": 203.33432835820895, "grad_norm": 22.984699249267578, "learning_rate": 9.846938775510205e-06, "loss": 37.3328, "step": 8540 }, { "epoch": 203.3582089552239, "grad_norm": 26.927013397216797, "learning_rate": 9.845804988662132e-06, "loss": 37.3275, "step": 8541 }, { "epoch": 203.3820895522388, "grad_norm": 22.42952537536621, "learning_rate": 9.84467120181406e-06, "loss": 38.1146, "step": 8542 }, { "epoch": 203.40597014925373, "grad_norm": 26.905364990234375, "learning_rate": 9.843537414965987e-06, "loss": 37.6205, "step": 8543 }, { "epoch": 203.42985074626867, "grad_norm": 23.009504318237305, "learning_rate": 9.842403628117915e-06, "loss": 36.9357, "step": 8544 }, { "epoch": 203.45373134328358, "grad_norm": 25.13264274597168, "learning_rate": 9.841269841269842e-06, "loss": 37.3651, "step": 8545 }, { "epoch": 203.47761194029852, "grad_norm": 24.366397857666016, "learning_rate": 9.84013605442177e-06, "loss": 37.44, "step": 8546 }, { "epoch": 203.50149253731342, "grad_norm": 24.072046279907227, "learning_rate": 9.839002267573697e-06, "loss": 36.5196, "step": 8547 }, { "epoch": 203.52537313432836, "grad_norm": 20.05785369873047, "learning_rate": 9.837868480725625e-06, "loss": 37.6953, "step": 8548 }, { "epoch": 203.54925373134327, "grad_norm": NaN, "learning_rate": 9.836734693877552e-06, "loss": 31.6875, "step": 8549 }, { "epoch": 203.5731343283582, "grad_norm": 26.91598892211914, "learning_rate": 9.836734693877552e-06, "loss": 37.5256, "step": 8550 }, { "epoch": 203.59701492537314, "grad_norm": 23.437747955322266, "learning_rate": 9.83560090702948e-06, "loss": 38.1517, "step": 8551 }, { "epoch": 203.62089552238805, "grad_norm": 22.155426025390625, "learning_rate": 9.834467120181407e-06, "loss": 38.0603, "step": 8552 }, { "epoch": 203.644776119403, "grad_norm": 21.953975677490234, "learning_rate": 9.833333333333333e-06, "loss": 37.3232, "step": 8553 }, { "epoch": 203.6686567164179, "grad_norm": 22.587839126586914, "learning_rate": 9.832199546485262e-06, "loss": 36.6528, "step": 8554 }, { "epoch": 203.69253731343284, "grad_norm": 20.159603118896484, "learning_rate": 9.83106575963719e-06, "loss": 38.2609, "step": 8555 }, { "epoch": 203.71641791044777, "grad_norm": 21.83957290649414, "learning_rate": 9.829931972789115e-06, "loss": 38.0038, "step": 8556 }, { "epoch": 203.74029850746268, "grad_norm": 20.712881088256836, "learning_rate": 9.828798185941045e-06, "loss": 36.8639, "step": 8557 }, { "epoch": 203.76417910447762, "grad_norm": 24.437532424926758, "learning_rate": 9.82766439909297e-06, "loss": 37.3083, "step": 8558 }, { "epoch": 203.78805970149253, "grad_norm": 18.273344039916992, "learning_rate": 9.8265306122449e-06, "loss": 37.1752, "step": 8559 }, { "epoch": 203.81194029850747, "grad_norm": 22.60394859313965, "learning_rate": 9.825396825396825e-06, "loss": 37.4568, "step": 8560 }, { "epoch": 203.83582089552237, "grad_norm": 18.706016540527344, "learning_rate": 9.824263038548753e-06, "loss": 37.6933, "step": 8561 }, { "epoch": 203.8597014925373, "grad_norm": 23.628393173217773, "learning_rate": 9.823129251700682e-06, "loss": 38.1902, "step": 8562 }, { "epoch": 203.88358208955225, "grad_norm": 22.622568130493164, "learning_rate": 9.821995464852608e-06, "loss": 37.5829, "step": 8563 }, { "epoch": 203.90746268656716, "grad_norm": 23.485595703125, "learning_rate": 9.820861678004537e-06, "loss": 37.6715, "step": 8564 }, { "epoch": 203.9313432835821, "grad_norm": 24.30974006652832, "learning_rate": 9.819727891156463e-06, "loss": 38.7099, "step": 8565 }, { "epoch": 203.955223880597, "grad_norm": 21.94779396057129, "learning_rate": 9.81859410430839e-06, "loss": 37.1328, "step": 8566 }, { "epoch": 203.97910447761194, "grad_norm": 23.196590423583984, "learning_rate": 9.817460317460318e-06, "loss": 36.9389, "step": 8567 }, { "epoch": 204.0, "grad_norm": 17.97230339050293, "learning_rate": 9.816326530612245e-06, "loss": 33.5231, "step": 8568 }, { "epoch": 204.02388059701494, "grad_norm": 18.16822624206543, "learning_rate": 9.815192743764173e-06, "loss": 37.7035, "step": 8569 }, { "epoch": 204.04776119402985, "grad_norm": 19.428035736083984, "learning_rate": 9.8140589569161e-06, "loss": 37.3438, "step": 8570 }, { "epoch": 204.07164179104478, "grad_norm": 16.83907699584961, "learning_rate": 9.812925170068028e-06, "loss": 37.7712, "step": 8571 }, { "epoch": 204.0955223880597, "grad_norm": 17.97744369506836, "learning_rate": 9.811791383219955e-06, "loss": 37.8334, "step": 8572 }, { "epoch": 204.11940298507463, "grad_norm": 17.03276252746582, "learning_rate": 9.810657596371883e-06, "loss": 37.0864, "step": 8573 }, { "epoch": 204.14328358208957, "grad_norm": 16.34898567199707, "learning_rate": 9.80952380952381e-06, "loss": 37.4734, "step": 8574 }, { "epoch": 204.16716417910447, "grad_norm": 16.26249122619629, "learning_rate": 9.808390022675738e-06, "loss": 36.2818, "step": 8575 }, { "epoch": 204.1910447761194, "grad_norm": 18.367773056030273, "learning_rate": 9.807256235827665e-06, "loss": 37.1955, "step": 8576 }, { "epoch": 204.21492537313432, "grad_norm": 14.366243362426758, "learning_rate": 9.806122448979593e-06, "loss": 36.6985, "step": 8577 }, { "epoch": 204.23880597014926, "grad_norm": 18.252262115478516, "learning_rate": 9.80498866213152e-06, "loss": 38.7752, "step": 8578 }, { "epoch": 204.26268656716417, "grad_norm": 16.947233200073242, "learning_rate": 9.803854875283448e-06, "loss": 37.0114, "step": 8579 }, { "epoch": 204.2865671641791, "grad_norm": 19.83219337463379, "learning_rate": 9.802721088435375e-06, "loss": 37.5581, "step": 8580 }, { "epoch": 204.31044776119404, "grad_norm": 12.658903121948242, "learning_rate": 9.801587301587301e-06, "loss": 35.1772, "step": 8581 }, { "epoch": 204.33432835820895, "grad_norm": 21.653749465942383, "learning_rate": 9.80045351473923e-06, "loss": 36.6293, "step": 8582 }, { "epoch": 204.3582089552239, "grad_norm": 20.153541564941406, "learning_rate": 9.799319727891158e-06, "loss": 37.3803, "step": 8583 }, { "epoch": 204.3820895522388, "grad_norm": 14.480081558227539, "learning_rate": 9.798185941043085e-06, "loss": 37.3014, "step": 8584 }, { "epoch": 204.40597014925373, "grad_norm": 17.002986907958984, "learning_rate": 9.797052154195013e-06, "loss": 36.4257, "step": 8585 }, { "epoch": 204.42985074626867, "grad_norm": 16.259973526000977, "learning_rate": 9.795918367346939e-06, "loss": 37.4563, "step": 8586 }, { "epoch": 204.45373134328358, "grad_norm": 14.481822967529297, "learning_rate": 9.794784580498868e-06, "loss": 36.5634, "step": 8587 }, { "epoch": 204.47761194029852, "grad_norm": 16.941505432128906, "learning_rate": 9.793650793650794e-06, "loss": 36.4001, "step": 8588 }, { "epoch": 204.50149253731342, "grad_norm": 14.08771800994873, "learning_rate": 9.792517006802721e-06, "loss": 36.7239, "step": 8589 }, { "epoch": 204.52537313432836, "grad_norm": 17.970989227294922, "learning_rate": 9.791383219954649e-06, "loss": 38.1865, "step": 8590 }, { "epoch": 204.54925373134327, "grad_norm": 22.19449234008789, "learning_rate": 9.790249433106576e-06, "loss": 37.6346, "step": 8591 }, { "epoch": 204.5731343283582, "grad_norm": 16.227018356323242, "learning_rate": 9.789115646258505e-06, "loss": 38.6723, "step": 8592 }, { "epoch": 204.59701492537314, "grad_norm": 15.995810508728027, "learning_rate": 9.787981859410431e-06, "loss": 37.3718, "step": 8593 }, { "epoch": 204.62089552238805, "grad_norm": 16.17877769470215, "learning_rate": 9.786848072562359e-06, "loss": 38.2895, "step": 8594 }, { "epoch": 204.644776119403, "grad_norm": 16.616500854492188, "learning_rate": 9.785714285714286e-06, "loss": 37.6198, "step": 8595 }, { "epoch": 204.6686567164179, "grad_norm": 15.92837142944336, "learning_rate": 9.784580498866214e-06, "loss": 37.6528, "step": 8596 }, { "epoch": 204.69253731343284, "grad_norm": 14.861062049865723, "learning_rate": 9.783446712018141e-06, "loss": 38.6355, "step": 8597 }, { "epoch": 204.71641791044777, "grad_norm": 14.655280113220215, "learning_rate": 9.782312925170069e-06, "loss": 37.8583, "step": 8598 }, { "epoch": 204.74029850746268, "grad_norm": 15.297805786132812, "learning_rate": 9.781179138321996e-06, "loss": 38.0771, "step": 8599 }, { "epoch": 204.76417910447762, "grad_norm": 14.192242622375488, "learning_rate": 9.780045351473924e-06, "loss": 38.3822, "step": 8600 }, { "epoch": 204.78805970149253, "grad_norm": 16.92637825012207, "learning_rate": 9.778911564625851e-06, "loss": 37.8253, "step": 8601 }, { "epoch": 204.81194029850747, "grad_norm": 13.849637031555176, "learning_rate": 9.777777777777779e-06, "loss": 38.0741, "step": 8602 }, { "epoch": 204.83582089552237, "grad_norm": 20.005584716796875, "learning_rate": 9.776643990929706e-06, "loss": 36.9411, "step": 8603 }, { "epoch": 204.8597014925373, "grad_norm": 16.371997833251953, "learning_rate": 9.775510204081634e-06, "loss": 35.9473, "step": 8604 }, { "epoch": 204.88358208955225, "grad_norm": 18.115108489990234, "learning_rate": 9.774376417233561e-06, "loss": 36.1243, "step": 8605 }, { "epoch": 204.90746268656716, "grad_norm": 16.897581100463867, "learning_rate": 9.773242630385489e-06, "loss": 38.1212, "step": 8606 }, { "epoch": 204.9313432835821, "grad_norm": 22.27849006652832, "learning_rate": 9.772108843537416e-06, "loss": 36.6232, "step": 8607 }, { "epoch": 204.955223880597, "grad_norm": 16.656368255615234, "learning_rate": 9.770975056689344e-06, "loss": 38.9034, "step": 8608 }, { "epoch": 204.97910447761194, "grad_norm": 21.257678985595703, "learning_rate": 9.769841269841271e-06, "loss": 37.4408, "step": 8609 }, { "epoch": 205.0, "grad_norm": 18.820884704589844, "learning_rate": 9.768707482993199e-06, "loss": 32.95, "step": 8610 }, { "epoch": 205.02388059701494, "grad_norm": 17.434200286865234, "learning_rate": 9.767573696145124e-06, "loss": 37.1015, "step": 8611 }, { "epoch": 205.04776119402985, "grad_norm": 24.209625244140625, "learning_rate": 9.766439909297054e-06, "loss": 37.2563, "step": 8612 }, { "epoch": 205.07164179104478, "grad_norm": 18.49870491027832, "learning_rate": 9.765306122448981e-06, "loss": 37.4353, "step": 8613 }, { "epoch": 205.0955223880597, "grad_norm": 19.67914390563965, "learning_rate": 9.764172335600907e-06, "loss": 36.5181, "step": 8614 }, { "epoch": 205.11940298507463, "grad_norm": 21.088788986206055, "learning_rate": 9.763038548752836e-06, "loss": 37.5769, "step": 8615 }, { "epoch": 205.14328358208957, "grad_norm": 13.879831314086914, "learning_rate": 9.761904761904762e-06, "loss": 38.6267, "step": 8616 }, { "epoch": 205.16716417910447, "grad_norm": 25.4019718170166, "learning_rate": 9.760770975056691e-06, "loss": 37.8395, "step": 8617 }, { "epoch": 205.1910447761194, "grad_norm": 17.966567993164062, "learning_rate": 9.759637188208617e-06, "loss": 36.8954, "step": 8618 }, { "epoch": 205.21492537313432, "grad_norm": 21.86101722717285, "learning_rate": 9.758503401360544e-06, "loss": 37.7932, "step": 8619 }, { "epoch": 205.23880597014926, "grad_norm": 19.588476181030273, "learning_rate": 9.757369614512474e-06, "loss": 36.7557, "step": 8620 }, { "epoch": 205.26268656716417, "grad_norm": 21.534090042114258, "learning_rate": 9.7562358276644e-06, "loss": 38.0934, "step": 8621 }, { "epoch": 205.2865671641791, "grad_norm": 21.240619659423828, "learning_rate": 9.755102040816327e-06, "loss": 37.9864, "step": 8622 }, { "epoch": 205.31044776119404, "grad_norm": 19.190460205078125, "learning_rate": 9.753968253968254e-06, "loss": 37.7771, "step": 8623 }, { "epoch": 205.33432835820895, "grad_norm": 18.091548919677734, "learning_rate": 9.752834467120182e-06, "loss": 37.3592, "step": 8624 }, { "epoch": 205.3582089552239, "grad_norm": 23.65395164489746, "learning_rate": 9.75170068027211e-06, "loss": 37.2367, "step": 8625 }, { "epoch": 205.3820895522388, "grad_norm": 15.69301986694336, "learning_rate": 9.750566893424037e-06, "loss": 36.6072, "step": 8626 }, { "epoch": 205.40597014925373, "grad_norm": 29.14418601989746, "learning_rate": 9.749433106575964e-06, "loss": 36.2826, "step": 8627 }, { "epoch": 205.42985074626867, "grad_norm": 19.379981994628906, "learning_rate": 9.748299319727892e-06, "loss": 36.8289, "step": 8628 }, { "epoch": 205.45373134328358, "grad_norm": 24.329856872558594, "learning_rate": 9.74716553287982e-06, "loss": 36.0532, "step": 8629 }, { "epoch": 205.47761194029852, "grad_norm": 21.218088150024414, "learning_rate": 9.746031746031747e-06, "loss": 37.3484, "step": 8630 }, { "epoch": 205.50149253731342, "grad_norm": 17.96894645690918, "learning_rate": 9.744897959183674e-06, "loss": 36.6539, "step": 8631 }, { "epoch": 205.52537313432836, "grad_norm": 29.74249839782715, "learning_rate": 9.743764172335602e-06, "loss": 36.3547, "step": 8632 }, { "epoch": 205.54925373134327, "grad_norm": 19.569395065307617, "learning_rate": 9.74263038548753e-06, "loss": 36.5655, "step": 8633 }, { "epoch": 205.5731343283582, "grad_norm": 37.414424896240234, "learning_rate": 9.741496598639457e-06, "loss": 37.9095, "step": 8634 }, { "epoch": 205.59701492537314, "grad_norm": 32.67489242553711, "learning_rate": 9.740362811791384e-06, "loss": 37.9754, "step": 8635 }, { "epoch": 205.62089552238805, "grad_norm": 26.269237518310547, "learning_rate": 9.739229024943312e-06, "loss": 36.5291, "step": 8636 }, { "epoch": 205.644776119403, "grad_norm": 26.150583267211914, "learning_rate": 9.73809523809524e-06, "loss": 36.9841, "step": 8637 }, { "epoch": 205.6686567164179, "grad_norm": 29.86044692993164, "learning_rate": 9.736961451247167e-06, "loss": 37.4311, "step": 8638 }, { "epoch": 205.69253731343284, "grad_norm": 24.11771011352539, "learning_rate": 9.735827664399093e-06, "loss": 37.3217, "step": 8639 }, { "epoch": 205.71641791044777, "grad_norm": 35.74076461791992, "learning_rate": 9.734693877551022e-06, "loss": 38.697, "step": 8640 }, { "epoch": 205.74029850746268, "grad_norm": 29.668153762817383, "learning_rate": 9.733560090702948e-06, "loss": 38.0177, "step": 8641 }, { "epoch": 205.76417910447762, "grad_norm": 26.31679344177246, "learning_rate": 9.732426303854877e-06, "loss": 36.4184, "step": 8642 }, { "epoch": 205.78805970149253, "grad_norm": 25.133056640625, "learning_rate": 9.731292517006804e-06, "loss": 38.2124, "step": 8643 }, { "epoch": 205.81194029850747, "grad_norm": 29.476600646972656, "learning_rate": 9.73015873015873e-06, "loss": 37.3606, "step": 8644 }, { "epoch": 205.83582089552237, "grad_norm": 28.86757469177246, "learning_rate": 9.72902494331066e-06, "loss": 38.1226, "step": 8645 }, { "epoch": 205.8597014925373, "grad_norm": 34.02037048339844, "learning_rate": 9.727891156462585e-06, "loss": 38.4452, "step": 8646 }, { "epoch": 205.88358208955225, "grad_norm": 32.310176849365234, "learning_rate": 9.726757369614513e-06, "loss": 35.7592, "step": 8647 }, { "epoch": 205.90746268656716, "grad_norm": 27.667375564575195, "learning_rate": 9.72562358276644e-06, "loss": 37.4783, "step": 8648 }, { "epoch": 205.9313432835821, "grad_norm": 25.79292869567871, "learning_rate": 9.724489795918368e-06, "loss": 38.5843, "step": 8649 }, { "epoch": 205.955223880597, "grad_norm": 30.97063446044922, "learning_rate": 9.723356009070297e-06, "loss": 38.1735, "step": 8650 }, { "epoch": 205.97910447761194, "grad_norm": 27.409870147705078, "learning_rate": 9.722222222222223e-06, "loss": 37.2472, "step": 8651 }, { "epoch": 206.0, "grad_norm": 26.654399871826172, "learning_rate": 9.72108843537415e-06, "loss": 32.5645, "step": 8652 }, { "epoch": 206.02388059701494, "grad_norm": 26.989267349243164, "learning_rate": 9.719954648526078e-06, "loss": 37.6551, "step": 8653 }, { "epoch": 206.04776119402985, "grad_norm": 27.682390213012695, "learning_rate": 9.718820861678005e-06, "loss": 37.2163, "step": 8654 }, { "epoch": 206.07164179104478, "grad_norm": 23.36117172241211, "learning_rate": 9.717687074829933e-06, "loss": 37.1891, "step": 8655 }, { "epoch": 206.0955223880597, "grad_norm": 28.605031967163086, "learning_rate": 9.71655328798186e-06, "loss": 37.4626, "step": 8656 }, { "epoch": 206.11940298507463, "grad_norm": 29.006641387939453, "learning_rate": 9.715419501133788e-06, "loss": 37.3505, "step": 8657 }, { "epoch": 206.14328358208957, "grad_norm": 28.94482421875, "learning_rate": 9.714285714285715e-06, "loss": 38.1489, "step": 8658 }, { "epoch": 206.16716417910447, "grad_norm": 28.851049423217773, "learning_rate": 9.713151927437643e-06, "loss": 36.5958, "step": 8659 }, { "epoch": 206.1910447761194, "grad_norm": 25.789871215820312, "learning_rate": 9.71201814058957e-06, "loss": 37.5911, "step": 8660 }, { "epoch": 206.21492537313432, "grad_norm": 20.312673568725586, "learning_rate": 9.710884353741498e-06, "loss": 37.4759, "step": 8661 }, { "epoch": 206.23880597014926, "grad_norm": 29.052183151245117, "learning_rate": 9.709750566893425e-06, "loss": 36.9356, "step": 8662 }, { "epoch": 206.26268656716417, "grad_norm": NaN, "learning_rate": 9.708616780045353e-06, "loss": 59.7149, "step": 8663 }, { "epoch": 206.2865671641791, "grad_norm": 23.565332412719727, "learning_rate": 9.708616780045353e-06, "loss": 36.9372, "step": 8664 }, { "epoch": 206.31044776119404, "grad_norm": 33.91110610961914, "learning_rate": 9.707482993197278e-06, "loss": 37.12, "step": 8665 }, { "epoch": 206.33432835820895, "grad_norm": 30.193803787231445, "learning_rate": 9.706349206349208e-06, "loss": 38.121, "step": 8666 }, { "epoch": 206.3582089552239, "grad_norm": 25.15426254272461, "learning_rate": 9.705215419501135e-06, "loss": 38.1279, "step": 8667 }, { "epoch": 206.3820895522388, "grad_norm": NaN, "learning_rate": 9.704081632653061e-06, "loss": 46.2935, "step": 8668 }, { "epoch": 206.40597014925373, "grad_norm": 25.120586395263672, "learning_rate": 9.704081632653061e-06, "loss": 37.7689, "step": 8669 }, { "epoch": 206.42985074626867, "grad_norm": 25.196691513061523, "learning_rate": 9.70294784580499e-06, "loss": 38.0344, "step": 8670 }, { "epoch": 206.45373134328358, "grad_norm": 21.404460906982422, "learning_rate": 9.701814058956916e-06, "loss": 36.8867, "step": 8671 }, { "epoch": 206.47761194029852, "grad_norm": 29.804731369018555, "learning_rate": 9.700680272108845e-06, "loss": 36.2151, "step": 8672 }, { "epoch": 206.50149253731342, "grad_norm": 27.454139709472656, "learning_rate": 9.699546485260771e-06, "loss": 36.4495, "step": 8673 }, { "epoch": 206.52537313432836, "grad_norm": 29.241275787353516, "learning_rate": 9.698412698412698e-06, "loss": 36.5479, "step": 8674 }, { "epoch": 206.54925373134327, "grad_norm": 29.399368286132812, "learning_rate": 9.697278911564628e-06, "loss": 37.5718, "step": 8675 }, { "epoch": 206.5731343283582, "grad_norm": 25.647436141967773, "learning_rate": 9.696145124716553e-06, "loss": 37.3242, "step": 8676 }, { "epoch": 206.59701492537314, "grad_norm": 23.48780632019043, "learning_rate": 9.695011337868483e-06, "loss": 37.484, "step": 8677 }, { "epoch": 206.62089552238805, "grad_norm": 30.65043067932129, "learning_rate": 9.693877551020408e-06, "loss": 37.318, "step": 8678 }, { "epoch": 206.644776119403, "grad_norm": 27.024131774902344, "learning_rate": 9.692743764172336e-06, "loss": 37.3023, "step": 8679 }, { "epoch": 206.6686567164179, "grad_norm": 28.0163516998291, "learning_rate": 9.691609977324263e-06, "loss": 37.4882, "step": 8680 }, { "epoch": 206.69253731343284, "grad_norm": 27.679622650146484, "learning_rate": 9.690476190476191e-06, "loss": 37.6525, "step": 8681 }, { "epoch": 206.71641791044777, "grad_norm": 25.003210067749023, "learning_rate": 9.689342403628118e-06, "loss": 36.7752, "step": 8682 }, { "epoch": 206.74029850746268, "grad_norm": 24.170604705810547, "learning_rate": 9.688208616780046e-06, "loss": 38.1328, "step": 8683 }, { "epoch": 206.76417910447762, "grad_norm": 26.91878890991211, "learning_rate": 9.687074829931973e-06, "loss": 37.0418, "step": 8684 }, { "epoch": 206.78805970149253, "grad_norm": 21.186622619628906, "learning_rate": 9.685941043083901e-06, "loss": 37.6866, "step": 8685 }, { "epoch": 206.81194029850747, "grad_norm": 32.290283203125, "learning_rate": 9.684807256235828e-06, "loss": 37.996, "step": 8686 }, { "epoch": 206.83582089552237, "grad_norm": 27.53929328918457, "learning_rate": 9.683673469387756e-06, "loss": 37.8191, "step": 8687 }, { "epoch": 206.8597014925373, "grad_norm": 28.326705932617188, "learning_rate": 9.682539682539683e-06, "loss": 37.1428, "step": 8688 }, { "epoch": 206.88358208955225, "grad_norm": 27.45022201538086, "learning_rate": 9.681405895691611e-06, "loss": 38.0987, "step": 8689 }, { "epoch": 206.90746268656716, "grad_norm": 21.459352493286133, "learning_rate": 9.680272108843538e-06, "loss": 36.5714, "step": 8690 }, { "epoch": 206.9313432835821, "grad_norm": 20.296875, "learning_rate": 9.679138321995466e-06, "loss": 34.7537, "step": 8691 }, { "epoch": 206.955223880597, "grad_norm": 32.78303527832031, "learning_rate": 9.678004535147393e-06, "loss": 37.9229, "step": 8692 }, { "epoch": 206.97910447761194, "grad_norm": 25.335390090942383, "learning_rate": 9.676870748299321e-06, "loss": 37.0462, "step": 8693 }, { "epoch": 207.0, "grad_norm": 27.832971572875977, "learning_rate": 9.675736961451247e-06, "loss": 32.6488, "step": 8694 }, { "epoch": 207.02388059701494, "grad_norm": 28.880048751831055, "learning_rate": 9.674603174603176e-06, "loss": 36.2167, "step": 8695 }, { "epoch": 207.04776119402985, "grad_norm": 23.16868782043457, "learning_rate": 9.673469387755103e-06, "loss": 35.8238, "step": 8696 }, { "epoch": 207.07164179104478, "grad_norm": 23.041492462158203, "learning_rate": 9.672335600907031e-06, "loss": 37.2479, "step": 8697 }, { "epoch": 207.0955223880597, "grad_norm": 27.763071060180664, "learning_rate": 9.671201814058958e-06, "loss": 37.8876, "step": 8698 }, { "epoch": 207.11940298507463, "grad_norm": 23.571678161621094, "learning_rate": 9.670068027210884e-06, "loss": 37.9851, "step": 8699 }, { "epoch": 207.14328358208957, "grad_norm": 28.939640045166016, "learning_rate": 9.668934240362813e-06, "loss": 37.6695, "step": 8700 }, { "epoch": 207.16716417910447, "grad_norm": 25.583032608032227, "learning_rate": 9.66780045351474e-06, "loss": 36.5894, "step": 8701 }, { "epoch": 207.1910447761194, "grad_norm": 26.849519729614258, "learning_rate": 9.666666666666667e-06, "loss": 37.2977, "step": 8702 }, { "epoch": 207.21492537313432, "grad_norm": NaN, "learning_rate": 9.665532879818596e-06, "loss": 51.6228, "step": 8703 }, { "epoch": 207.23880597014926, "grad_norm": 28.73918914794922, "learning_rate": 9.665532879818596e-06, "loss": 38.3042, "step": 8704 }, { "epoch": 207.26268656716417, "grad_norm": 25.652467727661133, "learning_rate": 9.664399092970522e-06, "loss": 37.2118, "step": 8705 }, { "epoch": 207.2865671641791, "grad_norm": 23.142736434936523, "learning_rate": 9.663265306122451e-06, "loss": 36.7965, "step": 8706 }, { "epoch": 207.31044776119404, "grad_norm": 26.23145866394043, "learning_rate": 9.662131519274377e-06, "loss": 36.7877, "step": 8707 }, { "epoch": 207.33432835820895, "grad_norm": 20.018190383911133, "learning_rate": 9.660997732426304e-06, "loss": 36.7477, "step": 8708 }, { "epoch": 207.3582089552239, "grad_norm": 28.383352279663086, "learning_rate": 9.659863945578232e-06, "loss": 35.9798, "step": 8709 }, { "epoch": 207.3820895522388, "grad_norm": 24.80544090270996, "learning_rate": 9.65873015873016e-06, "loss": 37.4661, "step": 8710 }, { "epoch": 207.40597014925373, "grad_norm": 27.32225227355957, "learning_rate": 9.657596371882087e-06, "loss": 38.4724, "step": 8711 }, { "epoch": 207.42985074626867, "grad_norm": 24.118083953857422, "learning_rate": 9.656462585034014e-06, "loss": 37.5195, "step": 8712 }, { "epoch": 207.45373134328358, "grad_norm": 31.7176513671875, "learning_rate": 9.655328798185942e-06, "loss": 37.9662, "step": 8713 }, { "epoch": 207.47761194029852, "grad_norm": 28.144126892089844, "learning_rate": 9.65419501133787e-06, "loss": 37.9732, "step": 8714 }, { "epoch": 207.50149253731342, "grad_norm": 23.09284782409668, "learning_rate": 9.653061224489797e-06, "loss": 37.4785, "step": 8715 }, { "epoch": 207.52537313432836, "grad_norm": 22.441999435424805, "learning_rate": 9.651927437641724e-06, "loss": 36.9549, "step": 8716 }, { "epoch": 207.54925373134327, "grad_norm": 26.634323120117188, "learning_rate": 9.650793650793652e-06, "loss": 37.5379, "step": 8717 }, { "epoch": 207.5731343283582, "grad_norm": 19.879837036132812, "learning_rate": 9.64965986394558e-06, "loss": 38.1537, "step": 8718 }, { "epoch": 207.59701492537314, "grad_norm": 30.73711395263672, "learning_rate": 9.648526077097507e-06, "loss": 37.6, "step": 8719 }, { "epoch": 207.62089552238805, "grad_norm": 23.9213924407959, "learning_rate": 9.647392290249434e-06, "loss": 36.2659, "step": 8720 }, { "epoch": 207.644776119403, "grad_norm": 26.76925277709961, "learning_rate": 9.646258503401362e-06, "loss": 37.7797, "step": 8721 }, { "epoch": 207.6686567164179, "grad_norm": 27.074373245239258, "learning_rate": 9.64512471655329e-06, "loss": 37.0032, "step": 8722 }, { "epoch": 207.69253731343284, "grad_norm": 20.3933162689209, "learning_rate": 9.643990929705217e-06, "loss": 36.884, "step": 8723 }, { "epoch": 207.71641791044777, "grad_norm": 22.100154876708984, "learning_rate": 9.642857142857144e-06, "loss": 36.168, "step": 8724 }, { "epoch": 207.74029850746268, "grad_norm": 26.020097732543945, "learning_rate": 9.64172335600907e-06, "loss": 36.8531, "step": 8725 }, { "epoch": 207.76417910447762, "grad_norm": 20.40485191345215, "learning_rate": 9.640589569161e-06, "loss": 37.6331, "step": 8726 }, { "epoch": 207.78805970149253, "grad_norm": 26.535852432250977, "learning_rate": 9.639455782312927e-06, "loss": 36.59, "step": 8727 }, { "epoch": 207.81194029850747, "grad_norm": 24.5301456451416, "learning_rate": 9.638321995464852e-06, "loss": 36.8655, "step": 8728 }, { "epoch": 207.83582089552237, "grad_norm": 24.19950294494629, "learning_rate": 9.637188208616782e-06, "loss": 37.2821, "step": 8729 }, { "epoch": 207.8597014925373, "grad_norm": 22.14886474609375, "learning_rate": 9.636054421768707e-06, "loss": 37.5526, "step": 8730 }, { "epoch": 207.88358208955225, "grad_norm": 27.889387130737305, "learning_rate": 9.634920634920637e-06, "loss": 37.4035, "step": 8731 }, { "epoch": 207.90746268656716, "grad_norm": 23.2374267578125, "learning_rate": 9.633786848072562e-06, "loss": 37.0872, "step": 8732 }, { "epoch": 207.9313432835821, "grad_norm": 28.7000675201416, "learning_rate": 9.63265306122449e-06, "loss": 37.2093, "step": 8733 }, { "epoch": 207.955223880597, "grad_norm": 25.44818878173828, "learning_rate": 9.63151927437642e-06, "loss": 36.7601, "step": 8734 }, { "epoch": 207.97910447761194, "grad_norm": 25.34256362915039, "learning_rate": 9.630385487528345e-06, "loss": 38.1114, "step": 8735 }, { "epoch": 208.0, "grad_norm": 21.16939926147461, "learning_rate": 9.629251700680272e-06, "loss": 33.3985, "step": 8736 }, { "epoch": 208.02388059701494, "grad_norm": 26.90691375732422, "learning_rate": 9.6281179138322e-06, "loss": 37.5929, "step": 8737 }, { "epoch": 208.04776119402985, "grad_norm": 21.72243881225586, "learning_rate": 9.626984126984127e-06, "loss": 37.9544, "step": 8738 }, { "epoch": 208.07164179104478, "grad_norm": 25.133729934692383, "learning_rate": 9.625850340136055e-06, "loss": 36.4881, "step": 8739 }, { "epoch": 208.0955223880597, "grad_norm": 21.65189552307129, "learning_rate": 9.624716553287982e-06, "loss": 35.4018, "step": 8740 }, { "epoch": 208.11940298507463, "grad_norm": 20.662872314453125, "learning_rate": 9.62358276643991e-06, "loss": 38.068, "step": 8741 }, { "epoch": 208.14328358208957, "grad_norm": 18.179677963256836, "learning_rate": 9.622448979591837e-06, "loss": 36.9269, "step": 8742 }, { "epoch": 208.16716417910447, "grad_norm": 20.394655227661133, "learning_rate": 9.621315192743765e-06, "loss": 37.5103, "step": 8743 }, { "epoch": 208.1910447761194, "grad_norm": 14.633431434631348, "learning_rate": 9.620181405895692e-06, "loss": 37.4208, "step": 8744 }, { "epoch": 208.21492537313432, "grad_norm": 26.77837371826172, "learning_rate": 9.61904761904762e-06, "loss": 37.438, "step": 8745 }, { "epoch": 208.23880597014926, "grad_norm": 20.291751861572266, "learning_rate": 9.617913832199547e-06, "loss": 37.2541, "step": 8746 }, { "epoch": 208.26268656716417, "grad_norm": 29.006790161132812, "learning_rate": 9.616780045351475e-06, "loss": 37.77, "step": 8747 }, { "epoch": 208.2865671641791, "grad_norm": 23.452924728393555, "learning_rate": 9.6156462585034e-06, "loss": 36.8207, "step": 8748 }, { "epoch": 208.31044776119404, "grad_norm": 25.323101043701172, "learning_rate": 9.61451247165533e-06, "loss": 38.0678, "step": 8749 }, { "epoch": 208.33432835820895, "grad_norm": 24.719270706176758, "learning_rate": 9.613378684807257e-06, "loss": 38.1048, "step": 8750 }, { "epoch": 208.3582089552239, "grad_norm": 23.332378387451172, "learning_rate": 9.612244897959185e-06, "loss": 36.2702, "step": 8751 }, { "epoch": 208.3820895522388, "grad_norm": 18.7159366607666, "learning_rate": 9.611111111111112e-06, "loss": 35.932, "step": 8752 }, { "epoch": 208.40597014925373, "grad_norm": 22.514942169189453, "learning_rate": 9.609977324263038e-06, "loss": 37.6841, "step": 8753 }, { "epoch": 208.42985074626867, "grad_norm": 18.898149490356445, "learning_rate": 9.608843537414967e-06, "loss": 37.062, "step": 8754 }, { "epoch": 208.45373134328358, "grad_norm": 23.485321044921875, "learning_rate": 9.607709750566893e-06, "loss": 37.5833, "step": 8755 }, { "epoch": 208.47761194029852, "grad_norm": 18.638851165771484, "learning_rate": 9.606575963718822e-06, "loss": 37.5329, "step": 8756 }, { "epoch": 208.50149253731342, "grad_norm": 25.822765350341797, "learning_rate": 9.60544217687075e-06, "loss": 38.0558, "step": 8757 }, { "epoch": 208.52537313432836, "grad_norm": 23.212072372436523, "learning_rate": 9.604308390022676e-06, "loss": 37.2138, "step": 8758 }, { "epoch": 208.54925373134327, "grad_norm": 20.474445343017578, "learning_rate": 9.603174603174605e-06, "loss": 35.3232, "step": 8759 }, { "epoch": 208.5731343283582, "grad_norm": 19.00440788269043, "learning_rate": 9.60204081632653e-06, "loss": 37.4117, "step": 8760 }, { "epoch": 208.59701492537314, "grad_norm": 22.211830139160156, "learning_rate": 9.600907029478458e-06, "loss": 37.0947, "step": 8761 }, { "epoch": 208.62089552238805, "grad_norm": 16.204587936401367, "learning_rate": 9.599773242630386e-06, "loss": 36.1761, "step": 8762 }, { "epoch": 208.644776119403, "grad_norm": 26.495813369750977, "learning_rate": 9.598639455782313e-06, "loss": 36.6699, "step": 8763 }, { "epoch": 208.6686567164179, "grad_norm": 22.76972007751465, "learning_rate": 9.597505668934242e-06, "loss": 36.6711, "step": 8764 }, { "epoch": 208.69253731343284, "grad_norm": 23.398727416992188, "learning_rate": 9.596371882086168e-06, "loss": 36.0655, "step": 8765 }, { "epoch": 208.71641791044777, "grad_norm": 21.727886199951172, "learning_rate": 9.595238095238096e-06, "loss": 38.3942, "step": 8766 }, { "epoch": 208.74029850746268, "grad_norm": 25.35695457458496, "learning_rate": 9.594104308390023e-06, "loss": 36.5805, "step": 8767 }, { "epoch": 208.76417910447762, "grad_norm": 20.90379524230957, "learning_rate": 9.59297052154195e-06, "loss": 37.3334, "step": 8768 }, { "epoch": 208.78805970149253, "grad_norm": 23.767805099487305, "learning_rate": 9.591836734693878e-06, "loss": 37.0844, "step": 8769 }, { "epoch": 208.81194029850747, "grad_norm": 22.0218563079834, "learning_rate": 9.590702947845806e-06, "loss": 37.8925, "step": 8770 }, { "epoch": 208.83582089552237, "grad_norm": 28.21807861328125, "learning_rate": 9.589569160997733e-06, "loss": 37.1123, "step": 8771 }, { "epoch": 208.8597014925373, "grad_norm": 21.722558975219727, "learning_rate": 9.58843537414966e-06, "loss": 37.4826, "step": 8772 }, { "epoch": 208.88358208955225, "grad_norm": NaN, "learning_rate": 9.587301587301588e-06, "loss": 66.8301, "step": 8773 }, { "epoch": 208.90746268656716, "grad_norm": 19.700010299682617, "learning_rate": 9.587301587301588e-06, "loss": 37.7419, "step": 8774 }, { "epoch": 208.9313432835821, "grad_norm": 21.414432525634766, "learning_rate": 9.586167800453516e-06, "loss": 37.6931, "step": 8775 }, { "epoch": 208.955223880597, "grad_norm": 16.847640991210938, "learning_rate": 9.585034013605443e-06, "loss": 37.3081, "step": 8776 }, { "epoch": 208.97910447761194, "grad_norm": 18.310691833496094, "learning_rate": 9.58390022675737e-06, "loss": 37.2922, "step": 8777 }, { "epoch": 209.0, "grad_norm": 14.464380264282227, "learning_rate": 9.582766439909298e-06, "loss": 33.5285, "step": 8778 }, { "epoch": 209.02388059701494, "grad_norm": 18.036569595336914, "learning_rate": 9.581632653061226e-06, "loss": 35.9583, "step": 8779 }, { "epoch": 209.04776119402985, "grad_norm": 14.068912506103516, "learning_rate": 9.580498866213153e-06, "loss": 36.8707, "step": 8780 }, { "epoch": 209.07164179104478, "grad_norm": 21.767086029052734, "learning_rate": 9.57936507936508e-06, "loss": 37.8075, "step": 8781 }, { "epoch": 209.0955223880597, "grad_norm": 19.587646484375, "learning_rate": 9.578231292517007e-06, "loss": 37.7072, "step": 8782 }, { "epoch": 209.11940298507463, "grad_norm": 19.14802360534668, "learning_rate": 9.577097505668936e-06, "loss": 36.6308, "step": 8783 }, { "epoch": 209.14328358208957, "grad_norm": 17.630035400390625, "learning_rate": 9.575963718820862e-06, "loss": 36.497, "step": 8784 }, { "epoch": 209.16716417910447, "grad_norm": 19.561717987060547, "learning_rate": 9.57482993197279e-06, "loss": 36.6801, "step": 8785 }, { "epoch": 209.1910447761194, "grad_norm": 15.293615341186523, "learning_rate": 9.573696145124717e-06, "loss": 35.5048, "step": 8786 }, { "epoch": 209.21492537313432, "grad_norm": 18.61341094970703, "learning_rate": 9.572562358276644e-06, "loss": 37.7874, "step": 8787 }, { "epoch": 209.23880597014926, "grad_norm": 19.37540054321289, "learning_rate": 9.571428571428573e-06, "loss": 37.4133, "step": 8788 }, { "epoch": 209.26268656716417, "grad_norm": 13.318099975585938, "learning_rate": 9.570294784580499e-06, "loss": 37.4057, "step": 8789 }, { "epoch": 209.2865671641791, "grad_norm": 15.878247261047363, "learning_rate": 9.569160997732427e-06, "loss": 36.2705, "step": 8790 }, { "epoch": 209.31044776119404, "grad_norm": 16.74108123779297, "learning_rate": 9.568027210884354e-06, "loss": 35.7654, "step": 8791 }, { "epoch": 209.33432835820895, "grad_norm": 17.369836807250977, "learning_rate": 9.566893424036282e-06, "loss": 36.6832, "step": 8792 }, { "epoch": 209.3582089552239, "grad_norm": 17.037837982177734, "learning_rate": 9.565759637188209e-06, "loss": 37.4281, "step": 8793 }, { "epoch": 209.3820895522388, "grad_norm": 14.933297157287598, "learning_rate": 9.564625850340137e-06, "loss": 36.907, "step": 8794 }, { "epoch": 209.40597014925373, "grad_norm": 17.732763290405273, "learning_rate": 9.563492063492064e-06, "loss": 37.0403, "step": 8795 }, { "epoch": 209.42985074626867, "grad_norm": 16.423961639404297, "learning_rate": 9.562358276643991e-06, "loss": 37.6568, "step": 8796 }, { "epoch": 209.45373134328358, "grad_norm": 13.933506965637207, "learning_rate": 9.561224489795919e-06, "loss": 37.5156, "step": 8797 }, { "epoch": 209.47761194029852, "grad_norm": 17.839454650878906, "learning_rate": 9.560090702947846e-06, "loss": 37.8671, "step": 8798 }, { "epoch": 209.50149253731342, "grad_norm": 18.602453231811523, "learning_rate": 9.558956916099774e-06, "loss": 37.6858, "step": 8799 }, { "epoch": 209.52537313432836, "grad_norm": NaN, "learning_rate": 9.557823129251701e-06, "loss": 36.5838, "step": 8800 }, { "epoch": 209.54925373134327, "grad_norm": 15.753517150878906, "learning_rate": 9.557823129251701e-06, "loss": 37.803, "step": 8801 }, { "epoch": 209.5731343283582, "grad_norm": 15.225348472595215, "learning_rate": 9.556689342403629e-06, "loss": 37.2629, "step": 8802 }, { "epoch": 209.59701492537314, "grad_norm": 14.971363067626953, "learning_rate": 9.555555555555556e-06, "loss": 35.9027, "step": 8803 }, { "epoch": 209.62089552238805, "grad_norm": 14.179505348205566, "learning_rate": 9.554421768707484e-06, "loss": 36.3974, "step": 8804 }, { "epoch": 209.644776119403, "grad_norm": 18.063364028930664, "learning_rate": 9.553287981859411e-06, "loss": 36.8283, "step": 8805 }, { "epoch": 209.6686567164179, "grad_norm": 13.856316566467285, "learning_rate": 9.552154195011339e-06, "loss": 37.0903, "step": 8806 }, { "epoch": 209.69253731343284, "grad_norm": 21.6617488861084, "learning_rate": 9.551020408163266e-06, "loss": 38.114, "step": 8807 }, { "epoch": 209.71641791044777, "grad_norm": 18.399335861206055, "learning_rate": 9.549886621315192e-06, "loss": 37.6584, "step": 8808 }, { "epoch": 209.74029850746268, "grad_norm": 14.723889350891113, "learning_rate": 9.548752834467121e-06, "loss": 37.6456, "step": 8809 }, { "epoch": 209.76417910447762, "grad_norm": 18.87386703491211, "learning_rate": 9.547619047619049e-06, "loss": 37.9242, "step": 8810 }, { "epoch": 209.78805970149253, "grad_norm": 17.717641830444336, "learning_rate": 9.546485260770976e-06, "loss": 36.445, "step": 8811 }, { "epoch": 209.81194029850747, "grad_norm": 15.05385971069336, "learning_rate": 9.545351473922904e-06, "loss": 38.2042, "step": 8812 }, { "epoch": 209.83582089552237, "grad_norm": 22.728147506713867, "learning_rate": 9.54421768707483e-06, "loss": 37.3571, "step": 8813 }, { "epoch": 209.8597014925373, "grad_norm": 18.1093807220459, "learning_rate": 9.543083900226759e-06, "loss": 37.8085, "step": 8814 }, { "epoch": 209.88358208955225, "grad_norm": 15.268453598022461, "learning_rate": 9.541950113378685e-06, "loss": 37.8936, "step": 8815 }, { "epoch": 209.90746268656716, "grad_norm": 22.95656967163086, "learning_rate": 9.540816326530612e-06, "loss": 37.1294, "step": 8816 }, { "epoch": 209.9313432835821, "grad_norm": 15.439409255981445, "learning_rate": 9.539682539682541e-06, "loss": 37.3681, "step": 8817 }, { "epoch": 209.955223880597, "grad_norm": 17.66872215270996, "learning_rate": 9.538548752834467e-06, "loss": 38.0217, "step": 8818 }, { "epoch": 209.97910447761194, "grad_norm": 20.06760597229004, "learning_rate": 9.537414965986396e-06, "loss": 37.7129, "step": 8819 }, { "epoch": 210.0, "grad_norm": 19.19893455505371, "learning_rate": 9.536281179138322e-06, "loss": 33.1443, "step": 8820 }, { "epoch": 210.0, "step": 8820, "total_flos": 4.3361080674915085e+17, "train_loss": 1.7935449273678181, "train_runtime": 12838.2452, "train_samples_per_second": 87.545, "train_steps_per_second": 0.687 }, { "epoch": 210.02388059701494, "grad_norm": 15.075254440307617, "learning_rate": 1e-05, "loss": 37.2785, "step": 8821 }, { "epoch": 210.04776119402985, "grad_norm": 142.95925903320312, "learning_rate": 9.99896480331263e-06, "loss": 40.939, "step": 8822 }, { "epoch": 210.07164179104478, "grad_norm": 73.43154907226562, "learning_rate": 9.99792960662526e-06, "loss": 39.356, "step": 8823 }, { "epoch": 210.0955223880597, "grad_norm": 42.96079635620117, "learning_rate": 9.99689440993789e-06, "loss": 38.2294, "step": 8824 }, { "epoch": 210.11940298507463, "grad_norm": 49.12348937988281, "learning_rate": 9.995859213250519e-06, "loss": 38.5214, "step": 8825 }, { "epoch": 210.14328358208957, "grad_norm": 62.188777923583984, "learning_rate": 9.994824016563148e-06, "loss": 36.7933, "step": 8826 }, { "epoch": 210.16716417910447, "grad_norm": 32.76605224609375, "learning_rate": 9.993788819875776e-06, "loss": 38.1907, "step": 8827 }, { "epoch": 210.1910447761194, "grad_norm": 52.76780700683594, "learning_rate": 9.992753623188408e-06, "loss": 37.3923, "step": 8828 }, { "epoch": 210.21492537313432, "grad_norm": 35.62137985229492, "learning_rate": 9.991718426501035e-06, "loss": 37.1639, "step": 8829 }, { "epoch": 210.23880597014926, "grad_norm": 38.32008361816406, "learning_rate": 9.990683229813667e-06, "loss": 36.3991, "step": 8830 }, { "epoch": 210.26268656716417, "grad_norm": 30.2634334564209, "learning_rate": 9.989648033126294e-06, "loss": 36.7869, "step": 8831 }, { "epoch": 210.2865671641791, "grad_norm": 22.547588348388672, "learning_rate": 9.988612836438924e-06, "loss": 37.0281, "step": 8832 }, { "epoch": 210.31044776119404, "grad_norm": 32.26959991455078, "learning_rate": 9.987577639751553e-06, "loss": 37.768, "step": 8833 }, { "epoch": 210.33432835820895, "grad_norm": 24.533544540405273, "learning_rate": 9.986542443064183e-06, "loss": 36.9135, "step": 8834 }, { "epoch": 210.3582089552239, "grad_norm": 17.129941940307617, "learning_rate": 9.985507246376813e-06, "loss": 38.2224, "step": 8835 }, { "epoch": 210.3820895522388, "grad_norm": 19.48621368408203, "learning_rate": 9.984472049689442e-06, "loss": 37.1503, "step": 8836 }, { "epoch": 210.40597014925373, "grad_norm": 19.355815887451172, "learning_rate": 9.983436853002072e-06, "loss": 37.1349, "step": 8837 }, { "epoch": 210.42985074626867, "grad_norm": 20.324405670166016, "learning_rate": 9.982401656314701e-06, "loss": 35.8842, "step": 8838 }, { "epoch": 210.45373134328358, "grad_norm": 15.293591499328613, "learning_rate": 9.981366459627329e-06, "loss": 36.4122, "step": 8839 }, { "epoch": 210.47761194029852, "grad_norm": 20.000492095947266, "learning_rate": 9.98033126293996e-06, "loss": 38.4082, "step": 8840 }, { "epoch": 210.50149253731342, "grad_norm": 17.6369686126709, "learning_rate": 9.979296066252588e-06, "loss": 38.5251, "step": 8841 }, { "epoch": 210.52537313432836, "grad_norm": 20.191181182861328, "learning_rate": 9.978260869565218e-06, "loss": 36.9463, "step": 8842 }, { "epoch": 210.54925373134327, "grad_norm": 16.443561553955078, "learning_rate": 9.977225672877847e-06, "loss": 37.2469, "step": 8843 }, { "epoch": 210.5731343283582, "grad_norm": 15.099014282226562, "learning_rate": 9.976190476190477e-06, "loss": 38.4285, "step": 8844 }, { "epoch": 210.59701492537314, "grad_norm": 15.890122413635254, "learning_rate": 9.975155279503106e-06, "loss": 37.8013, "step": 8845 }, { "epoch": 210.62089552238805, "grad_norm": 18.88666343688965, "learning_rate": 9.974120082815736e-06, "loss": 37.1099, "step": 8846 }, { "epoch": 210.644776119403, "grad_norm": 15.706725120544434, "learning_rate": 9.973084886128365e-06, "loss": 37.0593, "step": 8847 }, { "epoch": 210.6686567164179, "grad_norm": 24.356304168701172, "learning_rate": 9.972049689440995e-06, "loss": 37.0481, "step": 8848 }, { "epoch": 210.69253731343284, "grad_norm": 18.533910751342773, "learning_rate": 9.971014492753624e-06, "loss": 36.6005, "step": 8849 }, { "epoch": 210.71641791044777, "grad_norm": 16.038110733032227, "learning_rate": 9.969979296066254e-06, "loss": 37.6422, "step": 8850 }, { "epoch": 210.74029850746268, "grad_norm": 15.192877769470215, "learning_rate": 9.968944099378883e-06, "loss": 37.0933, "step": 8851 }, { "epoch": 210.76417910447762, "grad_norm": 16.551071166992188, "learning_rate": 9.967908902691513e-06, "loss": 37.2211, "step": 8852 }, { "epoch": 210.78805970149253, "grad_norm": 14.400941848754883, "learning_rate": 9.966873706004142e-06, "loss": 36.0341, "step": 8853 }, { "epoch": 210.81194029850747, "grad_norm": 15.2099027633667, "learning_rate": 9.96583850931677e-06, "loss": 36.9797, "step": 8854 }, { "epoch": 210.83582089552237, "grad_norm": 15.81094741821289, "learning_rate": 9.964803312629401e-06, "loss": 37.4321, "step": 8855 }, { "epoch": 210.8597014925373, "grad_norm": 17.03694725036621, "learning_rate": 9.96376811594203e-06, "loss": 38.3356, "step": 8856 }, { "epoch": 210.88358208955225, "grad_norm": 15.178110122680664, "learning_rate": 9.962732919254659e-06, "loss": 36.5125, "step": 8857 }, { "epoch": 210.90746268656716, "grad_norm": 16.529111862182617, "learning_rate": 9.961697722567288e-06, "loss": 36.6884, "step": 8858 }, { "epoch": 210.9313432835821, "grad_norm": 17.919584274291992, "learning_rate": 9.960662525879918e-06, "loss": 38.351, "step": 8859 }, { "epoch": 210.955223880597, "grad_norm": 15.563426971435547, "learning_rate": 9.959627329192547e-06, "loss": 38.2391, "step": 8860 }, { "epoch": 210.97910447761194, "grad_norm": 17.85155487060547, "learning_rate": 9.958592132505177e-06, "loss": 37.3985, "step": 8861 }, { "epoch": 211.0, "grad_norm": 16.511281967163086, "learning_rate": 9.957556935817806e-06, "loss": 32.5785, "step": 8862 }, { "epoch": 211.02388059701494, "grad_norm": 14.808560371398926, "learning_rate": 9.956521739130436e-06, "loss": 37.2203, "step": 8863 }, { "epoch": 211.04776119402985, "grad_norm": 15.766176223754883, "learning_rate": 9.955486542443065e-06, "loss": 37.1016, "step": 8864 }, { "epoch": 211.07164179104478, "grad_norm": 13.97423267364502, "learning_rate": 9.954451345755695e-06, "loss": 38.1213, "step": 8865 }, { "epoch": 211.0955223880597, "grad_norm": 17.683542251586914, "learning_rate": 9.953416149068323e-06, "loss": 37.1273, "step": 8866 }, { "epoch": 211.11940298507463, "grad_norm": 18.57022476196289, "learning_rate": 9.952380952380954e-06, "loss": 38.2498, "step": 8867 }, { "epoch": 211.14328358208957, "grad_norm": 18.91432762145996, "learning_rate": 9.951345755693582e-06, "loss": 36.2946, "step": 8868 }, { "epoch": 211.16716417910447, "grad_norm": NaN, "learning_rate": 9.950310559006211e-06, "loss": 35.6508, "step": 8869 }, { "epoch": 211.1910447761194, "grad_norm": 18.006126403808594, "learning_rate": 9.950310559006211e-06, "loss": 37.4559, "step": 8870 }, { "epoch": 211.21492537313432, "grad_norm": 14.555634498596191, "learning_rate": 9.949275362318841e-06, "loss": 37.3692, "step": 8871 }, { "epoch": 211.23880597014926, "grad_norm": 16.3458251953125, "learning_rate": 9.94824016563147e-06, "loss": 38.2418, "step": 8872 }, { "epoch": 211.26268656716417, "grad_norm": 13.63530445098877, "learning_rate": 9.9472049689441e-06, "loss": 35.4199, "step": 8873 }, { "epoch": 211.2865671641791, "grad_norm": 13.486242294311523, "learning_rate": 9.94616977225673e-06, "loss": 37.8011, "step": 8874 }, { "epoch": 211.31044776119404, "grad_norm": 14.73640251159668, "learning_rate": 9.945134575569359e-06, "loss": 36.5183, "step": 8875 }, { "epoch": 211.33432835820895, "grad_norm": 16.692045211791992, "learning_rate": 9.944099378881989e-06, "loss": 37.1129, "step": 8876 }, { "epoch": 211.3582089552239, "grad_norm": NaN, "learning_rate": 9.943064182194618e-06, "loss": 63.5603, "step": 8877 }, { "epoch": 211.3820895522388, "grad_norm": NaN, "learning_rate": 9.943064182194618e-06, "loss": 31.2282, "step": 8878 }, { "epoch": 211.40597014925373, "grad_norm": 17.002836227416992, "learning_rate": 9.943064182194618e-06, "loss": 37.1965, "step": 8879 }, { "epoch": 211.42985074626867, "grad_norm": 18.055377960205078, "learning_rate": 9.942028985507248e-06, "loss": 36.6305, "step": 8880 }, { "epoch": 211.45373134328358, "grad_norm": 15.369948387145996, "learning_rate": 9.940993788819877e-06, "loss": 36.7211, "step": 8881 }, { "epoch": 211.47761194029852, "grad_norm": 14.401118278503418, "learning_rate": 9.939958592132507e-06, "loss": 36.1752, "step": 8882 }, { "epoch": 211.50149253731342, "grad_norm": 17.921104431152344, "learning_rate": 9.938923395445136e-06, "loss": 38.1467, "step": 8883 }, { "epoch": 211.52537313432836, "grad_norm": 11.802447319030762, "learning_rate": 9.937888198757764e-06, "loss": 37.3524, "step": 8884 }, { "epoch": 211.54925373134327, "grad_norm": 19.655445098876953, "learning_rate": 9.936853002070395e-06, "loss": 35.8514, "step": 8885 }, { "epoch": 211.5731343283582, "grad_norm": 15.055081367492676, "learning_rate": 9.935817805383023e-06, "loss": 37.7844, "step": 8886 }, { "epoch": 211.59701492537314, "grad_norm": 19.81008529663086, "learning_rate": 9.934782608695653e-06, "loss": 37.4641, "step": 8887 }, { "epoch": 211.62089552238805, "grad_norm": 15.446284294128418, "learning_rate": 9.933747412008282e-06, "loss": 36.2631, "step": 8888 }, { "epoch": 211.644776119403, "grad_norm": 19.153871536254883, "learning_rate": 9.932712215320912e-06, "loss": 38.593, "step": 8889 }, { "epoch": 211.6686567164179, "grad_norm": 17.141765594482422, "learning_rate": 9.931677018633541e-06, "loss": 37.508, "step": 8890 }, { "epoch": 211.69253731343284, "grad_norm": 20.760438919067383, "learning_rate": 9.93064182194617e-06, "loss": 38.5176, "step": 8891 }, { "epoch": 211.71641791044777, "grad_norm": 16.805908203125, "learning_rate": 9.9296066252588e-06, "loss": 37.825, "step": 8892 }, { "epoch": 211.74029850746268, "grad_norm": 20.541210174560547, "learning_rate": 9.92857142857143e-06, "loss": 37.9388, "step": 8893 }, { "epoch": 211.76417910447762, "grad_norm": 18.6077880859375, "learning_rate": 9.927536231884058e-06, "loss": 37.2533, "step": 8894 }, { "epoch": 211.78805970149253, "grad_norm": NaN, "learning_rate": 9.926501035196689e-06, "loss": 56.6819, "step": 8895 }, { "epoch": 211.81194029850747, "grad_norm": 17.51033592224121, "learning_rate": 9.926501035196689e-06, "loss": 36.7446, "step": 8896 }, { "epoch": 211.83582089552237, "grad_norm": 19.205183029174805, "learning_rate": 9.925465838509317e-06, "loss": 36.8939, "step": 8897 }, { "epoch": 211.8597014925373, "grad_norm": 15.204777717590332, "learning_rate": 9.924430641821948e-06, "loss": 37.2798, "step": 8898 }, { "epoch": 211.88358208955225, "grad_norm": 15.828685760498047, "learning_rate": 9.923395445134576e-06, "loss": 36.6658, "step": 8899 }, { "epoch": 211.90746268656716, "grad_norm": 15.359902381896973, "learning_rate": 9.922360248447205e-06, "loss": 36.0643, "step": 8900 }, { "epoch": 211.9313432835821, "grad_norm": 23.389053344726562, "learning_rate": 9.921325051759835e-06, "loss": 37.1585, "step": 8901 }, { "epoch": 211.955223880597, "grad_norm": 16.01727867126465, "learning_rate": 9.920289855072464e-06, "loss": 36.6398, "step": 8902 }, { "epoch": 211.97910447761194, "grad_norm": 18.08060073852539, "learning_rate": 9.919254658385094e-06, "loss": 37.8393, "step": 8903 }, { "epoch": 212.0, "grad_norm": 14.446660995483398, "learning_rate": 9.918219461697723e-06, "loss": 31.096, "step": 8904 }, { "epoch": 212.02388059701494, "grad_norm": 17.174697875976562, "learning_rate": 9.917184265010353e-06, "loss": 36.3044, "step": 8905 }, { "epoch": 212.04776119402985, "grad_norm": 14.765143394470215, "learning_rate": 9.916149068322982e-06, "loss": 36.1536, "step": 8906 }, { "epoch": 212.07164179104478, "grad_norm": 17.595178604125977, "learning_rate": 9.915113871635612e-06, "loss": 36.9917, "step": 8907 }, { "epoch": 212.0955223880597, "grad_norm": 15.640548706054688, "learning_rate": 9.914078674948242e-06, "loss": 35.9084, "step": 8908 }, { "epoch": 212.11940298507463, "grad_norm": 19.422521591186523, "learning_rate": 9.913043478260871e-06, "loss": 36.4987, "step": 8909 }, { "epoch": 212.14328358208957, "grad_norm": 17.555795669555664, "learning_rate": 9.912008281573499e-06, "loss": 37.8731, "step": 8910 }, { "epoch": 212.16716417910447, "grad_norm": 22.860374450683594, "learning_rate": 9.91097308488613e-06, "loss": 36.6072, "step": 8911 }, { "epoch": 212.1910447761194, "grad_norm": 21.4945125579834, "learning_rate": 9.909937888198758e-06, "loss": 35.3901, "step": 8912 }, { "epoch": 212.21492537313432, "grad_norm": 17.8464298248291, "learning_rate": 9.90890269151139e-06, "loss": 36.3299, "step": 8913 }, { "epoch": 212.23880597014926, "grad_norm": 22.104679107666016, "learning_rate": 9.907867494824017e-06, "loss": 37.7505, "step": 8914 }, { "epoch": 212.26268656716417, "grad_norm": 17.718652725219727, "learning_rate": 9.906832298136647e-06, "loss": 36.346, "step": 8915 }, { "epoch": 212.2865671641791, "grad_norm": 16.828168869018555, "learning_rate": 9.905797101449276e-06, "loss": 36.2079, "step": 8916 }, { "epoch": 212.31044776119404, "grad_norm": 17.614103317260742, "learning_rate": 9.904761904761906e-06, "loss": 37.1404, "step": 8917 }, { "epoch": 212.33432835820895, "grad_norm": 25.996273040771484, "learning_rate": 9.903726708074535e-06, "loss": 37.5513, "step": 8918 }, { "epoch": 212.3582089552239, "grad_norm": 16.21636199951172, "learning_rate": 9.902691511387165e-06, "loss": 37.9346, "step": 8919 }, { "epoch": 212.3820895522388, "grad_norm": 20.41476821899414, "learning_rate": 9.901656314699794e-06, "loss": 38.6836, "step": 8920 }, { "epoch": 212.40597014925373, "grad_norm": 25.203231811523438, "learning_rate": 9.900621118012424e-06, "loss": 36.0799, "step": 8921 }, { "epoch": 212.42985074626867, "grad_norm": 18.574546813964844, "learning_rate": 9.899585921325052e-06, "loss": 36.3672, "step": 8922 }, { "epoch": 212.45373134328358, "grad_norm": 25.822708129882812, "learning_rate": 9.898550724637683e-06, "loss": 37.1143, "step": 8923 }, { "epoch": 212.47761194029852, "grad_norm": 18.57192611694336, "learning_rate": 9.89751552795031e-06, "loss": 37.4702, "step": 8924 }, { "epoch": 212.50149253731342, "grad_norm": NaN, "learning_rate": 9.89648033126294e-06, "loss": 55.2507, "step": 8925 }, { "epoch": 212.52537313432836, "grad_norm": 16.169931411743164, "learning_rate": 9.89648033126294e-06, "loss": 37.0546, "step": 8926 }, { "epoch": 212.54925373134327, "grad_norm": 37.11153030395508, "learning_rate": 9.89544513457557e-06, "loss": 37.5354, "step": 8927 }, { "epoch": 212.5731343283582, "grad_norm": 23.903852462768555, "learning_rate": 9.8944099378882e-06, "loss": 37.2159, "step": 8928 }, { "epoch": 212.59701492537314, "grad_norm": 40.34111785888672, "learning_rate": 9.893374741200829e-06, "loss": 36.4423, "step": 8929 }, { "epoch": 212.62089552238805, "grad_norm": 37.706871032714844, "learning_rate": 9.892339544513458e-06, "loss": 36.2335, "step": 8930 }, { "epoch": 212.644776119403, "grad_norm": 30.773954391479492, "learning_rate": 9.891304347826088e-06, "loss": 38.6459, "step": 8931 }, { "epoch": 212.6686567164179, "grad_norm": 32.333580017089844, "learning_rate": 9.890269151138717e-06, "loss": 37.8582, "step": 8932 }, { "epoch": 212.69253731343284, "grad_norm": 27.975984573364258, "learning_rate": 9.889233954451347e-06, "loss": 37.2514, "step": 8933 }, { "epoch": 212.71641791044777, "grad_norm": 26.902097702026367, "learning_rate": 9.888198757763976e-06, "loss": 37.0266, "step": 8934 }, { "epoch": 212.74029850746268, "grad_norm": 34.41338348388672, "learning_rate": 9.887163561076606e-06, "loss": 38.0053, "step": 8935 }, { "epoch": 212.76417910447762, "grad_norm": 29.491994857788086, "learning_rate": 9.886128364389235e-06, "loss": 37.6406, "step": 8936 }, { "epoch": 212.78805970149253, "grad_norm": 33.101356506347656, "learning_rate": 9.885093167701865e-06, "loss": 36.0706, "step": 8937 }, { "epoch": 212.81194029850747, "grad_norm": 25.407678604125977, "learning_rate": 9.884057971014493e-06, "loss": 37.6107, "step": 8938 }, { "epoch": 212.83582089552237, "grad_norm": 31.88374137878418, "learning_rate": 9.883022774327124e-06, "loss": 37.2537, "step": 8939 }, { "epoch": 212.8597014925373, "grad_norm": 27.657949447631836, "learning_rate": 9.881987577639752e-06, "loss": 37.4872, "step": 8940 }, { "epoch": 212.88358208955225, "grad_norm": 30.719676971435547, "learning_rate": 9.880952380952381e-06, "loss": 37.3958, "step": 8941 }, { "epoch": 212.90746268656716, "grad_norm": 25.53170394897461, "learning_rate": 9.879917184265011e-06, "loss": 37.8997, "step": 8942 }, { "epoch": 212.9313432835821, "grad_norm": 31.681127548217773, "learning_rate": 9.87888198757764e-06, "loss": 36.8812, "step": 8943 }, { "epoch": 212.955223880597, "grad_norm": 28.184024810791016, "learning_rate": 9.87784679089027e-06, "loss": 37.533, "step": 8944 }, { "epoch": 212.97910447761194, "grad_norm": 30.316749572753906, "learning_rate": 9.8768115942029e-06, "loss": 37.9202, "step": 8945 }, { "epoch": 213.0, "grad_norm": 23.389223098754883, "learning_rate": 9.875776397515529e-06, "loss": 32.0388, "step": 8946 }, { "epoch": 213.02388059701494, "grad_norm": 29.8602294921875, "learning_rate": 9.874741200828159e-06, "loss": 37.686, "step": 8947 }, { "epoch": 213.04776119402985, "grad_norm": 26.334007263183594, "learning_rate": 9.873706004140788e-06, "loss": 36.9403, "step": 8948 }, { "epoch": 213.07164179104478, "grad_norm": 30.050151824951172, "learning_rate": 9.872670807453418e-06, "loss": 37.0253, "step": 8949 }, { "epoch": 213.0955223880597, "grad_norm": 26.691158294677734, "learning_rate": 9.871635610766045e-06, "loss": 36.9505, "step": 8950 }, { "epoch": 213.11940298507463, "grad_norm": 31.54173469543457, "learning_rate": 9.870600414078677e-06, "loss": 36.5836, "step": 8951 }, { "epoch": 213.14328358208957, "grad_norm": 28.14643096923828, "learning_rate": 9.869565217391304e-06, "loss": 37.0423, "step": 8952 }, { "epoch": 213.16716417910447, "grad_norm": 29.28129005432129, "learning_rate": 9.868530020703934e-06, "loss": 36.4348, "step": 8953 }, { "epoch": 213.1910447761194, "grad_norm": 23.95172882080078, "learning_rate": 9.867494824016564e-06, "loss": 36.9681, "step": 8954 }, { "epoch": 213.21492537313432, "grad_norm": 30.376632690429688, "learning_rate": 9.866459627329193e-06, "loss": 38.3925, "step": 8955 }, { "epoch": 213.23880597014926, "grad_norm": 25.140405654907227, "learning_rate": 9.865424430641823e-06, "loss": 35.3339, "step": 8956 }, { "epoch": 213.26268656716417, "grad_norm": 29.816177368164062, "learning_rate": 9.864389233954452e-06, "loss": 36.7105, "step": 8957 }, { "epoch": 213.2865671641791, "grad_norm": 28.26422882080078, "learning_rate": 9.863354037267082e-06, "loss": 35.156, "step": 8958 }, { "epoch": 213.31044776119404, "grad_norm": 29.071168899536133, "learning_rate": 9.862318840579711e-06, "loss": 37.4681, "step": 8959 }, { "epoch": 213.33432835820895, "grad_norm": 26.617605209350586, "learning_rate": 9.861283643892339e-06, "loss": 36.2425, "step": 8960 }, { "epoch": 213.3582089552239, "grad_norm": 29.758590698242188, "learning_rate": 9.86024844720497e-06, "loss": 37.2265, "step": 8961 }, { "epoch": 213.3820895522388, "grad_norm": 25.337291717529297, "learning_rate": 9.8592132505176e-06, "loss": 37.793, "step": 8962 }, { "epoch": 213.40597014925373, "grad_norm": 31.47548484802246, "learning_rate": 9.85817805383023e-06, "loss": 37.1752, "step": 8963 }, { "epoch": 213.42985074626867, "grad_norm": 27.181623458862305, "learning_rate": 9.857142857142859e-06, "loss": 37.445, "step": 8964 }, { "epoch": 213.45373134328358, "grad_norm": 29.49827766418457, "learning_rate": 9.856107660455487e-06, "loss": 36.1177, "step": 8965 }, { "epoch": 213.47761194029852, "grad_norm": 28.424724578857422, "learning_rate": 9.855072463768118e-06, "loss": 37.834, "step": 8966 }, { "epoch": 213.50149253731342, "grad_norm": 27.049346923828125, "learning_rate": 9.854037267080746e-06, "loss": 37.0673, "step": 8967 }, { "epoch": 213.52537313432836, "grad_norm": 26.61677360534668, "learning_rate": 9.853002070393375e-06, "loss": 37.2056, "step": 8968 }, { "epoch": 213.54925373134327, "grad_norm": 29.839797973632812, "learning_rate": 9.851966873706005e-06, "loss": 37.1301, "step": 8969 }, { "epoch": 213.5731343283582, "grad_norm": 26.292333602905273, "learning_rate": 9.850931677018634e-06, "loss": 37.6243, "step": 8970 }, { "epoch": 213.59701492537314, "grad_norm": 32.21665573120117, "learning_rate": 9.849896480331264e-06, "loss": 38.6864, "step": 8971 }, { "epoch": 213.62089552238805, "grad_norm": 28.92923927307129, "learning_rate": 9.848861283643893e-06, "loss": 37.4806, "step": 8972 }, { "epoch": 213.644776119403, "grad_norm": 25.853914260864258, "learning_rate": 9.847826086956523e-06, "loss": 35.8329, "step": 8973 }, { "epoch": 213.6686567164179, "grad_norm": 25.25511360168457, "learning_rate": 9.846790890269152e-06, "loss": 37.6845, "step": 8974 }, { "epoch": 213.69253731343284, "grad_norm": 26.28786849975586, "learning_rate": 9.84575569358178e-06, "loss": 36.928, "step": 8975 }, { "epoch": 213.71641791044777, "grad_norm": 22.015167236328125, "learning_rate": 9.844720496894411e-06, "loss": 37.8689, "step": 8976 }, { "epoch": 213.74029850746268, "grad_norm": 32.0677490234375, "learning_rate": 9.84368530020704e-06, "loss": 36.9482, "step": 8977 }, { "epoch": 213.76417910447762, "grad_norm": 26.79532814025879, "learning_rate": 9.84265010351967e-06, "loss": 36.9821, "step": 8978 }, { "epoch": 213.78805970149253, "grad_norm": 30.176692962646484, "learning_rate": 9.841614906832298e-06, "loss": 36.7283, "step": 8979 }, { "epoch": 213.81194029850747, "grad_norm": 26.936988830566406, "learning_rate": 9.840579710144928e-06, "loss": 36.7164, "step": 8980 }, { "epoch": 213.83582089552237, "grad_norm": 30.490339279174805, "learning_rate": 9.839544513457557e-06, "loss": 36.4903, "step": 8981 }, { "epoch": 213.8597014925373, "grad_norm": 26.552900314331055, "learning_rate": 9.838509316770187e-06, "loss": 37.4003, "step": 8982 }, { "epoch": 213.88358208955225, "grad_norm": 25.060441970825195, "learning_rate": 9.837474120082817e-06, "loss": 36.2822, "step": 8983 }, { "epoch": 213.90746268656716, "grad_norm": 25.573841094970703, "learning_rate": 9.836438923395446e-06, "loss": 36.8246, "step": 8984 }, { "epoch": 213.9313432835821, "grad_norm": 28.39388656616211, "learning_rate": 9.835403726708076e-06, "loss": 36.6885, "step": 8985 }, { "epoch": 213.955223880597, "grad_norm": 21.96089744567871, "learning_rate": 9.834368530020705e-06, "loss": 36.3271, "step": 8986 }, { "epoch": 213.97910447761194, "grad_norm": 32.2141227722168, "learning_rate": 9.833333333333333e-06, "loss": 37.9622, "step": 8987 }, { "epoch": 214.0, "grad_norm": 25.528892517089844, "learning_rate": 9.832298136645964e-06, "loss": 32.4558, "step": 8988 }, { "epoch": 214.02388059701494, "grad_norm": 26.472932815551758, "learning_rate": 9.831262939958594e-06, "loss": 36.5696, "step": 8989 }, { "epoch": 214.04776119402985, "grad_norm": 25.03866195678711, "learning_rate": 9.830227743271222e-06, "loss": 35.5905, "step": 8990 }, { "epoch": 214.07164179104478, "grad_norm": 29.836627960205078, "learning_rate": 9.829192546583853e-06, "loss": 36.0014, "step": 8991 }, { "epoch": 214.0955223880597, "grad_norm": 26.660200119018555, "learning_rate": 9.82815734989648e-06, "loss": 36.8765, "step": 8992 }, { "epoch": 214.11940298507463, "grad_norm": 28.668292999267578, "learning_rate": 9.827122153209112e-06, "loss": 37.3687, "step": 8993 }, { "epoch": 214.14328358208957, "grad_norm": 25.813344955444336, "learning_rate": 9.82608695652174e-06, "loss": 37.2473, "step": 8994 }, { "epoch": 214.16716417910447, "grad_norm": 32.36681365966797, "learning_rate": 9.82505175983437e-06, "loss": 37.8641, "step": 8995 }, { "epoch": 214.1910447761194, "grad_norm": 27.731050491333008, "learning_rate": 9.824016563146999e-06, "loss": 36.6503, "step": 8996 }, { "epoch": 214.21492537313432, "grad_norm": 28.96619415283203, "learning_rate": 9.822981366459628e-06, "loss": 38.0194, "step": 8997 }, { "epoch": 214.23880597014926, "grad_norm": 25.87613868713379, "learning_rate": 9.821946169772258e-06, "loss": 36.4218, "step": 8998 }, { "epoch": 214.26268656716417, "grad_norm": 24.4296817779541, "learning_rate": 9.820910973084887e-06, "loss": 35.2054, "step": 8999 }, { "epoch": 214.2865671641791, "grad_norm": 22.787378311157227, "learning_rate": 9.819875776397517e-06, "loss": 37.2037, "step": 9000 }, { "epoch": 214.31044776119404, "grad_norm": 32.02442169189453, "learning_rate": 9.818840579710146e-06, "loss": 35.7057, "step": 9001 }, { "epoch": 214.33432835820895, "grad_norm": 27.07895851135254, "learning_rate": 9.817805383022774e-06, "loss": 36.0511, "step": 9002 }, { "epoch": 214.3582089552239, "grad_norm": 28.697946548461914, "learning_rate": 9.816770186335405e-06, "loss": 37.5292, "step": 9003 }, { "epoch": 214.3820895522388, "grad_norm": 25.989091873168945, "learning_rate": 9.815734989648033e-06, "loss": 37.1703, "step": 9004 }, { "epoch": 214.40597014925373, "grad_norm": 27.755807876586914, "learning_rate": 9.814699792960663e-06, "loss": 36.4164, "step": 9005 }, { "epoch": 214.42985074626867, "grad_norm": 24.19984245300293, "learning_rate": 9.813664596273292e-06, "loss": 37.2214, "step": 9006 }, { "epoch": 214.45373134328358, "grad_norm": 27.303367614746094, "learning_rate": 9.812629399585922e-06, "loss": 35.7464, "step": 9007 }, { "epoch": 214.47761194029852, "grad_norm": 26.95231819152832, "learning_rate": 9.811594202898551e-06, "loss": 38.0957, "step": 9008 }, { "epoch": 214.50149253731342, "grad_norm": 26.177330017089844, "learning_rate": 9.810559006211181e-06, "loss": 37.598, "step": 9009 }, { "epoch": 214.52537313432836, "grad_norm": 22.942121505737305, "learning_rate": 9.80952380952381e-06, "loss": 37.7739, "step": 9010 }, { "epoch": 214.54925373134327, "grad_norm": 29.14752960205078, "learning_rate": 9.80848861283644e-06, "loss": 37.2559, "step": 9011 }, { "epoch": 214.5731343283582, "grad_norm": 24.57861328125, "learning_rate": 9.80745341614907e-06, "loss": 37.3289, "step": 9012 }, { "epoch": 214.59701492537314, "grad_norm": 30.79751205444336, "learning_rate": 9.806418219461699e-06, "loss": 36.7896, "step": 9013 }, { "epoch": 214.62089552238805, "grad_norm": 28.287281036376953, "learning_rate": 9.805383022774327e-06, "loss": 37.0989, "step": 9014 }, { "epoch": 214.644776119403, "grad_norm": 28.00124168395996, "learning_rate": 9.804347826086958e-06, "loss": 37.511, "step": 9015 }, { "epoch": 214.6686567164179, "grad_norm": 24.868619918823242, "learning_rate": 9.803312629399588e-06, "loss": 36.6411, "step": 9016 }, { "epoch": 214.69253731343284, "grad_norm": 27.01886558532715, "learning_rate": 9.802277432712215e-06, "loss": 38.0124, "step": 9017 }, { "epoch": 214.71641791044777, "grad_norm": 23.0460147857666, "learning_rate": 9.801242236024847e-06, "loss": 36.6776, "step": 9018 }, { "epoch": 214.74029850746268, "grad_norm": 28.32352066040039, "learning_rate": 9.800207039337474e-06, "loss": 37.474, "step": 9019 }, { "epoch": 214.76417910447762, "grad_norm": 23.069040298461914, "learning_rate": 9.799171842650104e-06, "loss": 37.2404, "step": 9020 }, { "epoch": 214.78805970149253, "grad_norm": 30.9008731842041, "learning_rate": 9.798136645962734e-06, "loss": 38.2099, "step": 9021 }, { "epoch": 214.81194029850747, "grad_norm": 25.48306655883789, "learning_rate": 9.797101449275363e-06, "loss": 34.9577, "step": 9022 }, { "epoch": 214.83582089552237, "grad_norm": 31.529767990112305, "learning_rate": 9.796066252587993e-06, "loss": 36.741, "step": 9023 }, { "epoch": 214.8597014925373, "grad_norm": 28.165117263793945, "learning_rate": 9.795031055900622e-06, "loss": 37.6705, "step": 9024 }, { "epoch": 214.88358208955225, "grad_norm": 26.423799514770508, "learning_rate": 9.793995859213252e-06, "loss": 36.9861, "step": 9025 }, { "epoch": 214.90746268656716, "grad_norm": 23.710920333862305, "learning_rate": 9.792960662525881e-06, "loss": 37.4554, "step": 9026 }, { "epoch": 214.9313432835821, "grad_norm": 32.062286376953125, "learning_rate": 9.79192546583851e-06, "loss": 37.4132, "step": 9027 }, { "epoch": 214.955223880597, "grad_norm": 25.85164451599121, "learning_rate": 9.79089026915114e-06, "loss": 37.4271, "step": 9028 }, { "epoch": 214.97910447761194, "grad_norm": 28.508134841918945, "learning_rate": 9.789855072463768e-06, "loss": 37.9368, "step": 9029 }, { "epoch": 215.0, "grad_norm": 23.24784278869629, "learning_rate": 9.7888198757764e-06, "loss": 31.3253, "step": 9030 }, { "epoch": 215.02388059701494, "grad_norm": 26.680927276611328, "learning_rate": 9.787784679089027e-06, "loss": 37.5587, "step": 9031 }, { "epoch": 215.04776119402985, "grad_norm": 21.63987922668457, "learning_rate": 9.786749482401657e-06, "loss": 37.2078, "step": 9032 }, { "epoch": 215.07164179104478, "grad_norm": 28.85713768005371, "learning_rate": 9.785714285714286e-06, "loss": 37.9692, "step": 9033 }, { "epoch": 215.0955223880597, "grad_norm": 22.70249366760254, "learning_rate": 9.784679089026916e-06, "loss": 36.2635, "step": 9034 }, { "epoch": 215.11940298507463, "grad_norm": 31.306209564208984, "learning_rate": 9.783643892339545e-06, "loss": 36.2981, "step": 9035 }, { "epoch": 215.14328358208957, "grad_norm": 26.145404815673828, "learning_rate": 9.782608695652175e-06, "loss": 36.4388, "step": 9036 }, { "epoch": 215.16716417910447, "grad_norm": 23.636449813842773, "learning_rate": 9.781573498964804e-06, "loss": 36.396, "step": 9037 }, { "epoch": 215.1910447761194, "grad_norm": 22.753822326660156, "learning_rate": 9.780538302277434e-06, "loss": 36.2617, "step": 9038 }, { "epoch": 215.21492537313432, "grad_norm": 24.617338180541992, "learning_rate": 9.779503105590062e-06, "loss": 36.9093, "step": 9039 }, { "epoch": 215.23880597014926, "grad_norm": 19.743427276611328, "learning_rate": 9.778467908902693e-06, "loss": 36.941, "step": 9040 }, { "epoch": 215.26268656716417, "grad_norm": 27.8900146484375, "learning_rate": 9.77743271221532e-06, "loss": 36.108, "step": 9041 }, { "epoch": 215.2865671641791, "grad_norm": 21.104822158813477, "learning_rate": 9.776397515527952e-06, "loss": 37.6502, "step": 9042 }, { "epoch": 215.31044776119404, "grad_norm": 29.389020919799805, "learning_rate": 9.77536231884058e-06, "loss": 37.9045, "step": 9043 }, { "epoch": 215.33432835820895, "grad_norm": 25.60100555419922, "learning_rate": 9.77432712215321e-06, "loss": 37.3051, "step": 9044 }, { "epoch": 215.3582089552239, "grad_norm": 29.061010360717773, "learning_rate": 9.77329192546584e-06, "loss": 37.6249, "step": 9045 }, { "epoch": 215.3820895522388, "grad_norm": 24.267969131469727, "learning_rate": 9.772256728778468e-06, "loss": 36.8123, "step": 9046 }, { "epoch": 215.40597014925373, "grad_norm": 31.36228370666504, "learning_rate": 9.771221532091098e-06, "loss": 38.5689, "step": 9047 }, { "epoch": 215.42985074626867, "grad_norm": 21.266315460205078, "learning_rate": 9.770186335403727e-06, "loss": 36.8086, "step": 9048 }, { "epoch": 215.45373134328358, "grad_norm": 28.91936492919922, "learning_rate": 9.769151138716357e-06, "loss": 36.9067, "step": 9049 }, { "epoch": 215.47761194029852, "grad_norm": 21.60762596130371, "learning_rate": 9.768115942028986e-06, "loss": 37.4202, "step": 9050 }, { "epoch": 215.50149253731342, "grad_norm": 26.22998046875, "learning_rate": 9.767080745341616e-06, "loss": 35.7377, "step": 9051 }, { "epoch": 215.52537313432836, "grad_norm": 20.090837478637695, "learning_rate": 9.766045548654246e-06, "loss": 37.8684, "step": 9052 }, { "epoch": 215.54925373134327, "grad_norm": 31.179576873779297, "learning_rate": 9.765010351966875e-06, "loss": 36.2355, "step": 9053 }, { "epoch": 215.5731343283582, "grad_norm": 23.71304702758789, "learning_rate": 9.763975155279503e-06, "loss": 36.1069, "step": 9054 }, { "epoch": 215.59701492537314, "grad_norm": 32.945030212402344, "learning_rate": 9.762939958592134e-06, "loss": 37.4249, "step": 9055 }, { "epoch": 215.62089552238805, "grad_norm": 32.486446380615234, "learning_rate": 9.761904761904762e-06, "loss": 36.4778, "step": 9056 }, { "epoch": 215.644776119403, "grad_norm": 29.82095718383789, "learning_rate": 9.760869565217393e-06, "loss": 37.1965, "step": 9057 }, { "epoch": 215.6686567164179, "grad_norm": 26.731918334960938, "learning_rate": 9.759834368530021e-06, "loss": 37.4825, "step": 9058 }, { "epoch": 215.69253731343284, "grad_norm": 24.850849151611328, "learning_rate": 9.75879917184265e-06, "loss": 37.2026, "step": 9059 }, { "epoch": 215.71641791044777, "grad_norm": 19.47140884399414, "learning_rate": 9.75776397515528e-06, "loss": 36.0008, "step": 9060 }, { "epoch": 215.74029850746268, "grad_norm": 24.869354248046875, "learning_rate": 9.75672877846791e-06, "loss": 36.0339, "step": 9061 }, { "epoch": 215.76417910447762, "grad_norm": 19.912994384765625, "learning_rate": 9.755693581780539e-06, "loss": 37.9942, "step": 9062 }, { "epoch": 215.78805970149253, "grad_norm": 26.943859100341797, "learning_rate": 9.754658385093169e-06, "loss": 36.5328, "step": 9063 }, { "epoch": 215.81194029850747, "grad_norm": 21.20590591430664, "learning_rate": 9.753623188405798e-06, "loss": 36.6017, "step": 9064 }, { "epoch": 215.83582089552237, "grad_norm": 24.163738250732422, "learning_rate": 9.752587991718428e-06, "loss": 37.0538, "step": 9065 }, { "epoch": 215.8597014925373, "grad_norm": 20.0353946685791, "learning_rate": 9.751552795031056e-06, "loss": 36.0332, "step": 9066 }, { "epoch": 215.88358208955225, "grad_norm": 26.323434829711914, "learning_rate": 9.750517598343687e-06, "loss": 38.0896, "step": 9067 }, { "epoch": 215.90746268656716, "grad_norm": 18.738004684448242, "learning_rate": 9.749482401656315e-06, "loss": 36.6215, "step": 9068 }, { "epoch": 215.9313432835821, "grad_norm": 31.44061279296875, "learning_rate": 9.748447204968944e-06, "loss": 37.4861, "step": 9069 }, { "epoch": 215.955223880597, "grad_norm": 24.629789352416992, "learning_rate": 9.747412008281574e-06, "loss": 38.1235, "step": 9070 }, { "epoch": 215.97910447761194, "grad_norm": 21.770977020263672, "learning_rate": 9.746376811594203e-06, "loss": 36.6435, "step": 9071 }, { "epoch": 216.0, "grad_norm": 16.773563385009766, "learning_rate": 9.745341614906834e-06, "loss": 31.4797, "step": 9072 }, { "epoch": 216.02388059701494, "grad_norm": 23.379308700561523, "learning_rate": 9.744306418219462e-06, "loss": 36.9589, "step": 9073 }, { "epoch": 216.04776119402985, "grad_norm": 18.13448715209961, "learning_rate": 9.743271221532092e-06, "loss": 37.2716, "step": 9074 }, { "epoch": 216.07164179104478, "grad_norm": 20.16269302368164, "learning_rate": 9.742236024844721e-06, "loss": 37.6363, "step": 9075 }, { "epoch": 216.0955223880597, "grad_norm": 18.20665740966797, "learning_rate": 9.74120082815735e-06, "loss": 36.0095, "step": 9076 }, { "epoch": 216.11940298507463, "grad_norm": 19.75546646118164, "learning_rate": 9.74016563146998e-06, "loss": 36.5171, "step": 9077 }, { "epoch": 216.14328358208957, "grad_norm": 19.876527786254883, "learning_rate": 9.73913043478261e-06, "loss": 36.8232, "step": 9078 }, { "epoch": 216.16716417910447, "grad_norm": 15.993359565734863, "learning_rate": 9.73809523809524e-06, "loss": 36.62, "step": 9079 }, { "epoch": 216.1910447761194, "grad_norm": 21.49464225769043, "learning_rate": 9.737060041407869e-06, "loss": 37.1535, "step": 9080 }, { "epoch": 216.21492537313432, "grad_norm": 17.079620361328125, "learning_rate": 9.736024844720497e-06, "loss": 37.3859, "step": 9081 }, { "epoch": 216.23880597014926, "grad_norm": 16.47243881225586, "learning_rate": 9.734989648033128e-06, "loss": 36.8688, "step": 9082 }, { "epoch": 216.26268656716417, "grad_norm": 19.390705108642578, "learning_rate": 9.733954451345756e-06, "loss": 37.118, "step": 9083 }, { "epoch": 216.2865671641791, "grad_norm": 18.31181526184082, "learning_rate": 9.732919254658385e-06, "loss": 37.6145, "step": 9084 }, { "epoch": 216.31044776119404, "grad_norm": 15.150775909423828, "learning_rate": 9.731884057971015e-06, "loss": 35.758, "step": 9085 }, { "epoch": 216.33432835820895, "grad_norm": 20.944284439086914, "learning_rate": 9.730848861283644e-06, "loss": 36.9532, "step": 9086 }, { "epoch": 216.3582089552239, "grad_norm": 16.172500610351562, "learning_rate": 9.729813664596274e-06, "loss": 36.6033, "step": 9087 }, { "epoch": 216.3820895522388, "grad_norm": 21.084178924560547, "learning_rate": 9.728778467908903e-06, "loss": 36.6835, "step": 9088 }, { "epoch": 216.40597014925373, "grad_norm": 17.895221710205078, "learning_rate": 9.727743271221533e-06, "loss": 36.5752, "step": 9089 }, { "epoch": 216.42985074626867, "grad_norm": 21.309843063354492, "learning_rate": 9.726708074534163e-06, "loss": 37.3744, "step": 9090 }, { "epoch": 216.45373134328358, "grad_norm": 16.370155334472656, "learning_rate": 9.725672877846792e-06, "loss": 37.1859, "step": 9091 }, { "epoch": 216.47761194029852, "grad_norm": 21.996126174926758, "learning_rate": 9.724637681159422e-06, "loss": 35.2165, "step": 9092 }, { "epoch": 216.50149253731342, "grad_norm": 20.077713012695312, "learning_rate": 9.72360248447205e-06, "loss": 36.995, "step": 9093 }, { "epoch": 216.52537313432836, "grad_norm": 19.9365177154541, "learning_rate": 9.72256728778468e-06, "loss": 36.6285, "step": 9094 }, { "epoch": 216.54925373134327, "grad_norm": 23.95235252380371, "learning_rate": 9.721532091097308e-06, "loss": 36.7808, "step": 9095 }, { "epoch": 216.5731343283582, "grad_norm": 17.251880645751953, "learning_rate": 9.720496894409938e-06, "loss": 36.7247, "step": 9096 }, { "epoch": 216.59701492537314, "grad_norm": 26.106557846069336, "learning_rate": 9.719461697722568e-06, "loss": 37.7299, "step": 9097 }, { "epoch": 216.62089552238805, "grad_norm": 20.359811782836914, "learning_rate": 9.718426501035197e-06, "loss": 36.8477, "step": 9098 }, { "epoch": 216.644776119403, "grad_norm": 30.013118743896484, "learning_rate": 9.717391304347827e-06, "loss": 38.8294, "step": 9099 }, { "epoch": 216.6686567164179, "grad_norm": 21.708553314208984, "learning_rate": 9.716356107660456e-06, "loss": 37.0413, "step": 9100 }, { "epoch": 216.69253731343284, "grad_norm": 26.785297393798828, "learning_rate": 9.715320910973086e-06, "loss": 37.5232, "step": 9101 }, { "epoch": 216.71641791044777, "grad_norm": 25.817947387695312, "learning_rate": 9.714285714285715e-06, "loss": 36.6427, "step": 9102 }, { "epoch": 216.74029850746268, "grad_norm": 25.02275276184082, "learning_rate": 9.713250517598345e-06, "loss": 37.5533, "step": 9103 }, { "epoch": 216.76417910447762, "grad_norm": 23.612350463867188, "learning_rate": 9.712215320910974e-06, "loss": 36.6638, "step": 9104 }, { "epoch": 216.78805970149253, "grad_norm": 24.220951080322266, "learning_rate": 9.711180124223604e-06, "loss": 37.8171, "step": 9105 }, { "epoch": 216.81194029850747, "grad_norm": 23.3463191986084, "learning_rate": 9.710144927536233e-06, "loss": 36.1184, "step": 9106 }, { "epoch": 216.83582089552237, "grad_norm": 22.078628540039062, "learning_rate": 9.709109730848863e-06, "loss": 36.764, "step": 9107 }, { "epoch": 216.8597014925373, "grad_norm": 20.95184326171875, "learning_rate": 9.70807453416149e-06, "loss": 35.9814, "step": 9108 }, { "epoch": 216.88358208955225, "grad_norm": 22.184547424316406, "learning_rate": 9.707039337474122e-06, "loss": 37.7234, "step": 9109 }, { "epoch": 216.90746268656716, "grad_norm": 21.662275314331055, "learning_rate": 9.70600414078675e-06, "loss": 37.3595, "step": 9110 }, { "epoch": 216.9313432835821, "grad_norm": 20.41446304321289, "learning_rate": 9.70496894409938e-06, "loss": 37.1269, "step": 9111 }, { "epoch": 216.955223880597, "grad_norm": 19.57245445251465, "learning_rate": 9.703933747412009e-06, "loss": 36.0114, "step": 9112 }, { "epoch": 216.97910447761194, "grad_norm": 18.723487854003906, "learning_rate": 9.702898550724638e-06, "loss": 36.2825, "step": 9113 }, { "epoch": 217.0, "grad_norm": 16.511157989501953, "learning_rate": 9.701863354037268e-06, "loss": 33.4808, "step": 9114 }, { "epoch": 217.02388059701494, "grad_norm": 18.5831298828125, "learning_rate": 9.700828157349897e-06, "loss": 36.5857, "step": 9115 }, { "epoch": 217.04776119402985, "grad_norm": 20.021087646484375, "learning_rate": 9.699792960662527e-06, "loss": 36.9092, "step": 9116 }, { "epoch": 217.07164179104478, "grad_norm": 15.198619842529297, "learning_rate": 9.698757763975156e-06, "loss": 35.8288, "step": 9117 }, { "epoch": 217.0955223880597, "grad_norm": 19.33254623413086, "learning_rate": 9.697722567287784e-06, "loss": 36.4575, "step": 9118 }, { "epoch": 217.11940298507463, "grad_norm": 15.601790428161621, "learning_rate": 9.696687370600415e-06, "loss": 37.7686, "step": 9119 }, { "epoch": 217.14328358208957, "grad_norm": NaN, "learning_rate": 9.695652173913043e-06, "loss": 65.063, "step": 9120 }, { "epoch": 217.16716417910447, "grad_norm": 22.725147247314453, "learning_rate": 9.695652173913043e-06, "loss": 36.1623, "step": 9121 }, { "epoch": 217.1910447761194, "grad_norm": 18.391794204711914, "learning_rate": 9.694616977225675e-06, "loss": 36.8899, "step": 9122 }, { "epoch": 217.21492537313432, "grad_norm": 18.53925895690918, "learning_rate": 9.693581780538302e-06, "loss": 37.4847, "step": 9123 }, { "epoch": 217.23880597014926, "grad_norm": 19.029287338256836, "learning_rate": 9.692546583850932e-06, "loss": 36.9624, "step": 9124 }, { "epoch": 217.26268656716417, "grad_norm": 16.01725959777832, "learning_rate": 9.691511387163561e-06, "loss": 36.1798, "step": 9125 }, { "epoch": 217.2865671641791, "grad_norm": 21.95234489440918, "learning_rate": 9.690476190476191e-06, "loss": 36.9395, "step": 9126 }, { "epoch": 217.31044776119404, "grad_norm": 17.638107299804688, "learning_rate": 9.68944099378882e-06, "loss": 36.2656, "step": 9127 }, { "epoch": 217.33432835820895, "grad_norm": 19.56740379333496, "learning_rate": 9.68840579710145e-06, "loss": 35.8209, "step": 9128 }, { "epoch": 217.3582089552239, "grad_norm": 16.121322631835938, "learning_rate": 9.68737060041408e-06, "loss": 37.7785, "step": 9129 }, { "epoch": 217.3820895522388, "grad_norm": 19.964323043823242, "learning_rate": 9.686335403726709e-06, "loss": 36.8468, "step": 9130 }, { "epoch": 217.40597014925373, "grad_norm": 14.685638427734375, "learning_rate": 9.685300207039339e-06, "loss": 37.3379, "step": 9131 }, { "epoch": 217.42985074626867, "grad_norm": 18.589954376220703, "learning_rate": 9.684265010351968e-06, "loss": 37.4971, "step": 9132 }, { "epoch": 217.45373134328358, "grad_norm": 15.052332878112793, "learning_rate": 9.683229813664598e-06, "loss": 36.1428, "step": 9133 }, { "epoch": 217.47761194029852, "grad_norm": 15.699575424194336, "learning_rate": 9.682194616977225e-06, "loss": 38.2258, "step": 9134 }, { "epoch": 217.50149253731342, "grad_norm": 20.121877670288086, "learning_rate": 9.681159420289857e-06, "loss": 37.3783, "step": 9135 }, { "epoch": 217.52537313432836, "grad_norm": 14.712428092956543, "learning_rate": 9.680124223602485e-06, "loss": 37.149, "step": 9136 }, { "epoch": 217.54925373134327, "grad_norm": 16.807716369628906, "learning_rate": 9.679089026915116e-06, "loss": 37.1475, "step": 9137 }, { "epoch": 217.5731343283582, "grad_norm": 17.029197692871094, "learning_rate": 9.678053830227744e-06, "loss": 37.0724, "step": 9138 }, { "epoch": 217.59701492537314, "grad_norm": 15.771768569946289, "learning_rate": 9.677018633540373e-06, "loss": 37.7616, "step": 9139 }, { "epoch": 217.62089552238805, "grad_norm": 17.461040496826172, "learning_rate": 9.675983436853003e-06, "loss": 36.4125, "step": 9140 }, { "epoch": 217.644776119403, "grad_norm": 15.030492782592773, "learning_rate": 9.674948240165632e-06, "loss": 36.2567, "step": 9141 }, { "epoch": 217.6686567164179, "grad_norm": 17.556913375854492, "learning_rate": 9.673913043478262e-06, "loss": 35.6493, "step": 9142 }, { "epoch": 217.69253731343284, "grad_norm": 15.683633804321289, "learning_rate": 9.672877846790891e-06, "loss": 36.5538, "step": 9143 }, { "epoch": 217.71641791044777, "grad_norm": 19.198259353637695, "learning_rate": 9.67184265010352e-06, "loss": 37.0452, "step": 9144 }, { "epoch": 217.74029850746268, "grad_norm": 19.338232040405273, "learning_rate": 9.67080745341615e-06, "loss": 37.4315, "step": 9145 }, { "epoch": 217.76417910447762, "grad_norm": 21.04673194885254, "learning_rate": 9.669772256728778e-06, "loss": 37.6606, "step": 9146 }, { "epoch": 217.78805970149253, "grad_norm": 15.2819242477417, "learning_rate": 9.66873706004141e-06, "loss": 35.3826, "step": 9147 }, { "epoch": 217.81194029850747, "grad_norm": 18.835935592651367, "learning_rate": 9.667701863354037e-06, "loss": 37.4417, "step": 9148 }, { "epoch": 217.83582089552237, "grad_norm": 19.404373168945312, "learning_rate": 9.666666666666667e-06, "loss": 35.8734, "step": 9149 }, { "epoch": 217.8597014925373, "grad_norm": 15.097556114196777, "learning_rate": 9.665631469979296e-06, "loss": 37.443, "step": 9150 }, { "epoch": 217.88358208955225, "grad_norm": 19.009340286254883, "learning_rate": 9.664596273291926e-06, "loss": 37.6373, "step": 9151 }, { "epoch": 217.90746268656716, "grad_norm": 20.086925506591797, "learning_rate": 9.663561076604555e-06, "loss": 36.842, "step": 9152 }, { "epoch": 217.9313432835821, "grad_norm": 13.27296257019043, "learning_rate": 9.662525879917185e-06, "loss": 37.3396, "step": 9153 }, { "epoch": 217.955223880597, "grad_norm": 17.1934814453125, "learning_rate": 9.661490683229814e-06, "loss": 37.7651, "step": 9154 }, { "epoch": 217.97910447761194, "grad_norm": 20.468364715576172, "learning_rate": 9.660455486542444e-06, "loss": 35.9981, "step": 9155 }, { "epoch": 218.0, "grad_norm": 15.743645668029785, "learning_rate": 9.659420289855073e-06, "loss": 32.9107, "step": 9156 }, { "epoch": 218.02388059701494, "grad_norm": 13.317445755004883, "learning_rate": 9.658385093167703e-06, "loss": 34.8715, "step": 9157 }, { "epoch": 218.04776119402985, "grad_norm": 16.343395233154297, "learning_rate": 9.657349896480332e-06, "loss": 36.6059, "step": 9158 }, { "epoch": 218.07164179104478, "grad_norm": 14.639159202575684, "learning_rate": 9.656314699792962e-06, "loss": 37.2106, "step": 9159 }, { "epoch": 218.0955223880597, "grad_norm": 14.244100570678711, "learning_rate": 9.655279503105592e-06, "loss": 37.2165, "step": 9160 }, { "epoch": 218.11940298507463, "grad_norm": 16.957246780395508, "learning_rate": 9.65424430641822e-06, "loss": 36.6862, "step": 9161 }, { "epoch": 218.14328358208957, "grad_norm": 12.883386611938477, "learning_rate": 9.65320910973085e-06, "loss": 37.553, "step": 9162 }, { "epoch": 218.16716417910447, "grad_norm": 20.242103576660156, "learning_rate": 9.652173913043478e-06, "loss": 36.5785, "step": 9163 }, { "epoch": 218.1910447761194, "grad_norm": 16.585161209106445, "learning_rate": 9.651138716356108e-06, "loss": 38.3141, "step": 9164 }, { "epoch": 218.21492537313432, "grad_norm": 20.26701545715332, "learning_rate": 9.650103519668737e-06, "loss": 35.7752, "step": 9165 }, { "epoch": 218.23880597014926, "grad_norm": 19.144397735595703, "learning_rate": 9.649068322981367e-06, "loss": 36.6697, "step": 9166 }, { "epoch": 218.26268656716417, "grad_norm": 19.420429229736328, "learning_rate": 9.648033126293997e-06, "loss": 36.0501, "step": 9167 }, { "epoch": 218.2865671641791, "grad_norm": 17.626617431640625, "learning_rate": 9.646997929606626e-06, "loss": 36.0203, "step": 9168 }, { "epoch": 218.31044776119404, "grad_norm": 18.067670822143555, "learning_rate": 9.645962732919256e-06, "loss": 37.7678, "step": 9169 }, { "epoch": 218.33432835820895, "grad_norm": 16.033538818359375, "learning_rate": 9.644927536231885e-06, "loss": 36.5795, "step": 9170 }, { "epoch": 218.3582089552239, "grad_norm": 17.337013244628906, "learning_rate": 9.643892339544515e-06, "loss": 37.7038, "step": 9171 }, { "epoch": 218.3820895522388, "grad_norm": 15.980463027954102, "learning_rate": 9.642857142857144e-06, "loss": 37.8133, "step": 9172 }, { "epoch": 218.40597014925373, "grad_norm": 18.340688705444336, "learning_rate": 9.641821946169772e-06, "loss": 36.7869, "step": 9173 }, { "epoch": 218.42985074626867, "grad_norm": 19.210174560546875, "learning_rate": 9.640786749482403e-06, "loss": 37.0788, "step": 9174 }, { "epoch": 218.45373134328358, "grad_norm": 19.932706832885742, "learning_rate": 9.639751552795031e-06, "loss": 36.5435, "step": 9175 }, { "epoch": 218.47761194029852, "grad_norm": 13.324295997619629, "learning_rate": 9.63871635610766e-06, "loss": 35.4878, "step": 9176 }, { "epoch": 218.50149253731342, "grad_norm": 18.737878799438477, "learning_rate": 9.63768115942029e-06, "loss": 36.5564, "step": 9177 }, { "epoch": 218.52537313432836, "grad_norm": 14.989187240600586, "learning_rate": 9.63664596273292e-06, "loss": 36.5208, "step": 9178 }, { "epoch": 218.54925373134327, "grad_norm": 20.792604446411133, "learning_rate": 9.63561076604555e-06, "loss": 35.9787, "step": 9179 }, { "epoch": 218.5731343283582, "grad_norm": 16.712087631225586, "learning_rate": 9.634575569358179e-06, "loss": 35.9913, "step": 9180 }, { "epoch": 218.59701492537314, "grad_norm": 19.96718406677246, "learning_rate": 9.633540372670808e-06, "loss": 38.5806, "step": 9181 }, { "epoch": 218.62089552238805, "grad_norm": 17.18105697631836, "learning_rate": 9.632505175983438e-06, "loss": 36.9286, "step": 9182 }, { "epoch": 218.644776119403, "grad_norm": 17.91598892211914, "learning_rate": 9.631469979296067e-06, "loss": 36.6085, "step": 9183 }, { "epoch": 218.6686567164179, "grad_norm": 17.956539154052734, "learning_rate": 9.630434782608697e-06, "loss": 36.1301, "step": 9184 }, { "epoch": 218.69253731343284, "grad_norm": 19.377994537353516, "learning_rate": 9.629399585921326e-06, "loss": 36.9346, "step": 9185 }, { "epoch": 218.71641791044777, "grad_norm": 18.69805145263672, "learning_rate": 9.628364389233956e-06, "loss": 36.7145, "step": 9186 }, { "epoch": 218.74029850746268, "grad_norm": 17.211429595947266, "learning_rate": 9.627329192546585e-06, "loss": 38.0607, "step": 9187 }, { "epoch": 218.76417910447762, "grad_norm": 18.123088836669922, "learning_rate": 9.626293995859213e-06, "loss": 36.0628, "step": 9188 }, { "epoch": 218.78805970149253, "grad_norm": NaN, "learning_rate": 9.625258799171844e-06, "loss": 52.3213, "step": 9189 }, { "epoch": 218.81194029850747, "grad_norm": 15.918928146362305, "learning_rate": 9.625258799171844e-06, "loss": 37.8708, "step": 9190 }, { "epoch": 218.83582089552237, "grad_norm": 19.9727840423584, "learning_rate": 9.624223602484472e-06, "loss": 36.9569, "step": 9191 }, { "epoch": 218.8597014925373, "grad_norm": 18.443010330200195, "learning_rate": 9.623188405797102e-06, "loss": 37.2799, "step": 9192 }, { "epoch": 218.88358208955225, "grad_norm": 18.08015251159668, "learning_rate": 9.622153209109731e-06, "loss": 36.4251, "step": 9193 }, { "epoch": 218.90746268656716, "grad_norm": 15.19926643371582, "learning_rate": 9.621118012422361e-06, "loss": 38.557, "step": 9194 }, { "epoch": 218.9313432835821, "grad_norm": 14.689451217651367, "learning_rate": 9.62008281573499e-06, "loss": 37.5389, "step": 9195 }, { "epoch": 218.955223880597, "grad_norm": 16.193313598632812, "learning_rate": 9.61904761904762e-06, "loss": 35.9918, "step": 9196 }, { "epoch": 218.97910447761194, "grad_norm": 13.192307472229004, "learning_rate": 9.61801242236025e-06, "loss": 35.8413, "step": 9197 }, { "epoch": 219.0, "grad_norm": 13.88156509399414, "learning_rate": 9.616977225672879e-06, "loss": 32.3184, "step": 9198 }, { "epoch": 219.02388059701494, "grad_norm": 14.823074340820312, "learning_rate": 9.615942028985507e-06, "loss": 35.9319, "step": 9199 }, { "epoch": 219.04776119402985, "grad_norm": 17.479848861694336, "learning_rate": 9.614906832298138e-06, "loss": 35.9434, "step": 9200 }, { "epoch": 219.07164179104478, "grad_norm": 15.585541725158691, "learning_rate": 9.613871635610766e-06, "loss": 36.0488, "step": 9201 }, { "epoch": 219.0955223880597, "grad_norm": 18.069622039794922, "learning_rate": 9.612836438923397e-06, "loss": 36.7673, "step": 9202 }, { "epoch": 219.11940298507463, "grad_norm": 23.847604751586914, "learning_rate": 9.611801242236025e-06, "loss": 37.5001, "step": 9203 }, { "epoch": 219.14328358208957, "grad_norm": 14.445686340332031, "learning_rate": 9.610766045548654e-06, "loss": 37.0008, "step": 9204 }, { "epoch": 219.16716417910447, "grad_norm": 19.418006896972656, "learning_rate": 9.609730848861284e-06, "loss": 37.5214, "step": 9205 }, { "epoch": 219.1910447761194, "grad_norm": 19.122482299804688, "learning_rate": 9.608695652173914e-06, "loss": 36.9843, "step": 9206 }, { "epoch": 219.21492537313432, "grad_norm": 13.655843734741211, "learning_rate": 9.607660455486543e-06, "loss": 35.9457, "step": 9207 }, { "epoch": 219.23880597014926, "grad_norm": 17.065208435058594, "learning_rate": 9.606625258799173e-06, "loss": 37.2418, "step": 9208 }, { "epoch": 219.26268656716417, "grad_norm": 13.323447227478027, "learning_rate": 9.605590062111802e-06, "loss": 37.8345, "step": 9209 }, { "epoch": 219.2865671641791, "grad_norm": 17.5530948638916, "learning_rate": 9.604554865424432e-06, "loss": 36.3977, "step": 9210 }, { "epoch": 219.31044776119404, "grad_norm": 16.106706619262695, "learning_rate": 9.603519668737061e-06, "loss": 36.4268, "step": 9211 }, { "epoch": 219.33432835820895, "grad_norm": 13.573047637939453, "learning_rate": 9.60248447204969e-06, "loss": 35.8055, "step": 9212 }, { "epoch": 219.3582089552239, "grad_norm": 16.16801643371582, "learning_rate": 9.60144927536232e-06, "loss": 36.2473, "step": 9213 }, { "epoch": 219.3820895522388, "grad_norm": 14.158682823181152, "learning_rate": 9.600414078674948e-06, "loss": 36.0948, "step": 9214 }, { "epoch": 219.40597014925373, "grad_norm": 12.716529846191406, "learning_rate": 9.59937888198758e-06, "loss": 36.4154, "step": 9215 }, { "epoch": 219.42985074626867, "grad_norm": 14.64181900024414, "learning_rate": 9.598343685300207e-06, "loss": 37.5303, "step": 9216 }, { "epoch": 219.45373134328358, "grad_norm": NaN, "learning_rate": 9.597308488612838e-06, "loss": 41.3461, "step": 9217 }, { "epoch": 219.47761194029852, "grad_norm": 12.889041900634766, "learning_rate": 9.597308488612838e-06, "loss": 36.4206, "step": 9218 }, { "epoch": 219.50149253731342, "grad_norm": 15.781641006469727, "learning_rate": 9.596273291925466e-06, "loss": 37.3701, "step": 9219 }, { "epoch": 219.52537313432836, "grad_norm": 14.214648246765137, "learning_rate": 9.595238095238096e-06, "loss": 36.8136, "step": 9220 }, { "epoch": 219.54925373134327, "grad_norm": 16.5802001953125, "learning_rate": 9.594202898550725e-06, "loss": 36.9532, "step": 9221 }, { "epoch": 219.5731343283582, "grad_norm": 13.49622917175293, "learning_rate": 9.593167701863355e-06, "loss": 36.7924, "step": 9222 }, { "epoch": 219.59701492537314, "grad_norm": 15.241145133972168, "learning_rate": 9.592132505175984e-06, "loss": 37.2905, "step": 9223 }, { "epoch": 219.62089552238805, "grad_norm": 14.371820449829102, "learning_rate": 9.591097308488614e-06, "loss": 37.6425, "step": 9224 }, { "epoch": 219.644776119403, "grad_norm": 16.55687141418457, "learning_rate": 9.590062111801243e-06, "loss": 37.3197, "step": 9225 }, { "epoch": 219.6686567164179, "grad_norm": 18.263837814331055, "learning_rate": 9.589026915113873e-06, "loss": 36.9441, "step": 9226 }, { "epoch": 219.69253731343284, "grad_norm": 16.113059997558594, "learning_rate": 9.5879917184265e-06, "loss": 36.8065, "step": 9227 }, { "epoch": 219.71641791044777, "grad_norm": 17.373727798461914, "learning_rate": 9.586956521739132e-06, "loss": 36.2574, "step": 9228 }, { "epoch": 219.74029850746268, "grad_norm": 16.516822814941406, "learning_rate": 9.58592132505176e-06, "loss": 35.8442, "step": 9229 }, { "epoch": 219.76417910447762, "grad_norm": 16.880693435668945, "learning_rate": 9.58488612836439e-06, "loss": 37.4098, "step": 9230 }, { "epoch": 219.78805970149253, "grad_norm": 14.671090126037598, "learning_rate": 9.583850931677019e-06, "loss": 38.1314, "step": 9231 }, { "epoch": 219.81194029850747, "grad_norm": 14.153200149536133, "learning_rate": 9.582815734989648e-06, "loss": 36.2577, "step": 9232 }, { "epoch": 219.83582089552237, "grad_norm": 16.481653213500977, "learning_rate": 9.581780538302278e-06, "loss": 37.1363, "step": 9233 }, { "epoch": 219.8597014925373, "grad_norm": 12.075387954711914, "learning_rate": 9.580745341614907e-06, "loss": 37.0842, "step": 9234 }, { "epoch": 219.88358208955225, "grad_norm": 17.5874080657959, "learning_rate": 9.579710144927537e-06, "loss": 36.651, "step": 9235 }, { "epoch": 219.90746268656716, "grad_norm": 13.357857704162598, "learning_rate": 9.578674948240167e-06, "loss": 36.7274, "step": 9236 }, { "epoch": 219.9313432835821, "grad_norm": 17.825353622436523, "learning_rate": 9.577639751552796e-06, "loss": 36.7724, "step": 9237 }, { "epoch": 219.955223880597, "grad_norm": 14.304804801940918, "learning_rate": 9.576604554865426e-06, "loss": 36.8395, "step": 9238 }, { "epoch": 219.97910447761194, "grad_norm": 15.323370933532715, "learning_rate": 9.575569358178055e-06, "loss": 36.6138, "step": 9239 }, { "epoch": 220.0, "grad_norm": 18.964876174926758, "learning_rate": 9.574534161490685e-06, "loss": 32.3095, "step": 9240 }, { "epoch": 220.02388059701494, "grad_norm": 17.353851318359375, "learning_rate": 9.573498964803314e-06, "loss": 36.4195, "step": 9241 }, { "epoch": 220.04776119402985, "grad_norm": 15.174515724182129, "learning_rate": 9.572463768115942e-06, "loss": 36.7379, "step": 9242 }, { "epoch": 220.07164179104478, "grad_norm": 17.7712459564209, "learning_rate": 9.571428571428573e-06, "loss": 36.1797, "step": 9243 }, { "epoch": 220.0955223880597, "grad_norm": 15.105401039123535, "learning_rate": 9.570393374741201e-06, "loss": 36.7952, "step": 9244 }, { "epoch": 220.11940298507463, "grad_norm": 19.85262680053711, "learning_rate": 9.56935817805383e-06, "loss": 37.2235, "step": 9245 }, { "epoch": 220.14328358208957, "grad_norm": 21.23208236694336, "learning_rate": 9.56832298136646e-06, "loss": 36.532, "step": 9246 }, { "epoch": 220.16716417910447, "grad_norm": 16.253814697265625, "learning_rate": 9.56728778467909e-06, "loss": 36.9258, "step": 9247 }, { "epoch": 220.1910447761194, "grad_norm": 20.50847625732422, "learning_rate": 9.56625258799172e-06, "loss": 36.7899, "step": 9248 }, { "epoch": 220.21492537313432, "grad_norm": 15.544790267944336, "learning_rate": 9.565217391304349e-06, "loss": 37.627, "step": 9249 }, { "epoch": 220.23880597014926, "grad_norm": 15.568458557128906, "learning_rate": 9.564182194616978e-06, "loss": 37.1097, "step": 9250 }, { "epoch": 220.26268656716417, "grad_norm": 13.180426597595215, "learning_rate": 9.563146997929608e-06, "loss": 37.0846, "step": 9251 }, { "epoch": 220.2865671641791, "grad_norm": 14.081692695617676, "learning_rate": 9.562111801242237e-06, "loss": 36.5627, "step": 9252 }, { "epoch": 220.31044776119404, "grad_norm": 16.696626663208008, "learning_rate": 9.561076604554867e-06, "loss": 35.5503, "step": 9253 }, { "epoch": 220.33432835820895, "grad_norm": 22.202970504760742, "learning_rate": 9.560041407867495e-06, "loss": 35.8487, "step": 9254 }, { "epoch": 220.3582089552239, "grad_norm": 13.861478805541992, "learning_rate": 9.559006211180126e-06, "loss": 37.0773, "step": 9255 }, { "epoch": 220.3820895522388, "grad_norm": 23.343826293945312, "learning_rate": 9.557971014492754e-06, "loss": 37.297, "step": 9256 }, { "epoch": 220.40597014925373, "grad_norm": 19.788761138916016, "learning_rate": 9.556935817805383e-06, "loss": 36.5872, "step": 9257 }, { "epoch": 220.42985074626867, "grad_norm": 14.769845008850098, "learning_rate": 9.555900621118013e-06, "loss": 37.4659, "step": 9258 }, { "epoch": 220.45373134328358, "grad_norm": 25.088499069213867, "learning_rate": 9.554865424430642e-06, "loss": 36.4183, "step": 9259 }, { "epoch": 220.47761194029852, "grad_norm": 19.144893646240234, "learning_rate": 9.553830227743272e-06, "loss": 36.895, "step": 9260 }, { "epoch": 220.50149253731342, "grad_norm": 16.255157470703125, "learning_rate": 9.552795031055901e-06, "loss": 36.1986, "step": 9261 }, { "epoch": 220.52537313432836, "grad_norm": 30.13691520690918, "learning_rate": 9.551759834368531e-06, "loss": 36.4725, "step": 9262 }, { "epoch": 220.54925373134327, "grad_norm": 16.886524200439453, "learning_rate": 9.55072463768116e-06, "loss": 35.1262, "step": 9263 }, { "epoch": 220.5731343283582, "grad_norm": 40.92776107788086, "learning_rate": 9.54968944099379e-06, "loss": 36.4841, "step": 9264 }, { "epoch": 220.59701492537314, "grad_norm": 29.202791213989258, "learning_rate": 9.54865424430642e-06, "loss": 37.9052, "step": 9265 }, { "epoch": 220.62089552238805, "grad_norm": 39.013118743896484, "learning_rate": 9.547619047619049e-06, "loss": 36.8797, "step": 9266 }, { "epoch": 220.644776119403, "grad_norm": 37.325225830078125, "learning_rate": 9.546583850931679e-06, "loss": 36.3243, "step": 9267 }, { "epoch": 220.6686567164179, "grad_norm": 24.062450408935547, "learning_rate": 9.545548654244308e-06, "loss": 37.3711, "step": 9268 }, { "epoch": 220.69253731343284, "grad_norm": 25.628732681274414, "learning_rate": 9.544513457556936e-06, "loss": 36.9699, "step": 9269 }, { "epoch": 220.71641791044777, "grad_norm": 31.478517532348633, "learning_rate": 9.543478260869567e-06, "loss": 36.9297, "step": 9270 }, { "epoch": 220.74029850746268, "grad_norm": 22.91040802001953, "learning_rate": 9.542443064182195e-06, "loss": 36.687, "step": 9271 }, { "epoch": 220.76417910447762, "grad_norm": 36.85102462768555, "learning_rate": 9.541407867494824e-06, "loss": 36.3917, "step": 9272 }, { "epoch": 220.78805970149253, "grad_norm": 33.397098541259766, "learning_rate": 9.540372670807454e-06, "loss": 36.1614, "step": 9273 }, { "epoch": 220.81194029850747, "grad_norm": 28.137874603271484, "learning_rate": 9.539337474120084e-06, "loss": 37.2919, "step": 9274 }, { "epoch": 220.83582089552237, "grad_norm": 26.61577796936035, "learning_rate": 9.538302277432713e-06, "loss": 36.4024, "step": 9275 }, { "epoch": 220.8597014925373, "grad_norm": 27.809036254882812, "learning_rate": 9.537267080745343e-06, "loss": 36.6629, "step": 9276 }, { "epoch": 220.88358208955225, "grad_norm": 25.116004943847656, "learning_rate": 9.536231884057972e-06, "loss": 36.7077, "step": 9277 }, { "epoch": 220.90746268656716, "grad_norm": 32.13947677612305, "learning_rate": 9.535196687370602e-06, "loss": 37.3892, "step": 9278 }, { "epoch": 220.9313432835821, "grad_norm": 28.722021102905273, "learning_rate": 9.53416149068323e-06, "loss": 38.0002, "step": 9279 }, { "epoch": 220.955223880597, "grad_norm": 29.87248992919922, "learning_rate": 9.53312629399586e-06, "loss": 37.0684, "step": 9280 }, { "epoch": 220.97910447761194, "grad_norm": 25.427030563354492, "learning_rate": 9.532091097308489e-06, "loss": 36.559, "step": 9281 }, { "epoch": 221.0, "grad_norm": 28.555543899536133, "learning_rate": 9.53105590062112e-06, "loss": 31.9309, "step": 9282 }, { "epoch": 221.02388059701494, "grad_norm": 28.924266815185547, "learning_rate": 9.530020703933748e-06, "loss": 36.5134, "step": 9283 }, { "epoch": 221.04776119402985, "grad_norm": 27.023439407348633, "learning_rate": 9.528985507246377e-06, "loss": 36.8401, "step": 9284 }, { "epoch": 221.07164179104478, "grad_norm": 25.894798278808594, "learning_rate": 9.527950310559007e-06, "loss": 36.8202, "step": 9285 }, { "epoch": 221.0955223880597, "grad_norm": 31.44702911376953, "learning_rate": 9.526915113871636e-06, "loss": 36.7885, "step": 9286 }, { "epoch": 221.11940298507463, "grad_norm": 24.80626678466797, "learning_rate": 9.525879917184266e-06, "loss": 38.347, "step": 9287 }, { "epoch": 221.14328358208957, "grad_norm": 31.647432327270508, "learning_rate": 9.524844720496895e-06, "loss": 36.9638, "step": 9288 }, { "epoch": 221.16716417910447, "grad_norm": 27.200225830078125, "learning_rate": 9.523809523809525e-06, "loss": 37.1524, "step": 9289 }, { "epoch": 221.1910447761194, "grad_norm": 28.28724479675293, "learning_rate": 9.522774327122154e-06, "loss": 36.547, "step": 9290 }, { "epoch": 221.21492537313432, "grad_norm": 28.738943099975586, "learning_rate": 9.521739130434784e-06, "loss": 36.8349, "step": 9291 }, { "epoch": 221.23880597014926, "grad_norm": 29.01047134399414, "learning_rate": 9.520703933747413e-06, "loss": 35.6371, "step": 9292 }, { "epoch": 221.26268656716417, "grad_norm": 25.44609260559082, "learning_rate": 9.519668737060043e-06, "loss": 37.8431, "step": 9293 }, { "epoch": 221.2865671641791, "grad_norm": 30.137645721435547, "learning_rate": 9.51863354037267e-06, "loss": 37.2189, "step": 9294 }, { "epoch": 221.31044776119404, "grad_norm": 26.297382354736328, "learning_rate": 9.517598343685302e-06, "loss": 35.8506, "step": 9295 }, { "epoch": 221.33432835820895, "grad_norm": 30.275667190551758, "learning_rate": 9.51656314699793e-06, "loss": 36.866, "step": 9296 }, { "epoch": 221.3582089552239, "grad_norm": 27.415193557739258, "learning_rate": 9.515527950310561e-06, "loss": 35.4461, "step": 9297 }, { "epoch": 221.3820895522388, "grad_norm": 30.012296676635742, "learning_rate": 9.514492753623189e-06, "loss": 36.361, "step": 9298 }, { "epoch": 221.40597014925373, "grad_norm": 25.366943359375, "learning_rate": 9.513457556935818e-06, "loss": 35.9706, "step": 9299 }, { "epoch": 221.42985074626867, "grad_norm": 28.098432540893555, "learning_rate": 9.512422360248448e-06, "loss": 36.8511, "step": 9300 }, { "epoch": 221.45373134328358, "grad_norm": 27.954959869384766, "learning_rate": 9.511387163561077e-06, "loss": 37.4078, "step": 9301 }, { "epoch": 221.47761194029852, "grad_norm": 30.06001091003418, "learning_rate": 9.510351966873707e-06, "loss": 37.7406, "step": 9302 }, { "epoch": 221.50149253731342, "grad_norm": 25.211225509643555, "learning_rate": 9.509316770186336e-06, "loss": 37.0598, "step": 9303 }, { "epoch": 221.52537313432836, "grad_norm": 28.45794105529785, "learning_rate": 9.508281573498966e-06, "loss": 36.447, "step": 9304 }, { "epoch": 221.54925373134327, "grad_norm": 26.013484954833984, "learning_rate": 9.507246376811596e-06, "loss": 36.8887, "step": 9305 }, { "epoch": 221.5731343283582, "grad_norm": 28.581687927246094, "learning_rate": 9.506211180124223e-06, "loss": 35.7594, "step": 9306 }, { "epoch": 221.59701492537314, "grad_norm": 25.98162078857422, "learning_rate": 9.505175983436855e-06, "loss": 36.6488, "step": 9307 }, { "epoch": 221.62089552238805, "grad_norm": 29.857873916625977, "learning_rate": 9.504140786749482e-06, "loss": 36.5665, "step": 9308 }, { "epoch": 221.644776119403, "grad_norm": 25.796464920043945, "learning_rate": 9.503105590062112e-06, "loss": 37.4074, "step": 9309 }, { "epoch": 221.6686567164179, "grad_norm": 29.17536735534668, "learning_rate": 9.502070393374741e-06, "loss": 36.9913, "step": 9310 }, { "epoch": 221.69253731343284, "grad_norm": 28.136606216430664, "learning_rate": 9.501035196687371e-06, "loss": 35.9862, "step": 9311 }, { "epoch": 221.71641791044777, "grad_norm": 30.56552505493164, "learning_rate": 9.5e-06, "loss": 35.5431, "step": 9312 }, { "epoch": 221.74029850746268, "grad_norm": 24.548416137695312, "learning_rate": 9.49896480331263e-06, "loss": 36.7893, "step": 9313 }, { "epoch": 221.76417910447762, "grad_norm": 30.45704460144043, "learning_rate": 9.49792960662526e-06, "loss": 37.2092, "step": 9314 }, { "epoch": 221.78805970149253, "grad_norm": NaN, "learning_rate": 9.496894409937889e-06, "loss": 59.5099, "step": 9315 }, { "epoch": 221.81194029850747, "grad_norm": 25.80453109741211, "learning_rate": 9.496894409937889e-06, "loss": 36.6871, "step": 9316 }, { "epoch": 221.83582089552237, "grad_norm": 28.29366683959961, "learning_rate": 9.495859213250519e-06, "loss": 37.6914, "step": 9317 }, { "epoch": 221.8597014925373, "grad_norm": 27.410236358642578, "learning_rate": 9.494824016563148e-06, "loss": 36.4577, "step": 9318 }, { "epoch": 221.88358208955225, "grad_norm": 29.258834838867188, "learning_rate": 9.493788819875778e-06, "loss": 36.2599, "step": 9319 }, { "epoch": 221.90746268656716, "grad_norm": 27.47816276550293, "learning_rate": 9.492753623188407e-06, "loss": 35.9362, "step": 9320 }, { "epoch": 221.9313432835821, "grad_norm": 32.89609146118164, "learning_rate": 9.491718426501037e-06, "loss": 37.7435, "step": 9321 }, { "epoch": 221.955223880597, "grad_norm": 28.55097198486328, "learning_rate": 9.490683229813665e-06, "loss": 36.0103, "step": 9322 }, { "epoch": 221.97910447761194, "grad_norm": 23.855192184448242, "learning_rate": 9.489648033126296e-06, "loss": 36.576, "step": 9323 }, { "epoch": 222.0, "grad_norm": 21.09449577331543, "learning_rate": 9.488612836438924e-06, "loss": 31.4365, "step": 9324 }, { "epoch": 222.02388059701494, "grad_norm": 32.309261322021484, "learning_rate": 9.487577639751553e-06, "loss": 37.0288, "step": 9325 }, { "epoch": 222.04776119402985, "grad_norm": 26.1121768951416, "learning_rate": 9.486542443064183e-06, "loss": 36.8737, "step": 9326 }, { "epoch": 222.07164179104478, "grad_norm": 34.31647491455078, "learning_rate": 9.485507246376812e-06, "loss": 35.9067, "step": 9327 }, { "epoch": 222.0955223880597, "grad_norm": 32.59259033203125, "learning_rate": 9.484472049689442e-06, "loss": 36.4487, "step": 9328 }, { "epoch": 222.11940298507463, "grad_norm": 22.975914001464844, "learning_rate": 9.483436853002071e-06, "loss": 36.6547, "step": 9329 }, { "epoch": 222.14328358208957, "grad_norm": 21.199647903442383, "learning_rate": 9.4824016563147e-06, "loss": 37.0391, "step": 9330 }, { "epoch": 222.16716417910447, "grad_norm": 29.940921783447266, "learning_rate": 9.48136645962733e-06, "loss": 36.1474, "step": 9331 }, { "epoch": 222.1910447761194, "grad_norm": 24.6529483795166, "learning_rate": 9.48033126293996e-06, "loss": 36.8716, "step": 9332 }, { "epoch": 222.21492537313432, "grad_norm": 32.249847412109375, "learning_rate": 9.47929606625259e-06, "loss": 37.5204, "step": 9333 }, { "epoch": 222.23880597014926, "grad_norm": 29.692399978637695, "learning_rate": 9.478260869565217e-06, "loss": 35.7068, "step": 9334 }, { "epoch": 222.26268656716417, "grad_norm": 23.01384162902832, "learning_rate": 9.477225672877848e-06, "loss": 37.0796, "step": 9335 }, { "epoch": 222.2865671641791, "grad_norm": 22.15422821044922, "learning_rate": 9.476190476190476e-06, "loss": 38.0209, "step": 9336 }, { "epoch": 222.31044776119404, "grad_norm": 28.740741729736328, "learning_rate": 9.475155279503106e-06, "loss": 36.7489, "step": 9337 }, { "epoch": 222.33432835820895, "grad_norm": 21.39152717590332, "learning_rate": 9.474120082815735e-06, "loss": 37.9498, "step": 9338 }, { "epoch": 222.3582089552239, "grad_norm": 31.728282928466797, "learning_rate": 9.473084886128365e-06, "loss": 37.0965, "step": 9339 }, { "epoch": 222.3820895522388, "grad_norm": 26.833126068115234, "learning_rate": 9.472049689440994e-06, "loss": 37.4387, "step": 9340 }, { "epoch": 222.40597014925373, "grad_norm": 27.517566680908203, "learning_rate": 9.471014492753624e-06, "loss": 36.3681, "step": 9341 }, { "epoch": 222.42985074626867, "grad_norm": 25.988306045532227, "learning_rate": 9.469979296066253e-06, "loss": 36.96, "step": 9342 }, { "epoch": 222.45373134328358, "grad_norm": 27.119274139404297, "learning_rate": 9.468944099378883e-06, "loss": 36.1979, "step": 9343 }, { "epoch": 222.47761194029852, "grad_norm": 21.548036575317383, "learning_rate": 9.46790890269151e-06, "loss": 37.0174, "step": 9344 }, { "epoch": 222.50149253731342, "grad_norm": 32.375579833984375, "learning_rate": 9.466873706004142e-06, "loss": 36.4319, "step": 9345 }, { "epoch": 222.52537313432836, "grad_norm": 25.519662857055664, "learning_rate": 9.465838509316772e-06, "loss": 36.6505, "step": 9346 }, { "epoch": 222.54925373134327, "grad_norm": 30.985382080078125, "learning_rate": 9.464803312629401e-06, "loss": 36.7131, "step": 9347 }, { "epoch": 222.5731343283582, "grad_norm": 26.79625701904297, "learning_rate": 9.46376811594203e-06, "loss": 37.1961, "step": 9348 }, { "epoch": 222.59701492537314, "grad_norm": 25.46737289428711, "learning_rate": 9.462732919254658e-06, "loss": 36.6815, "step": 9349 }, { "epoch": 222.62089552238805, "grad_norm": 22.190353393554688, "learning_rate": 9.46169772256729e-06, "loss": 35.9443, "step": 9350 }, { "epoch": 222.644776119403, "grad_norm": 27.605209350585938, "learning_rate": 9.460662525879918e-06, "loss": 36.8895, "step": 9351 }, { "epoch": 222.6686567164179, "grad_norm": 22.50598907470703, "learning_rate": 9.459627329192547e-06, "loss": 37.0562, "step": 9352 }, { "epoch": 222.69253731343284, "grad_norm": 30.742273330688477, "learning_rate": 9.458592132505177e-06, "loss": 34.8006, "step": 9353 }, { "epoch": 222.71641791044777, "grad_norm": 27.609203338623047, "learning_rate": 9.457556935817806e-06, "loss": 36.096, "step": 9354 }, { "epoch": 222.74029850746268, "grad_norm": 25.80821990966797, "learning_rate": 9.456521739130436e-06, "loss": 35.8656, "step": 9355 }, { "epoch": 222.76417910447762, "grad_norm": 24.429798126220703, "learning_rate": 9.455486542443065e-06, "loss": 35.7282, "step": 9356 }, { "epoch": 222.78805970149253, "grad_norm": 26.56473159790039, "learning_rate": 9.454451345755695e-06, "loss": 35.7349, "step": 9357 }, { "epoch": 222.81194029850747, "grad_norm": 23.048847198486328, "learning_rate": 9.453416149068324e-06, "loss": 37.8195, "step": 9358 }, { "epoch": 222.83582089552237, "grad_norm": 25.443201065063477, "learning_rate": 9.452380952380952e-06, "loss": 36.0767, "step": 9359 }, { "epoch": 222.8597014925373, "grad_norm": 21.29392433166504, "learning_rate": 9.451345755693583e-06, "loss": 36.912, "step": 9360 }, { "epoch": 222.88358208955225, "grad_norm": 27.57860565185547, "learning_rate": 9.450310559006211e-06, "loss": 36.4075, "step": 9361 }, { "epoch": 222.90746268656716, "grad_norm": 24.445480346679688, "learning_rate": 9.449275362318842e-06, "loss": 36.4575, "step": 9362 }, { "epoch": 222.9313432835821, "grad_norm": 29.19906997680664, "learning_rate": 9.44824016563147e-06, "loss": 37.9941, "step": 9363 }, { "epoch": 222.955223880597, "grad_norm": 26.642549514770508, "learning_rate": 9.4472049689441e-06, "loss": 36.3679, "step": 9364 }, { "epoch": 222.97910447761194, "grad_norm": 21.054933547973633, "learning_rate": 9.44616977225673e-06, "loss": 35.4231, "step": 9365 }, { "epoch": 223.0, "grad_norm": 19.74691390991211, "learning_rate": 9.445134575569359e-06, "loss": 33.1747, "step": 9366 }, { "epoch": 223.02388059701494, "grad_norm": 23.323965072631836, "learning_rate": 9.444099378881988e-06, "loss": 36.345, "step": 9367 }, { "epoch": 223.04776119402985, "grad_norm": 17.254398345947266, "learning_rate": 9.443064182194618e-06, "loss": 35.4009, "step": 9368 }, { "epoch": 223.07164179104478, "grad_norm": 24.655067443847656, "learning_rate": 9.442028985507247e-06, "loss": 35.2186, "step": 9369 }, { "epoch": 223.0955223880597, "grad_norm": 18.97420883178711, "learning_rate": 9.440993788819877e-06, "loss": 37.9157, "step": 9370 }, { "epoch": 223.11940298507463, "grad_norm": 24.617870330810547, "learning_rate": 9.439958592132505e-06, "loss": 37.5063, "step": 9371 }, { "epoch": 223.14328358208957, "grad_norm": 23.071441650390625, "learning_rate": 9.438923395445136e-06, "loss": 37.1021, "step": 9372 }, { "epoch": 223.16716417910447, "grad_norm": 23.001476287841797, "learning_rate": 9.437888198757765e-06, "loss": 36.4769, "step": 9373 }, { "epoch": 223.1910447761194, "grad_norm": 21.526748657226562, "learning_rate": 9.436853002070393e-06, "loss": 36.0368, "step": 9374 }, { "epoch": 223.21492537313432, "grad_norm": 22.32477378845215, "learning_rate": 9.435817805383025e-06, "loss": 37.367, "step": 9375 }, { "epoch": 223.23880597014926, "grad_norm": 19.45122718811035, "learning_rate": 9.434782608695652e-06, "loss": 37.2365, "step": 9376 }, { "epoch": 223.26268656716417, "grad_norm": 23.752643585205078, "learning_rate": 9.433747412008284e-06, "loss": 37.151, "step": 9377 }, { "epoch": 223.2865671641791, "grad_norm": 18.8602237701416, "learning_rate": 9.432712215320911e-06, "loss": 36.614, "step": 9378 }, { "epoch": 223.31044776119404, "grad_norm": 22.16071319580078, "learning_rate": 9.431677018633541e-06, "loss": 36.9328, "step": 9379 }, { "epoch": 223.33432835820895, "grad_norm": 21.07900619506836, "learning_rate": 9.43064182194617e-06, "loss": 35.7809, "step": 9380 }, { "epoch": 223.3582089552239, "grad_norm": 24.552772521972656, "learning_rate": 9.4296066252588e-06, "loss": 35.3386, "step": 9381 }, { "epoch": 223.3820895522388, "grad_norm": 22.024473190307617, "learning_rate": 9.42857142857143e-06, "loss": 37.6294, "step": 9382 }, { "epoch": 223.40597014925373, "grad_norm": 27.61777114868164, "learning_rate": 9.427536231884059e-06, "loss": 36.9125, "step": 9383 }, { "epoch": 223.42985074626867, "grad_norm": 22.7742919921875, "learning_rate": 9.426501035196689e-06, "loss": 38.2719, "step": 9384 }, { "epoch": 223.45373134328358, "grad_norm": NaN, "learning_rate": 9.425465838509318e-06, "loss": 32.1527, "step": 9385 }, { "epoch": 223.47761194029852, "grad_norm": 20.231372833251953, "learning_rate": 9.425465838509318e-06, "loss": 36.6002, "step": 9386 }, { "epoch": 223.50149253731342, "grad_norm": 20.850204467773438, "learning_rate": 9.424430641821946e-06, "loss": 37.3469, "step": 9387 }, { "epoch": 223.52537313432836, "grad_norm": 17.099939346313477, "learning_rate": 9.423395445134577e-06, "loss": 35.4745, "step": 9388 }, { "epoch": 223.54925373134327, "grad_norm": 20.98678970336914, "learning_rate": 9.422360248447205e-06, "loss": 36.0333, "step": 9389 }, { "epoch": 223.5731343283582, "grad_norm": 17.527341842651367, "learning_rate": 9.421325051759835e-06, "loss": 37.9387, "step": 9390 }, { "epoch": 223.59701492537314, "grad_norm": 19.38433265686035, "learning_rate": 9.420289855072464e-06, "loss": 37.2279, "step": 9391 }, { "epoch": 223.62089552238805, "grad_norm": 15.133399963378906, "learning_rate": 9.419254658385094e-06, "loss": 36.4897, "step": 9392 }, { "epoch": 223.644776119403, "grad_norm": 16.894901275634766, "learning_rate": 9.418219461697723e-06, "loss": 36.3029, "step": 9393 }, { "epoch": 223.6686567164179, "grad_norm": 16.413881301879883, "learning_rate": 9.417184265010353e-06, "loss": 36.3818, "step": 9394 }, { "epoch": 223.69253731343284, "grad_norm": 15.037392616271973, "learning_rate": 9.416149068322982e-06, "loss": 36.4857, "step": 9395 }, { "epoch": 223.71641791044777, "grad_norm": 15.291950225830078, "learning_rate": 9.415113871635612e-06, "loss": 37.1058, "step": 9396 }, { "epoch": 223.74029850746268, "grad_norm": 15.040635108947754, "learning_rate": 9.414078674948241e-06, "loss": 35.7792, "step": 9397 }, { "epoch": 223.76417910447762, "grad_norm": 15.97356128692627, "learning_rate": 9.41304347826087e-06, "loss": 36.0541, "step": 9398 }, { "epoch": 223.78805970149253, "grad_norm": 15.278190612792969, "learning_rate": 9.412008281573499e-06, "loss": 37.2379, "step": 9399 }, { "epoch": 223.81194029850747, "grad_norm": 21.481409072875977, "learning_rate": 9.41097308488613e-06, "loss": 37.0654, "step": 9400 }, { "epoch": 223.83582089552237, "grad_norm": 15.87094783782959, "learning_rate": 9.40993788819876e-06, "loss": 36.9308, "step": 9401 }, { "epoch": 223.8597014925373, "grad_norm": 17.718656539916992, "learning_rate": 9.408902691511387e-06, "loss": 37.1585, "step": 9402 }, { "epoch": 223.88358208955225, "grad_norm": 16.742095947265625, "learning_rate": 9.407867494824018e-06, "loss": 37.4013, "step": 9403 }, { "epoch": 223.90746268656716, "grad_norm": 16.419233322143555, "learning_rate": 9.406832298136646e-06, "loss": 34.8722, "step": 9404 }, { "epoch": 223.9313432835821, "grad_norm": 19.157041549682617, "learning_rate": 9.405797101449276e-06, "loss": 36.4444, "step": 9405 }, { "epoch": 223.955223880597, "grad_norm": 16.145048141479492, "learning_rate": 9.404761904761905e-06, "loss": 36.0074, "step": 9406 }, { "epoch": 223.97910447761194, "grad_norm": 22.667598724365234, "learning_rate": 9.403726708074535e-06, "loss": 36.9183, "step": 9407 }, { "epoch": 224.0, "grad_norm": 16.207134246826172, "learning_rate": 9.402691511387164e-06, "loss": 31.4937, "step": 9408 }, { "epoch": 224.02388059701494, "grad_norm": 18.85007095336914, "learning_rate": 9.401656314699794e-06, "loss": 37.0026, "step": 9409 }, { "epoch": 224.04776119402985, "grad_norm": 19.203710556030273, "learning_rate": 9.400621118012423e-06, "loss": 35.727, "step": 9410 }, { "epoch": 224.07164179104478, "grad_norm": 17.155635833740234, "learning_rate": 9.399585921325053e-06, "loss": 37.0265, "step": 9411 }, { "epoch": 224.0955223880597, "grad_norm": 19.872440338134766, "learning_rate": 9.398550724637682e-06, "loss": 36.5634, "step": 9412 }, { "epoch": 224.11940298507463, "grad_norm": 18.595470428466797, "learning_rate": 9.397515527950312e-06, "loss": 38.2642, "step": 9413 }, { "epoch": 224.14328358208957, "grad_norm": 16.608049392700195, "learning_rate": 9.39648033126294e-06, "loss": 35.9693, "step": 9414 }, { "epoch": 224.16716417910447, "grad_norm": 17.762571334838867, "learning_rate": 9.395445134575571e-06, "loss": 37.0019, "step": 9415 }, { "epoch": 224.1910447761194, "grad_norm": 17.047176361083984, "learning_rate": 9.394409937888199e-06, "loss": 36.5651, "step": 9416 }, { "epoch": 224.21492537313432, "grad_norm": 18.56181526184082, "learning_rate": 9.393374741200828e-06, "loss": 36.5981, "step": 9417 }, { "epoch": 224.23880597014926, "grad_norm": 14.048553466796875, "learning_rate": 9.392339544513458e-06, "loss": 36.8729, "step": 9418 }, { "epoch": 224.26268656716417, "grad_norm": 18.872583389282227, "learning_rate": 9.391304347826087e-06, "loss": 37.0848, "step": 9419 }, { "epoch": 224.2865671641791, "grad_norm": 16.786888122558594, "learning_rate": 9.390269151138717e-06, "loss": 36.495, "step": 9420 }, { "epoch": 224.31044776119404, "grad_norm": 17.79970359802246, "learning_rate": 9.389233954451347e-06, "loss": 37.1684, "step": 9421 }, { "epoch": 224.33432835820895, "grad_norm": 13.249444007873535, "learning_rate": 9.388198757763976e-06, "loss": 35.6529, "step": 9422 }, { "epoch": 224.3582089552239, "grad_norm": 21.127038955688477, "learning_rate": 9.387163561076606e-06, "loss": 36.3109, "step": 9423 }, { "epoch": 224.3820895522388, "grad_norm": 17.2930908203125, "learning_rate": 9.386128364389233e-06, "loss": 36.643, "step": 9424 }, { "epoch": 224.40597014925373, "grad_norm": 19.54315185546875, "learning_rate": 9.385093167701865e-06, "loss": 36.7007, "step": 9425 }, { "epoch": 224.42985074626867, "grad_norm": 17.682388305664062, "learning_rate": 9.384057971014492e-06, "loss": 35.7651, "step": 9426 }, { "epoch": 224.45373134328358, "grad_norm": 18.735719680786133, "learning_rate": 9.383022774327124e-06, "loss": 35.9602, "step": 9427 }, { "epoch": 224.47761194029852, "grad_norm": 16.567495346069336, "learning_rate": 9.381987577639753e-06, "loss": 35.8212, "step": 9428 }, { "epoch": 224.50149253731342, "grad_norm": 18.95294189453125, "learning_rate": 9.380952380952381e-06, "loss": 36.5769, "step": 9429 }, { "epoch": 224.52537313432836, "grad_norm": 21.27597427368164, "learning_rate": 9.379917184265012e-06, "loss": 36.8463, "step": 9430 }, { "epoch": 224.54925373134327, "grad_norm": 15.662125587463379, "learning_rate": 9.37888198757764e-06, "loss": 36.874, "step": 9431 }, { "epoch": 224.5731343283582, "grad_norm": 18.717649459838867, "learning_rate": 9.37784679089027e-06, "loss": 36.1701, "step": 9432 }, { "epoch": 224.59701492537314, "grad_norm": 15.020418167114258, "learning_rate": 9.3768115942029e-06, "loss": 35.3557, "step": 9433 }, { "epoch": 224.62089552238805, "grad_norm": 21.474580764770508, "learning_rate": 9.375776397515529e-06, "loss": 36.4692, "step": 9434 }, { "epoch": 224.644776119403, "grad_norm": 15.604917526245117, "learning_rate": 9.374741200828158e-06, "loss": 35.8736, "step": 9435 }, { "epoch": 224.6686567164179, "grad_norm": 20.360532760620117, "learning_rate": 9.373706004140788e-06, "loss": 36.359, "step": 9436 }, { "epoch": 224.69253731343284, "grad_norm": 17.218374252319336, "learning_rate": 9.372670807453417e-06, "loss": 37.9752, "step": 9437 }, { "epoch": 224.71641791044777, "grad_norm": 20.179065704345703, "learning_rate": 9.371635610766047e-06, "loss": 36.7497, "step": 9438 }, { "epoch": 224.74029850746268, "grad_norm": 16.89761734008789, "learning_rate": 9.370600414078675e-06, "loss": 36.1195, "step": 9439 }, { "epoch": 224.76417910447762, "grad_norm": 20.876089096069336, "learning_rate": 9.369565217391306e-06, "loss": 37.1461, "step": 9440 }, { "epoch": 224.78805970149253, "grad_norm": 20.17327880859375, "learning_rate": 9.368530020703934e-06, "loss": 35.7237, "step": 9441 }, { "epoch": 224.81194029850747, "grad_norm": 17.4796085357666, "learning_rate": 9.367494824016565e-06, "loss": 38.5719, "step": 9442 }, { "epoch": 224.83582089552237, "grad_norm": 18.749671936035156, "learning_rate": 9.366459627329193e-06, "loss": 37.7713, "step": 9443 }, { "epoch": 224.8597014925373, "grad_norm": 18.992996215820312, "learning_rate": 9.365424430641822e-06, "loss": 37.2909, "step": 9444 }, { "epoch": 224.88358208955225, "grad_norm": 14.612916946411133, "learning_rate": 9.364389233954452e-06, "loss": 37.2268, "step": 9445 }, { "epoch": 224.90746268656716, "grad_norm": 14.41982650756836, "learning_rate": 9.363354037267081e-06, "loss": 36.3542, "step": 9446 }, { "epoch": 224.9313432835821, "grad_norm": 14.91037654876709, "learning_rate": 9.362318840579711e-06, "loss": 36.4081, "step": 9447 }, { "epoch": 224.955223880597, "grad_norm": 15.922717094421387, "learning_rate": 9.36128364389234e-06, "loss": 36.5212, "step": 9448 }, { "epoch": 224.97910447761194, "grad_norm": 18.122549057006836, "learning_rate": 9.36024844720497e-06, "loss": 36.5235, "step": 9449 }, { "epoch": 225.0, "grad_norm": 15.519862174987793, "learning_rate": 9.3592132505176e-06, "loss": 31.8674, "step": 9450 }, { "epoch": 225.02388059701494, "grad_norm": NaN, "learning_rate": 9.358178053830227e-06, "loss": 35.3011, "step": 9451 }, { "epoch": 225.04776119402985, "grad_norm": 17.2370662689209, "learning_rate": 9.358178053830227e-06, "loss": 36.4703, "step": 9452 }, { "epoch": 225.07164179104478, "grad_norm": 14.530112266540527, "learning_rate": 9.357142857142859e-06, "loss": 37.5439, "step": 9453 }, { "epoch": 225.0955223880597, "grad_norm": 17.49200439453125, "learning_rate": 9.356107660455486e-06, "loss": 35.407, "step": 9454 }, { "epoch": 225.11940298507463, "grad_norm": 19.838918685913086, "learning_rate": 9.355072463768116e-06, "loss": 36.3665, "step": 9455 }, { "epoch": 225.14328358208957, "grad_norm": 15.36133098602295, "learning_rate": 9.354037267080745e-06, "loss": 37.52, "step": 9456 }, { "epoch": 225.16716417910447, "grad_norm": 25.876646041870117, "learning_rate": 9.353002070393375e-06, "loss": 35.3112, "step": 9457 }, { "epoch": 225.1910447761194, "grad_norm": 18.423568725585938, "learning_rate": 9.351966873706006e-06, "loss": 37.3666, "step": 9458 }, { "epoch": 225.21492537313432, "grad_norm": 22.096370697021484, "learning_rate": 9.350931677018634e-06, "loss": 36.9002, "step": 9459 }, { "epoch": 225.23880597014926, "grad_norm": 20.92823028564453, "learning_rate": 9.349896480331264e-06, "loss": 36.2065, "step": 9460 }, { "epoch": 225.26268656716417, "grad_norm": 17.686012268066406, "learning_rate": 9.348861283643893e-06, "loss": 36.1261, "step": 9461 }, { "epoch": 225.2865671641791, "grad_norm": 13.867901802062988, "learning_rate": 9.347826086956523e-06, "loss": 36.382, "step": 9462 }, { "epoch": 225.31044776119404, "grad_norm": 17.950428009033203, "learning_rate": 9.346790890269152e-06, "loss": 35.615, "step": 9463 }, { "epoch": 225.33432835820895, "grad_norm": 20.58045768737793, "learning_rate": 9.345755693581782e-06, "loss": 36.307, "step": 9464 }, { "epoch": 225.3582089552239, "grad_norm": 16.847736358642578, "learning_rate": 9.344720496894411e-06, "loss": 36.6786, "step": 9465 }, { "epoch": 225.3820895522388, "grad_norm": 21.384674072265625, "learning_rate": 9.34368530020704e-06, "loss": 36.6836, "step": 9466 }, { "epoch": 225.40597014925373, "grad_norm": 20.075319290161133, "learning_rate": 9.342650103519669e-06, "loss": 36.1683, "step": 9467 }, { "epoch": 225.42985074626867, "grad_norm": 17.858158111572266, "learning_rate": 9.3416149068323e-06, "loss": 37.2771, "step": 9468 }, { "epoch": 225.45373134328358, "grad_norm": 13.715435981750488, "learning_rate": 9.340579710144928e-06, "loss": 35.7138, "step": 9469 }, { "epoch": 225.47761194029852, "grad_norm": 24.905656814575195, "learning_rate": 9.339544513457557e-06, "loss": 36.5885, "step": 9470 }, { "epoch": 225.50149253731342, "grad_norm": 16.43111801147461, "learning_rate": 9.338509316770187e-06, "loss": 36.8778, "step": 9471 }, { "epoch": 225.52537313432836, "grad_norm": 15.977991104125977, "learning_rate": 9.337474120082816e-06, "loss": 36.921, "step": 9472 }, { "epoch": 225.54925373134327, "grad_norm": 18.274497985839844, "learning_rate": 9.336438923395446e-06, "loss": 37.1489, "step": 9473 }, { "epoch": 225.5731343283582, "grad_norm": 18.14158821105957, "learning_rate": 9.335403726708075e-06, "loss": 35.983, "step": 9474 }, { "epoch": 225.59701492537314, "grad_norm": 18.77350425720215, "learning_rate": 9.334368530020705e-06, "loss": 36.4306, "step": 9475 }, { "epoch": 225.62089552238805, "grad_norm": 17.489553451538086, "learning_rate": 9.333333333333334e-06, "loss": 36.7718, "step": 9476 }, { "epoch": 225.644776119403, "grad_norm": 22.633970260620117, "learning_rate": 9.332298136645964e-06, "loss": 37.5032, "step": 9477 }, { "epoch": 225.6686567164179, "grad_norm": 13.850768089294434, "learning_rate": 9.331262939958593e-06, "loss": 37.5411, "step": 9478 }, { "epoch": 225.69253731343284, "grad_norm": 27.146482467651367, "learning_rate": 9.330227743271221e-06, "loss": 36.5295, "step": 9479 }, { "epoch": 225.71641791044777, "grad_norm": 21.86114501953125, "learning_rate": 9.329192546583852e-06, "loss": 36.4041, "step": 9480 }, { "epoch": 225.74029850746268, "grad_norm": 21.19093894958496, "learning_rate": 9.32815734989648e-06, "loss": 35.0877, "step": 9481 }, { "epoch": 225.76417910447762, "grad_norm": 22.90280532836914, "learning_rate": 9.32712215320911e-06, "loss": 36.268, "step": 9482 }, { "epoch": 225.78805970149253, "grad_norm": 18.020240783691406, "learning_rate": 9.32608695652174e-06, "loss": 34.9878, "step": 9483 }, { "epoch": 225.81194029850747, "grad_norm": 16.98219108581543, "learning_rate": 9.325051759834369e-06, "loss": 37.7387, "step": 9484 }, { "epoch": 225.83582089552237, "grad_norm": 19.392545700073242, "learning_rate": 9.324016563146998e-06, "loss": 37.0511, "step": 9485 }, { "epoch": 225.8597014925373, "grad_norm": 15.673796653747559, "learning_rate": 9.322981366459628e-06, "loss": 36.8313, "step": 9486 }, { "epoch": 225.88358208955225, "grad_norm": 18.076011657714844, "learning_rate": 9.321946169772257e-06, "loss": 37.5841, "step": 9487 }, { "epoch": 225.90746268656716, "grad_norm": 16.198062896728516, "learning_rate": 9.320910973084887e-06, "loss": 37.4417, "step": 9488 }, { "epoch": 225.9313432835821, "grad_norm": 26.08418846130371, "learning_rate": 9.319875776397517e-06, "loss": 36.6044, "step": 9489 }, { "epoch": 225.955223880597, "grad_norm": 18.208662033081055, "learning_rate": 9.318840579710146e-06, "loss": 37.0748, "step": 9490 }, { "epoch": 225.97910447761194, "grad_norm": 22.65439796447754, "learning_rate": 9.317805383022776e-06, "loss": 36.1861, "step": 9491 }, { "epoch": 226.0, "grad_norm": 22.45075225830078, "learning_rate": 9.316770186335405e-06, "loss": 32.8049, "step": 9492 }, { "epoch": 226.02388059701494, "grad_norm": 16.150846481323242, "learning_rate": 9.315734989648035e-06, "loss": 36.8926, "step": 9493 }, { "epoch": 226.04776119402985, "grad_norm": 25.77251434326172, "learning_rate": 9.314699792960662e-06, "loss": 37.1812, "step": 9494 }, { "epoch": 226.07164179104478, "grad_norm": 18.994468688964844, "learning_rate": 9.313664596273294e-06, "loss": 36.6646, "step": 9495 }, { "epoch": 226.0955223880597, "grad_norm": 21.6476993560791, "learning_rate": 9.312629399585922e-06, "loss": 37.6085, "step": 9496 }, { "epoch": 226.11940298507463, "grad_norm": 24.551372528076172, "learning_rate": 9.311594202898551e-06, "loss": 36.5801, "step": 9497 }, { "epoch": 226.14328358208957, "grad_norm": 17.199466705322266, "learning_rate": 9.31055900621118e-06, "loss": 35.884, "step": 9498 }, { "epoch": 226.16716417910447, "grad_norm": 29.144901275634766, "learning_rate": 9.30952380952381e-06, "loss": 35.4717, "step": 9499 }, { "epoch": 226.1910447761194, "grad_norm": 20.848114013671875, "learning_rate": 9.30848861283644e-06, "loss": 36.9175, "step": 9500 }, { "epoch": 226.21492537313432, "grad_norm": 32.51123809814453, "learning_rate": 9.30745341614907e-06, "loss": 37.4948, "step": 9501 }, { "epoch": 226.23880597014926, "grad_norm": 18.725080490112305, "learning_rate": 9.306418219461699e-06, "loss": 36.4376, "step": 9502 }, { "epoch": 226.26268656716417, "grad_norm": 34.88555145263672, "learning_rate": 9.305383022774328e-06, "loss": 37.9428, "step": 9503 }, { "epoch": 226.2865671641791, "grad_norm": 25.855121612548828, "learning_rate": 9.304347826086956e-06, "loss": 37.1728, "step": 9504 }, { "epoch": 226.31044776119404, "grad_norm": 37.347198486328125, "learning_rate": 9.303312629399587e-06, "loss": 35.5822, "step": 9505 }, { "epoch": 226.33432835820895, "grad_norm": 37.224613189697266, "learning_rate": 9.302277432712215e-06, "loss": 36.2776, "step": 9506 }, { "epoch": 226.3582089552239, "grad_norm": 23.242774963378906, "learning_rate": 9.301242236024846e-06, "loss": 36.7405, "step": 9507 }, { "epoch": 226.3820895522388, "grad_norm": 26.102792739868164, "learning_rate": 9.300207039337474e-06, "loss": 35.7835, "step": 9508 }, { "epoch": 226.40597014925373, "grad_norm": 28.836580276489258, "learning_rate": 9.299171842650104e-06, "loss": 37.6715, "step": 9509 }, { "epoch": 226.42985074626867, "grad_norm": 23.27029800415039, "learning_rate": 9.298136645962733e-06, "loss": 35.8731, "step": 9510 }, { "epoch": 226.45373134328358, "grad_norm": 35.15096664428711, "learning_rate": 9.297101449275363e-06, "loss": 37.1661, "step": 9511 }, { "epoch": 226.47761194029852, "grad_norm": 28.0610408782959, "learning_rate": 9.296066252587992e-06, "loss": 36.5984, "step": 9512 }, { "epoch": 226.50149253731342, "grad_norm": 33.2618293762207, "learning_rate": 9.295031055900622e-06, "loss": 36.2855, "step": 9513 }, { "epoch": 226.52537313432836, "grad_norm": 31.961000442504883, "learning_rate": 9.293995859213251e-06, "loss": 37.3197, "step": 9514 }, { "epoch": 226.54925373134327, "grad_norm": 26.64537239074707, "learning_rate": 9.292960662525881e-06, "loss": 36.5365, "step": 9515 }, { "epoch": 226.5731343283582, "grad_norm": 26.88564682006836, "learning_rate": 9.29192546583851e-06, "loss": 37.6781, "step": 9516 }, { "epoch": 226.59701492537314, "grad_norm": 26.57740592956543, "learning_rate": 9.29089026915114e-06, "loss": 34.8302, "step": 9517 }, { "epoch": 226.62089552238805, "grad_norm": 22.637767791748047, "learning_rate": 9.28985507246377e-06, "loss": 35.4446, "step": 9518 }, { "epoch": 226.644776119403, "grad_norm": 32.20105743408203, "learning_rate": 9.288819875776397e-06, "loss": 37.527, "step": 9519 }, { "epoch": 226.6686567164179, "grad_norm": 24.580772399902344, "learning_rate": 9.287784679089029e-06, "loss": 37.1435, "step": 9520 }, { "epoch": 226.69253731343284, "grad_norm": 30.045690536499023, "learning_rate": 9.286749482401656e-06, "loss": 35.4939, "step": 9521 }, { "epoch": 226.71641791044777, "grad_norm": 27.443836212158203, "learning_rate": 9.285714285714288e-06, "loss": 37.1542, "step": 9522 }, { "epoch": 226.74029850746268, "grad_norm": 30.533077239990234, "learning_rate": 9.284679089026915e-06, "loss": 36.9308, "step": 9523 }, { "epoch": 226.76417910447762, "grad_norm": 27.056224822998047, "learning_rate": 9.283643892339545e-06, "loss": 37.3001, "step": 9524 }, { "epoch": 226.78805970149253, "grad_norm": 27.161880493164062, "learning_rate": 9.282608695652174e-06, "loss": 35.7908, "step": 9525 }, { "epoch": 226.81194029850747, "grad_norm": 24.670005798339844, "learning_rate": 9.281573498964804e-06, "loss": 37.226, "step": 9526 }, { "epoch": 226.83582089552237, "grad_norm": 28.789091110229492, "learning_rate": 9.280538302277434e-06, "loss": 35.142, "step": 9527 }, { "epoch": 226.8597014925373, "grad_norm": 28.024110794067383, "learning_rate": 9.279503105590063e-06, "loss": 36.7626, "step": 9528 }, { "epoch": 226.88358208955225, "grad_norm": 29.142467498779297, "learning_rate": 9.278467908902693e-06, "loss": 36.4019, "step": 9529 }, { "epoch": 226.90746268656716, "grad_norm": 26.798542022705078, "learning_rate": 9.277432712215322e-06, "loss": 35.8312, "step": 9530 }, { "epoch": 226.9313432835821, "grad_norm": 27.18988800048828, "learning_rate": 9.27639751552795e-06, "loss": 35.5619, "step": 9531 }, { "epoch": 226.955223880597, "grad_norm": 24.10135841369629, "learning_rate": 9.275362318840581e-06, "loss": 35.2492, "step": 9532 }, { "epoch": 226.97910447761194, "grad_norm": 30.03662109375, "learning_rate": 9.274327122153209e-06, "loss": 36.867, "step": 9533 }, { "epoch": 227.0, "grad_norm": 20.40000343322754, "learning_rate": 9.273291925465839e-06, "loss": 32.0142, "step": 9534 }, { "epoch": 227.02388059701494, "grad_norm": 30.297977447509766, "learning_rate": 9.272256728778468e-06, "loss": 36.9057, "step": 9535 }, { "epoch": 227.04776119402985, "grad_norm": 27.713289260864258, "learning_rate": 9.271221532091098e-06, "loss": 36.9784, "step": 9536 }, { "epoch": 227.07164179104478, "grad_norm": 30.362102508544922, "learning_rate": 9.270186335403727e-06, "loss": 37.718, "step": 9537 }, { "epoch": 227.0955223880597, "grad_norm": 29.50467300415039, "learning_rate": 9.269151138716357e-06, "loss": 36.3835, "step": 9538 }, { "epoch": 227.11940298507463, "grad_norm": 27.414894104003906, "learning_rate": 9.268115942028986e-06, "loss": 36.8967, "step": 9539 }, { "epoch": 227.14328358208957, "grad_norm": 25.29361915588379, "learning_rate": 9.267080745341616e-06, "loss": 37.5438, "step": 9540 }, { "epoch": 227.16716417910447, "grad_norm": 28.762990951538086, "learning_rate": 9.266045548654245e-06, "loss": 36.4315, "step": 9541 }, { "epoch": 227.1910447761194, "grad_norm": 24.950563430786133, "learning_rate": 9.265010351966875e-06, "loss": 36.9243, "step": 9542 }, { "epoch": 227.21492537313432, "grad_norm": 30.792552947998047, "learning_rate": 9.263975155279504e-06, "loss": 36.2705, "step": 9543 }, { "epoch": 227.23880597014926, "grad_norm": 29.26622772216797, "learning_rate": 9.262939958592134e-06, "loss": 35.4549, "step": 9544 }, { "epoch": 227.26268656716417, "grad_norm": 27.079885482788086, "learning_rate": 9.261904761904763e-06, "loss": 36.2627, "step": 9545 }, { "epoch": 227.2865671641791, "grad_norm": 26.671934127807617, "learning_rate": 9.260869565217391e-06, "loss": 37.5339, "step": 9546 }, { "epoch": 227.31044776119404, "grad_norm": 29.74240493774414, "learning_rate": 9.259834368530022e-06, "loss": 36.8341, "step": 9547 }, { "epoch": 227.33432835820895, "grad_norm": 23.974681854248047, "learning_rate": 9.25879917184265e-06, "loss": 37.6008, "step": 9548 }, { "epoch": 227.3582089552239, "grad_norm": 30.059099197387695, "learning_rate": 9.25776397515528e-06, "loss": 36.8922, "step": 9549 }, { "epoch": 227.3820895522388, "grad_norm": 25.293973922729492, "learning_rate": 9.25672877846791e-06, "loss": 36.2701, "step": 9550 }, { "epoch": 227.40597014925373, "grad_norm": 29.775676727294922, "learning_rate": 9.255693581780539e-06, "loss": 36.2126, "step": 9551 }, { "epoch": 227.42985074626867, "grad_norm": 24.457752227783203, "learning_rate": 9.254658385093168e-06, "loss": 36.4361, "step": 9552 }, { "epoch": 227.45373134328358, "grad_norm": 31.548799514770508, "learning_rate": 9.253623188405798e-06, "loss": 36.2246, "step": 9553 }, { "epoch": 227.47761194029852, "grad_norm": 27.01188087463379, "learning_rate": 9.252587991718427e-06, "loss": 36.3035, "step": 9554 }, { "epoch": 227.50149253731342, "grad_norm": 29.452869415283203, "learning_rate": 9.251552795031057e-06, "loss": 36.749, "step": 9555 }, { "epoch": 227.52537313432836, "grad_norm": 27.627206802368164, "learning_rate": 9.250517598343686e-06, "loss": 35.4703, "step": 9556 }, { "epoch": 227.54925373134327, "grad_norm": 30.036054611206055, "learning_rate": 9.249482401656316e-06, "loss": 37.4655, "step": 9557 }, { "epoch": 227.5731343283582, "grad_norm": 26.98161506652832, "learning_rate": 9.248447204968944e-06, "loss": 36.8224, "step": 9558 }, { "epoch": 227.59701492537314, "grad_norm": 26.470125198364258, "learning_rate": 9.247412008281575e-06, "loss": 37.1462, "step": 9559 }, { "epoch": 227.62089552238805, "grad_norm": 24.754650115966797, "learning_rate": 9.246376811594203e-06, "loss": 35.6753, "step": 9560 }, { "epoch": 227.644776119403, "grad_norm": 28.549808502197266, "learning_rate": 9.245341614906832e-06, "loss": 35.236, "step": 9561 }, { "epoch": 227.6686567164179, "grad_norm": 22.26018714904785, "learning_rate": 9.244306418219462e-06, "loss": 36.3384, "step": 9562 }, { "epoch": 227.69253731343284, "grad_norm": NaN, "learning_rate": 9.243271221532091e-06, "loss": 54.0053, "step": 9563 }, { "epoch": 227.71641791044777, "grad_norm": 29.885662078857422, "learning_rate": 9.243271221532091e-06, "loss": 35.7748, "step": 9564 }, { "epoch": 227.74029850746268, "grad_norm": 26.544864654541016, "learning_rate": 9.242236024844721e-06, "loss": 36.4898, "step": 9565 }, { "epoch": 227.76417910447762, "grad_norm": 29.086551666259766, "learning_rate": 9.24120082815735e-06, "loss": 36.8967, "step": 9566 }, { "epoch": 227.78805970149253, "grad_norm": 25.901094436645508, "learning_rate": 9.24016563146998e-06, "loss": 36.4017, "step": 9567 }, { "epoch": 227.81194029850747, "grad_norm": 26.74492645263672, "learning_rate": 9.23913043478261e-06, "loss": 36.814, "step": 9568 }, { "epoch": 227.83582089552237, "grad_norm": 25.30301856994629, "learning_rate": 9.238095238095239e-06, "loss": 36.5462, "step": 9569 }, { "epoch": 227.8597014925373, "grad_norm": 27.54271125793457, "learning_rate": 9.237060041407869e-06, "loss": 36.1326, "step": 9570 }, { "epoch": 227.88358208955225, "grad_norm": 28.441038131713867, "learning_rate": 9.236024844720498e-06, "loss": 36.421, "step": 9571 }, { "epoch": 227.90746268656716, "grad_norm": 28.303749084472656, "learning_rate": 9.234989648033128e-06, "loss": 36.3227, "step": 9572 }, { "epoch": 227.9313432835821, "grad_norm": 24.03018569946289, "learning_rate": 9.233954451345757e-06, "loss": 35.5522, "step": 9573 }, { "epoch": 227.955223880597, "grad_norm": 28.449106216430664, "learning_rate": 9.232919254658385e-06, "loss": 36.2296, "step": 9574 }, { "epoch": 227.97910447761194, "grad_norm": 26.051109313964844, "learning_rate": 9.231884057971016e-06, "loss": 36.0445, "step": 9575 }, { "epoch": 228.0, "grad_norm": 24.871156692504883, "learning_rate": 9.230848861283644e-06, "loss": 31.9742, "step": 9576 }, { "epoch": 228.02388059701494, "grad_norm": 30.20591926574707, "learning_rate": 9.229813664596274e-06, "loss": 36.5131, "step": 9577 }, { "epoch": 228.04776119402985, "grad_norm": 24.737354278564453, "learning_rate": 9.228778467908903e-06, "loss": 36.2709, "step": 9578 }, { "epoch": 228.07164179104478, "grad_norm": 24.169931411743164, "learning_rate": 9.227743271221533e-06, "loss": 36.7686, "step": 9579 }, { "epoch": 228.0955223880597, "grad_norm": 27.94664764404297, "learning_rate": 9.226708074534162e-06, "loss": 35.84, "step": 9580 }, { "epoch": 228.11940298507463, "grad_norm": 24.36469841003418, "learning_rate": 9.225672877846792e-06, "loss": 35.7983, "step": 9581 }, { "epoch": 228.14328358208957, "grad_norm": 30.60504150390625, "learning_rate": 9.224637681159421e-06, "loss": 36.2161, "step": 9582 }, { "epoch": 228.16716417910447, "grad_norm": 24.57705307006836, "learning_rate": 9.22360248447205e-06, "loss": 36.4545, "step": 9583 }, { "epoch": 228.1910447761194, "grad_norm": 28.83746337890625, "learning_rate": 9.222567287784679e-06, "loss": 36.1562, "step": 9584 }, { "epoch": 228.21492537313432, "grad_norm": 23.025724411010742, "learning_rate": 9.22153209109731e-06, "loss": 35.4331, "step": 9585 }, { "epoch": 228.23880597014926, "grad_norm": 26.98846435546875, "learning_rate": 9.220496894409938e-06, "loss": 37.1912, "step": 9586 }, { "epoch": 228.26268656716417, "grad_norm": 22.991891860961914, "learning_rate": 9.219461697722569e-06, "loss": 37.0172, "step": 9587 }, { "epoch": 228.2865671641791, "grad_norm": 27.927915573120117, "learning_rate": 9.218426501035197e-06, "loss": 37.2899, "step": 9588 }, { "epoch": 228.31044776119404, "grad_norm": 25.7060546875, "learning_rate": 9.217391304347826e-06, "loss": 36.2329, "step": 9589 }, { "epoch": 228.33432835820895, "grad_norm": 31.828887939453125, "learning_rate": 9.216356107660456e-06, "loss": 37.1658, "step": 9590 }, { "epoch": 228.3582089552239, "grad_norm": 25.7882022857666, "learning_rate": 9.215320910973085e-06, "loss": 35.7852, "step": 9591 }, { "epoch": 228.3820895522388, "grad_norm": 24.80341339111328, "learning_rate": 9.214285714285715e-06, "loss": 37.3967, "step": 9592 }, { "epoch": 228.40597014925373, "grad_norm": 26.441181182861328, "learning_rate": 9.213250517598344e-06, "loss": 36.7858, "step": 9593 }, { "epoch": 228.42985074626867, "grad_norm": 27.830324172973633, "learning_rate": 9.212215320910974e-06, "loss": 35.8861, "step": 9594 }, { "epoch": 228.45373134328358, "grad_norm": 28.198192596435547, "learning_rate": 9.211180124223603e-06, "loss": 36.8701, "step": 9595 }, { "epoch": 228.47761194029852, "grad_norm": 27.90357208251953, "learning_rate": 9.210144927536233e-06, "loss": 36.896, "step": 9596 }, { "epoch": 228.50149253731342, "grad_norm": 27.799747467041016, "learning_rate": 9.209109730848863e-06, "loss": 36.4181, "step": 9597 }, { "epoch": 228.52537313432836, "grad_norm": 27.674747467041016, "learning_rate": 9.208074534161492e-06, "loss": 36.6183, "step": 9598 }, { "epoch": 228.54925373134327, "grad_norm": 22.056236267089844, "learning_rate": 9.20703933747412e-06, "loss": 35.8563, "step": 9599 }, { "epoch": 228.5731343283582, "grad_norm": 28.755813598632812, "learning_rate": 9.206004140786751e-06, "loss": 36.1678, "step": 9600 }, { "epoch": 228.59701492537314, "grad_norm": 24.192420959472656, "learning_rate": 9.204968944099379e-06, "loss": 36.8989, "step": 9601 }, { "epoch": 228.62089552238805, "grad_norm": 28.1363468170166, "learning_rate": 9.20393374741201e-06, "loss": 36.3853, "step": 9602 }, { "epoch": 228.644776119403, "grad_norm": 27.22496795654297, "learning_rate": 9.202898550724638e-06, "loss": 36.8836, "step": 9603 }, { "epoch": 228.6686567164179, "grad_norm": 27.100008010864258, "learning_rate": 9.201863354037268e-06, "loss": 36.5644, "step": 9604 }, { "epoch": 228.69253731343284, "grad_norm": 23.599897384643555, "learning_rate": 9.200828157349897e-06, "loss": 34.7451, "step": 9605 }, { "epoch": 228.71641791044777, "grad_norm": 27.625505447387695, "learning_rate": 9.199792960662527e-06, "loss": 36.1246, "step": 9606 }, { "epoch": 228.74029850746268, "grad_norm": 23.810094833374023, "learning_rate": 9.198757763975156e-06, "loss": 37.7482, "step": 9607 }, { "epoch": 228.76417910447762, "grad_norm": 30.633962631225586, "learning_rate": 9.197722567287786e-06, "loss": 37.4939, "step": 9608 }, { "epoch": 228.78805970149253, "grad_norm": 26.48391342163086, "learning_rate": 9.196687370600415e-06, "loss": 37.7659, "step": 9609 }, { "epoch": 228.81194029850747, "grad_norm": 27.97773551940918, "learning_rate": 9.195652173913045e-06, "loss": 36.5648, "step": 9610 }, { "epoch": 228.83582089552237, "grad_norm": 25.009822845458984, "learning_rate": 9.194616977225673e-06, "loss": 35.4764, "step": 9611 }, { "epoch": 228.8597014925373, "grad_norm": 25.571136474609375, "learning_rate": 9.193581780538304e-06, "loss": 35.1957, "step": 9612 }, { "epoch": 228.88358208955225, "grad_norm": 23.255353927612305, "learning_rate": 9.192546583850932e-06, "loss": 36.2558, "step": 9613 }, { "epoch": 228.90746268656716, "grad_norm": 29.044546127319336, "learning_rate": 9.191511387163561e-06, "loss": 37.4831, "step": 9614 }, { "epoch": 228.9313432835821, "grad_norm": 23.691383361816406, "learning_rate": 9.19047619047619e-06, "loss": 36.819, "step": 9615 }, { "epoch": 228.955223880597, "grad_norm": 32.514060974121094, "learning_rate": 9.18944099378882e-06, "loss": 36.1476, "step": 9616 }, { "epoch": 228.97910447761194, "grad_norm": 28.069005966186523, "learning_rate": 9.18840579710145e-06, "loss": 36.6969, "step": 9617 }, { "epoch": 229.0, "grad_norm": 26.643814086914062, "learning_rate": 9.18737060041408e-06, "loss": 31.8657, "step": 9618 }, { "epoch": 229.02388059701494, "grad_norm": 24.87017059326172, "learning_rate": 9.186335403726709e-06, "loss": 35.0354, "step": 9619 }, { "epoch": 229.04776119402985, "grad_norm": 29.640108108520508, "learning_rate": 9.185300207039338e-06, "loss": 36.069, "step": 9620 }, { "epoch": 229.07164179104478, "grad_norm": 24.502927780151367, "learning_rate": 9.184265010351968e-06, "loss": 36.3469, "step": 9621 }, { "epoch": 229.0955223880597, "grad_norm": 30.278432846069336, "learning_rate": 9.183229813664597e-06, "loss": 36.4906, "step": 9622 }, { "epoch": 229.11940298507463, "grad_norm": 28.056241989135742, "learning_rate": 9.182194616977227e-06, "loss": 36.9138, "step": 9623 }, { "epoch": 229.14328358208957, "grad_norm": 28.766387939453125, "learning_rate": 9.181159420289856e-06, "loss": 36.6277, "step": 9624 }, { "epoch": 229.16716417910447, "grad_norm": 25.354930877685547, "learning_rate": 9.180124223602486e-06, "loss": 35.8951, "step": 9625 }, { "epoch": 229.1910447761194, "grad_norm": 26.487565994262695, "learning_rate": 9.179089026915114e-06, "loss": 37.2082, "step": 9626 }, { "epoch": 229.21492537313432, "grad_norm": 21.196584701538086, "learning_rate": 9.178053830227745e-06, "loss": 36.8846, "step": 9627 }, { "epoch": 229.23880597014926, "grad_norm": 27.11143684387207, "learning_rate": 9.177018633540373e-06, "loss": 37.4834, "step": 9628 }, { "epoch": 229.26268656716417, "grad_norm": 23.48799705505371, "learning_rate": 9.175983436853002e-06, "loss": 37.0113, "step": 9629 }, { "epoch": 229.2865671641791, "grad_norm": 29.37694549560547, "learning_rate": 9.174948240165632e-06, "loss": 35.9114, "step": 9630 }, { "epoch": 229.31044776119404, "grad_norm": 23.161012649536133, "learning_rate": 9.173913043478261e-06, "loss": 36.5496, "step": 9631 }, { "epoch": 229.33432835820895, "grad_norm": 28.731334686279297, "learning_rate": 9.172877846790891e-06, "loss": 35.6109, "step": 9632 }, { "epoch": 229.3582089552239, "grad_norm": 25.209381103515625, "learning_rate": 9.17184265010352e-06, "loss": 36.8632, "step": 9633 }, { "epoch": 229.3820895522388, "grad_norm": 30.024168014526367, "learning_rate": 9.17080745341615e-06, "loss": 36.5665, "step": 9634 }, { "epoch": 229.40597014925373, "grad_norm": 25.45462989807129, "learning_rate": 9.16977225672878e-06, "loss": 35.5529, "step": 9635 }, { "epoch": 229.42985074626867, "grad_norm": 26.418886184692383, "learning_rate": 9.168737060041409e-06, "loss": 36.4459, "step": 9636 }, { "epoch": 229.45373134328358, "grad_norm": 24.408700942993164, "learning_rate": 9.167701863354039e-06, "loss": 36.6138, "step": 9637 }, { "epoch": 229.47761194029852, "grad_norm": 28.08294105529785, "learning_rate": 9.166666666666666e-06, "loss": 36.4635, "step": 9638 }, { "epoch": 229.50149253731342, "grad_norm": 22.752918243408203, "learning_rate": 9.165631469979298e-06, "loss": 36.985, "step": 9639 }, { "epoch": 229.52537313432836, "grad_norm": 28.713455200195312, "learning_rate": 9.164596273291925e-06, "loss": 36.6903, "step": 9640 }, { "epoch": 229.54925373134327, "grad_norm": 23.558683395385742, "learning_rate": 9.163561076604555e-06, "loss": 36.7125, "step": 9641 }, { "epoch": 229.5731343283582, "grad_norm": 27.721750259399414, "learning_rate": 9.162525879917185e-06, "loss": 36.0821, "step": 9642 }, { "epoch": 229.59701492537314, "grad_norm": 26.57756233215332, "learning_rate": 9.161490683229814e-06, "loss": 36.8507, "step": 9643 }, { "epoch": 229.62089552238805, "grad_norm": 29.08770751953125, "learning_rate": 9.160455486542444e-06, "loss": 35.9886, "step": 9644 }, { "epoch": 229.644776119403, "grad_norm": 26.117769241333008, "learning_rate": 9.159420289855073e-06, "loss": 36.9678, "step": 9645 }, { "epoch": 229.6686567164179, "grad_norm": 23.67546272277832, "learning_rate": 9.158385093167703e-06, "loss": 35.8266, "step": 9646 }, { "epoch": 229.69253731343284, "grad_norm": 23.151939392089844, "learning_rate": 9.157349896480332e-06, "loss": 36.994, "step": 9647 }, { "epoch": 229.71641791044777, "grad_norm": 28.121440887451172, "learning_rate": 9.156314699792962e-06, "loss": 36.6383, "step": 9648 }, { "epoch": 229.74029850746268, "grad_norm": 22.268630981445312, "learning_rate": 9.155279503105591e-06, "loss": 37.0435, "step": 9649 }, { "epoch": 229.76417910447762, "grad_norm": 30.70549964904785, "learning_rate": 9.15424430641822e-06, "loss": 35.9803, "step": 9650 }, { "epoch": 229.78805970149253, "grad_norm": 22.488264083862305, "learning_rate": 9.15320910973085e-06, "loss": 35.8651, "step": 9651 }, { "epoch": 229.81194029850747, "grad_norm": 29.461984634399414, "learning_rate": 9.15217391304348e-06, "loss": 36.7376, "step": 9652 }, { "epoch": 229.83582089552237, "grad_norm": 25.53286361694336, "learning_rate": 9.151138716356108e-06, "loss": 36.3119, "step": 9653 }, { "epoch": 229.8597014925373, "grad_norm": 26.687524795532227, "learning_rate": 9.150103519668739e-06, "loss": 36.9127, "step": 9654 }, { "epoch": 229.88358208955225, "grad_norm": 23.459135055541992, "learning_rate": 9.149068322981367e-06, "loss": 36.0665, "step": 9655 }, { "epoch": 229.90746268656716, "grad_norm": 26.695363998413086, "learning_rate": 9.148033126293996e-06, "loss": 36.1897, "step": 9656 }, { "epoch": 229.9313432835821, "grad_norm": 21.218103408813477, "learning_rate": 9.146997929606626e-06, "loss": 35.0119, "step": 9657 }, { "epoch": 229.955223880597, "grad_norm": 30.54604721069336, "learning_rate": 9.145962732919255e-06, "loss": 36.636, "step": 9658 }, { "epoch": 229.97910447761194, "grad_norm": 26.00942611694336, "learning_rate": 9.144927536231885e-06, "loss": 37.2953, "step": 9659 }, { "epoch": 230.0, "grad_norm": 22.77959632873535, "learning_rate": 9.143892339544514e-06, "loss": 33.3568, "step": 9660 }, { "epoch": 230.0, "step": 9660, "total_flos": 4.7489759270672614e+17, "train_loss": 3.205690276499367, "train_runtime": 25607.5331, "train_samples_per_second": 48.07, "train_steps_per_second": 0.377 }, { "epoch": 230.02388059701494, "grad_norm": 24.71234893798828, "learning_rate": 1e-05, "loss": 36.4127, "step": 9661 }, { "epoch": 230.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99904761904762e-06, "loss": 43.3981, "step": 9662 }, { "epoch": 230.07164179104478, "grad_norm": Infinity, "learning_rate": 9.99904761904762e-06, "loss": 43.4953, "step": 9663 }, { "epoch": 230.0955223880597, "grad_norm": 375.4003601074219, "learning_rate": 9.99904761904762e-06, "loss": 42.9443, "step": 9664 }, { "epoch": 230.11940298507463, "grad_norm": 212.64337158203125, "learning_rate": 9.998095238095239e-06, "loss": 41.8767, "step": 9665 }, { "epoch": 230.14328358208957, "grad_norm": 81.78107452392578, "learning_rate": 9.997142857142858e-06, "loss": 39.5315, "step": 9666 }, { "epoch": 230.16716417910447, "grad_norm": 69.39502716064453, "learning_rate": 9.996190476190476e-06, "loss": 38.3034, "step": 9667 }, { "epoch": 230.1910447761194, "grad_norm": 58.64521026611328, "learning_rate": 9.995238095238095e-06, "loss": 36.6875, "step": 9668 }, { "epoch": 230.21492537313432, "grad_norm": 62.30952835083008, "learning_rate": 9.994285714285716e-06, "loss": 36.8947, "step": 9669 }, { "epoch": 230.23880597014926, "grad_norm": 46.44341278076172, "learning_rate": 9.993333333333333e-06, "loss": 36.5471, "step": 9670 }, { "epoch": 230.26268656716417, "grad_norm": 52.63033676147461, "learning_rate": 9.992380952380954e-06, "loss": 37.0309, "step": 9671 }, { "epoch": 230.2865671641791, "grad_norm": 33.28845977783203, "learning_rate": 9.991428571428573e-06, "loss": 36.6858, "step": 9672 }, { "epoch": 230.31044776119404, "grad_norm": 43.48398208618164, "learning_rate": 9.990476190476191e-06, "loss": 36.7173, "step": 9673 }, { "epoch": 230.33432835820895, "grad_norm": 30.336566925048828, "learning_rate": 9.98952380952381e-06, "loss": 36.4503, "step": 9674 }, { "epoch": 230.3582089552239, "grad_norm": 32.99346923828125, "learning_rate": 9.98857142857143e-06, "loss": 36.694, "step": 9675 }, { "epoch": 230.3820895522388, "grad_norm": 26.682479858398438, "learning_rate": 9.987619047619048e-06, "loss": 36.7674, "step": 9676 }, { "epoch": 230.40597014925373, "grad_norm": 23.91337776184082, "learning_rate": 9.986666666666667e-06, "loss": 36.2288, "step": 9677 }, { "epoch": 230.42985074626867, "grad_norm": 21.41556739807129, "learning_rate": 9.985714285714286e-06, "loss": 37.7821, "step": 9678 }, { "epoch": 230.45373134328358, "grad_norm": 21.785661697387695, "learning_rate": 9.984761904761907e-06, "loss": 36.516, "step": 9679 }, { "epoch": 230.47761194029852, "grad_norm": 21.943296432495117, "learning_rate": 9.983809523809524e-06, "loss": 36.4244, "step": 9680 }, { "epoch": 230.50149253731342, "grad_norm": 16.069475173950195, "learning_rate": 9.982857142857144e-06, "loss": 37.5188, "step": 9681 }, { "epoch": 230.52537313432836, "grad_norm": 22.269662857055664, "learning_rate": 9.981904761904763e-06, "loss": 35.9702, "step": 9682 }, { "epoch": 230.54925373134327, "grad_norm": 16.287111282348633, "learning_rate": 9.980952380952382e-06, "loss": 36.6675, "step": 9683 }, { "epoch": 230.5731343283582, "grad_norm": 25.83818817138672, "learning_rate": 9.980000000000001e-06, "loss": 36.7307, "step": 9684 }, { "epoch": 230.59701492537314, "grad_norm": 19.08870506286621, "learning_rate": 9.97904761904762e-06, "loss": 35.9015, "step": 9685 }, { "epoch": 230.62089552238805, "grad_norm": 19.956871032714844, "learning_rate": 9.978095238095239e-06, "loss": 36.4668, "step": 9686 }, { "epoch": 230.644776119403, "grad_norm": 23.43463897705078, "learning_rate": 9.977142857142858e-06, "loss": 37.2856, "step": 9687 }, { "epoch": 230.6686567164179, "grad_norm": 22.461307525634766, "learning_rate": 9.976190476190477e-06, "loss": 36.2338, "step": 9688 }, { "epoch": 230.69253731343284, "grad_norm": NaN, "learning_rate": 9.975238095238095e-06, "loss": 35.0579, "step": 9689 }, { "epoch": 230.71641791044777, "grad_norm": 15.775413513183594, "learning_rate": 9.975238095238095e-06, "loss": 37.179, "step": 9690 }, { "epoch": 230.74029850746268, "grad_norm": 16.799991607666016, "learning_rate": 9.974285714285716e-06, "loss": 36.5493, "step": 9691 }, { "epoch": 230.76417910447762, "grad_norm": 19.954538345336914, "learning_rate": 9.973333333333333e-06, "loss": 36.9768, "step": 9692 }, { "epoch": 230.78805970149253, "grad_norm": 18.49732208251953, "learning_rate": 9.972380952380954e-06, "loss": 37.5158, "step": 9693 }, { "epoch": 230.81194029850747, "grad_norm": 12.87289047241211, "learning_rate": 9.971428571428571e-06, "loss": 35.1848, "step": 9694 }, { "epoch": 230.83582089552237, "grad_norm": 15.607162475585938, "learning_rate": 9.970476190476192e-06, "loss": 37.1183, "step": 9695 }, { "epoch": 230.8597014925373, "grad_norm": 14.40938949584961, "learning_rate": 9.96952380952381e-06, "loss": 35.9702, "step": 9696 }, { "epoch": 230.88358208955225, "grad_norm": 14.662040710449219, "learning_rate": 9.96857142857143e-06, "loss": 35.782, "step": 9697 }, { "epoch": 230.90746268656716, "grad_norm": 13.855502128601074, "learning_rate": 9.967619047619048e-06, "loss": 36.739, "step": 9698 }, { "epoch": 230.9313432835821, "grad_norm": 16.36407470703125, "learning_rate": 9.966666666666667e-06, "loss": 37.031, "step": 9699 }, { "epoch": 230.955223880597, "grad_norm": 15.637924194335938, "learning_rate": 9.965714285714286e-06, "loss": 37.6073, "step": 9700 }, { "epoch": 230.97910447761194, "grad_norm": 15.853774070739746, "learning_rate": 9.964761904761907e-06, "loss": 36.0398, "step": 9701 }, { "epoch": 231.0, "grad_norm": 15.01644229888916, "learning_rate": 9.963809523809524e-06, "loss": 31.3042, "step": 9702 }, { "epoch": 231.02388059701494, "grad_norm": 13.843478202819824, "learning_rate": 9.962857142857145e-06, "loss": 36.955, "step": 9703 }, { "epoch": 231.04776119402985, "grad_norm": 19.220901489257812, "learning_rate": 9.961904761904763e-06, "loss": 37.508, "step": 9704 }, { "epoch": 231.07164179104478, "grad_norm": 15.885614395141602, "learning_rate": 9.960952380952382e-06, "loss": 37.5109, "step": 9705 }, { "epoch": 231.0955223880597, "grad_norm": 20.546024322509766, "learning_rate": 9.960000000000001e-06, "loss": 35.8497, "step": 9706 }, { "epoch": 231.11940298507463, "grad_norm": 15.986220359802246, "learning_rate": 9.95904761904762e-06, "loss": 36.7785, "step": 9707 }, { "epoch": 231.14328358208957, "grad_norm": 17.797225952148438, "learning_rate": 9.958095238095239e-06, "loss": 36.2075, "step": 9708 }, { "epoch": 231.16716417910447, "grad_norm": 17.886463165283203, "learning_rate": 9.957142857142858e-06, "loss": 35.7369, "step": 9709 }, { "epoch": 231.1910447761194, "grad_norm": 20.580068588256836, "learning_rate": 9.956190476190477e-06, "loss": 36.6398, "step": 9710 }, { "epoch": 231.21492537313432, "grad_norm": 17.89637565612793, "learning_rate": 9.955238095238096e-06, "loss": 36.6353, "step": 9711 }, { "epoch": 231.23880597014926, "grad_norm": 16.90445899963379, "learning_rate": 9.954285714285715e-06, "loss": 36.2476, "step": 9712 }, { "epoch": 231.26268656716417, "grad_norm": 14.622599601745605, "learning_rate": 9.953333333333333e-06, "loss": 35.352, "step": 9713 }, { "epoch": 231.2865671641791, "grad_norm": 17.468870162963867, "learning_rate": 9.952380952380954e-06, "loss": 35.306, "step": 9714 }, { "epoch": 231.31044776119404, "grad_norm": 16.23251724243164, "learning_rate": 9.951428571428571e-06, "loss": 36.6746, "step": 9715 }, { "epoch": 231.33432835820895, "grad_norm": 16.193260192871094, "learning_rate": 9.950476190476192e-06, "loss": 37.1332, "step": 9716 }, { "epoch": 231.3582089552239, "grad_norm": 13.897563934326172, "learning_rate": 9.94952380952381e-06, "loss": 35.1147, "step": 9717 }, { "epoch": 231.3820895522388, "grad_norm": 17.120468139648438, "learning_rate": 9.94857142857143e-06, "loss": 37.01, "step": 9718 }, { "epoch": 231.40597014925373, "grad_norm": 14.881387710571289, "learning_rate": 9.947619047619049e-06, "loss": 35.7699, "step": 9719 }, { "epoch": 231.42985074626867, "grad_norm": 14.89230728149414, "learning_rate": 9.946666666666667e-06, "loss": 35.4544, "step": 9720 }, { "epoch": 231.45373134328358, "grad_norm": 12.17119026184082, "learning_rate": 9.945714285714286e-06, "loss": 35.3585, "step": 9721 }, { "epoch": 231.47761194029852, "grad_norm": 19.992794036865234, "learning_rate": 9.944761904761905e-06, "loss": 35.4771, "step": 9722 }, { "epoch": 231.50149253731342, "grad_norm": 20.673452377319336, "learning_rate": 9.943809523809524e-06, "loss": 36.3602, "step": 9723 }, { "epoch": 231.52537313432836, "grad_norm": 18.165555953979492, "learning_rate": 9.942857142857145e-06, "loss": 37.6295, "step": 9724 }, { "epoch": 231.54925373134327, "grad_norm": 13.455978393554688, "learning_rate": 9.941904761904762e-06, "loss": 36.6793, "step": 9725 }, { "epoch": 231.5731343283582, "grad_norm": 16.61351776123047, "learning_rate": 9.940952380952382e-06, "loss": 36.8213, "step": 9726 }, { "epoch": 231.59701492537314, "grad_norm": 17.952800750732422, "learning_rate": 9.940000000000001e-06, "loss": 36.6038, "step": 9727 }, { "epoch": 231.62089552238805, "grad_norm": 20.534774780273438, "learning_rate": 9.93904761904762e-06, "loss": 36.165, "step": 9728 }, { "epoch": 231.644776119403, "grad_norm": 14.053009033203125, "learning_rate": 9.93809523809524e-06, "loss": 35.3623, "step": 9729 }, { "epoch": 231.6686567164179, "grad_norm": 22.43195152282715, "learning_rate": 9.937142857142858e-06, "loss": 36.9735, "step": 9730 }, { "epoch": 231.69253731343284, "grad_norm": 21.216463088989258, "learning_rate": 9.936190476190477e-06, "loss": 36.5385, "step": 9731 }, { "epoch": 231.71641791044777, "grad_norm": 16.259366989135742, "learning_rate": 9.935238095238096e-06, "loss": 36.7006, "step": 9732 }, { "epoch": 231.74029850746268, "grad_norm": 29.90939712524414, "learning_rate": 9.934285714285715e-06, "loss": 36.2747, "step": 9733 }, { "epoch": 231.76417910447762, "grad_norm": NaN, "learning_rate": 9.933333333333334e-06, "loss": 50.0571, "step": 9734 }, { "epoch": 231.78805970149253, "grad_norm": 18.765172958374023, "learning_rate": 9.933333333333334e-06, "loss": 36.055, "step": 9735 }, { "epoch": 231.81194029850747, "grad_norm": 33.186214447021484, "learning_rate": 9.932380952380953e-06, "loss": 37.1991, "step": 9736 }, { "epoch": 231.83582089552237, "grad_norm": 22.32003402709961, "learning_rate": 9.931428571428571e-06, "loss": 36.8529, "step": 9737 }, { "epoch": 231.8597014925373, "grad_norm": 34.36970520019531, "learning_rate": 9.930476190476192e-06, "loss": 37.8959, "step": 9738 }, { "epoch": 231.88358208955225, "grad_norm": 22.484411239624023, "learning_rate": 9.92952380952381e-06, "loss": 35.1009, "step": 9739 }, { "epoch": 231.90746268656716, "grad_norm": 35.59208679199219, "learning_rate": 9.92857142857143e-06, "loss": 38.0479, "step": 9740 }, { "epoch": 231.9313432835821, "grad_norm": 24.744535446166992, "learning_rate": 9.927619047619049e-06, "loss": 36.7969, "step": 9741 }, { "epoch": 231.955223880597, "grad_norm": 35.06355285644531, "learning_rate": 9.926666666666668e-06, "loss": 36.65, "step": 9742 }, { "epoch": 231.97910447761194, "grad_norm": 29.507776260375977, "learning_rate": 9.925714285714287e-06, "loss": 36.2103, "step": 9743 }, { "epoch": 232.0, "grad_norm": 31.731843948364258, "learning_rate": 9.924761904761905e-06, "loss": 30.5489, "step": 9744 }, { "epoch": 232.02388059701494, "grad_norm": 31.205385208129883, "learning_rate": 9.923809523809524e-06, "loss": 36.8333, "step": 9745 }, { "epoch": 232.04776119402985, "grad_norm": 28.050081253051758, "learning_rate": 9.922857142857145e-06, "loss": 36.6318, "step": 9746 }, { "epoch": 232.07164179104478, "grad_norm": 26.250442504882812, "learning_rate": 9.921904761904762e-06, "loss": 36.0135, "step": 9747 }, { "epoch": 232.0955223880597, "grad_norm": 27.33561897277832, "learning_rate": 9.920952380952383e-06, "loss": 36.6887, "step": 9748 }, { "epoch": 232.11940298507463, "grad_norm": 23.297338485717773, "learning_rate": 9.920000000000002e-06, "loss": 36.5706, "step": 9749 }, { "epoch": 232.14328358208957, "grad_norm": 31.878978729248047, "learning_rate": 9.91904761904762e-06, "loss": 34.7229, "step": 9750 }, { "epoch": 232.16716417910447, "grad_norm": NaN, "learning_rate": 9.91809523809524e-06, "loss": 30.7021, "step": 9751 }, { "epoch": 232.1910447761194, "grad_norm": 27.822471618652344, "learning_rate": 9.91809523809524e-06, "loss": 35.5529, "step": 9752 }, { "epoch": 232.21492537313432, "grad_norm": 28.86005973815918, "learning_rate": 9.917142857142857e-06, "loss": 36.7525, "step": 9753 }, { "epoch": 232.23880597014926, "grad_norm": 28.202646255493164, "learning_rate": 9.916190476190477e-06, "loss": 37.5697, "step": 9754 }, { "epoch": 232.26268656716417, "grad_norm": 31.08536720275879, "learning_rate": 9.915238095238096e-06, "loss": 36.452, "step": 9755 }, { "epoch": 232.2865671641791, "grad_norm": 27.056488037109375, "learning_rate": 9.914285714285715e-06, "loss": 36.5263, "step": 9756 }, { "epoch": 232.31044776119404, "grad_norm": 32.32611083984375, "learning_rate": 9.913333333333334e-06, "loss": 35.9751, "step": 9757 }, { "epoch": 232.33432835820895, "grad_norm": 29.796775817871094, "learning_rate": 9.912380952380953e-06, "loss": 36.5457, "step": 9758 }, { "epoch": 232.3582089552239, "grad_norm": 27.9810733795166, "learning_rate": 9.911428571428572e-06, "loss": 36.3721, "step": 9759 }, { "epoch": 232.3820895522388, "grad_norm": 25.292329788208008, "learning_rate": 9.910476190476192e-06, "loss": 35.6735, "step": 9760 }, { "epoch": 232.40597014925373, "grad_norm": 31.544578552246094, "learning_rate": 9.90952380952381e-06, "loss": 35.9049, "step": 9761 }, { "epoch": 232.42985074626867, "grad_norm": 26.03896713256836, "learning_rate": 9.90857142857143e-06, "loss": 35.0874, "step": 9762 }, { "epoch": 232.45373134328358, "grad_norm": 31.23866081237793, "learning_rate": 9.907619047619049e-06, "loss": 35.927, "step": 9763 }, { "epoch": 232.47761194029852, "grad_norm": 25.877737045288086, "learning_rate": 9.906666666666668e-06, "loss": 36.2758, "step": 9764 }, { "epoch": 232.50149253731342, "grad_norm": 28.32729721069336, "learning_rate": 9.905714285714287e-06, "loss": 35.9696, "step": 9765 }, { "epoch": 232.52537313432836, "grad_norm": 26.140939712524414, "learning_rate": 9.904761904761906e-06, "loss": 37.0429, "step": 9766 }, { "epoch": 232.54925373134327, "grad_norm": 29.77901268005371, "learning_rate": 9.903809523809524e-06, "loss": 36.6883, "step": 9767 }, { "epoch": 232.5731343283582, "grad_norm": 28.25278091430664, "learning_rate": 9.902857142857143e-06, "loss": 36.8882, "step": 9768 }, { "epoch": 232.59701492537314, "grad_norm": 30.905784606933594, "learning_rate": 9.901904761904762e-06, "loss": 37.1189, "step": 9769 }, { "epoch": 232.62089552238805, "grad_norm": 24.243087768554688, "learning_rate": 9.900952380952383e-06, "loss": 36.439, "step": 9770 }, { "epoch": 232.644776119403, "grad_norm": 28.366220474243164, "learning_rate": 9.9e-06, "loss": 35.9505, "step": 9771 }, { "epoch": 232.6686567164179, "grad_norm": 24.840042114257812, "learning_rate": 9.89904761904762e-06, "loss": 35.9767, "step": 9772 }, { "epoch": 232.69253731343284, "grad_norm": 27.562599182128906, "learning_rate": 9.89809523809524e-06, "loss": 37.1143, "step": 9773 }, { "epoch": 232.71641791044777, "grad_norm": 28.17928695678711, "learning_rate": 9.897142857142858e-06, "loss": 36.3648, "step": 9774 }, { "epoch": 232.74029850746268, "grad_norm": 26.700790405273438, "learning_rate": 9.896190476190477e-06, "loss": 36.998, "step": 9775 }, { "epoch": 232.76417910447762, "grad_norm": 24.507808685302734, "learning_rate": 9.895238095238096e-06, "loss": 36.2914, "step": 9776 }, { "epoch": 232.78805970149253, "grad_norm": 28.5314998626709, "learning_rate": 9.894285714285715e-06, "loss": 36.9364, "step": 9777 }, { "epoch": 232.81194029850747, "grad_norm": 23.566978454589844, "learning_rate": 9.893333333333334e-06, "loss": 37.4334, "step": 9778 }, { "epoch": 232.83582089552237, "grad_norm": 32.451316833496094, "learning_rate": 9.892380952380953e-06, "loss": 36.7729, "step": 9779 }, { "epoch": 232.8597014925373, "grad_norm": 28.89414405822754, "learning_rate": 9.891428571428572e-06, "loss": 36.2706, "step": 9780 }, { "epoch": 232.88358208955225, "grad_norm": 26.81434440612793, "learning_rate": 9.89047619047619e-06, "loss": 35.4961, "step": 9781 }, { "epoch": 232.90746268656716, "grad_norm": 24.25072479248047, "learning_rate": 9.88952380952381e-06, "loss": 36.3199, "step": 9782 }, { "epoch": 232.9313432835821, "grad_norm": 28.760164260864258, "learning_rate": 9.88857142857143e-06, "loss": 36.1606, "step": 9783 }, { "epoch": 232.955223880597, "grad_norm": 26.750282287597656, "learning_rate": 9.887619047619047e-06, "loss": 36.8096, "step": 9784 }, { "epoch": 232.97910447761194, "grad_norm": 29.684532165527344, "learning_rate": 9.886666666666668e-06, "loss": 35.3498, "step": 9785 }, { "epoch": 233.0, "grad_norm": 21.966087341308594, "learning_rate": 9.885714285714287e-06, "loss": 31.7937, "step": 9786 }, { "epoch": 233.02388059701494, "grad_norm": 26.098487854003906, "learning_rate": 9.884761904761906e-06, "loss": 35.8188, "step": 9787 }, { "epoch": 233.04776119402985, "grad_norm": 21.66274642944336, "learning_rate": 9.883809523809525e-06, "loss": 37.2063, "step": 9788 }, { "epoch": 233.07164179104478, "grad_norm": 26.546297073364258, "learning_rate": 9.882857142857144e-06, "loss": 36.5433, "step": 9789 }, { "epoch": 233.0955223880597, "grad_norm": 22.168907165527344, "learning_rate": 9.881904761904762e-06, "loss": 34.894, "step": 9790 }, { "epoch": 233.11940298507463, "grad_norm": 37.45732116699219, "learning_rate": 9.880952380952381e-06, "loss": 36.3476, "step": 9791 }, { "epoch": 233.14328358208957, "grad_norm": 26.97699546813965, "learning_rate": 9.88e-06, "loss": 35.4302, "step": 9792 }, { "epoch": 233.16716417910447, "grad_norm": 27.618061065673828, "learning_rate": 9.879047619047621e-06, "loss": 36.9248, "step": 9793 }, { "epoch": 233.1910447761194, "grad_norm": 27.036169052124023, "learning_rate": 9.878095238095238e-06, "loss": 35.6113, "step": 9794 }, { "epoch": 233.21492537313432, "grad_norm": 27.536331176757812, "learning_rate": 9.877142857142859e-06, "loss": 36.8181, "step": 9795 }, { "epoch": 233.23880597014926, "grad_norm": 24.680740356445312, "learning_rate": 9.876190476190478e-06, "loss": 37.4333, "step": 9796 }, { "epoch": 233.26268656716417, "grad_norm": 28.902076721191406, "learning_rate": 9.875238095238095e-06, "loss": 36.5323, "step": 9797 }, { "epoch": 233.2865671641791, "grad_norm": 27.464153289794922, "learning_rate": 9.874285714285715e-06, "loss": 35.7346, "step": 9798 }, { "epoch": 233.31044776119404, "grad_norm": 29.325603485107422, "learning_rate": 9.873333333333334e-06, "loss": 36.2152, "step": 9799 }, { "epoch": 233.33432835820895, "grad_norm": 27.650089263916016, "learning_rate": 9.872380952380953e-06, "loss": 36.6219, "step": 9800 }, { "epoch": 233.3582089552239, "grad_norm": 26.96459197998047, "learning_rate": 9.871428571428572e-06, "loss": 36.8084, "step": 9801 }, { "epoch": 233.3820895522388, "grad_norm": 23.522382736206055, "learning_rate": 9.870476190476191e-06, "loss": 36.5991, "step": 9802 }, { "epoch": 233.40597014925373, "grad_norm": 29.002304077148438, "learning_rate": 9.86952380952381e-06, "loss": 37.535, "step": 9803 }, { "epoch": 233.42985074626867, "grad_norm": 23.243408203125, "learning_rate": 9.86857142857143e-06, "loss": 36.7426, "step": 9804 }, { "epoch": 233.45373134328358, "grad_norm": 28.153141021728516, "learning_rate": 9.867619047619048e-06, "loss": 37.1516, "step": 9805 }, { "epoch": 233.47761194029852, "grad_norm": 25.485090255737305, "learning_rate": 9.866666666666668e-06, "loss": 36.4648, "step": 9806 }, { "epoch": 233.50149253731342, "grad_norm": 27.776960372924805, "learning_rate": 9.865714285714285e-06, "loss": 34.8475, "step": 9807 }, { "epoch": 233.52537313432836, "grad_norm": 26.18765640258789, "learning_rate": 9.864761904761906e-06, "loss": 36.1718, "step": 9808 }, { "epoch": 233.54925373134327, "grad_norm": 26.28923225402832, "learning_rate": 9.863809523809525e-06, "loss": 37.6577, "step": 9809 }, { "epoch": 233.5731343283582, "grad_norm": 24.869470596313477, "learning_rate": 9.862857142857144e-06, "loss": 35.8387, "step": 9810 }, { "epoch": 233.59701492537314, "grad_norm": 25.25719451904297, "learning_rate": 9.861904761904763e-06, "loss": 35.1382, "step": 9811 }, { "epoch": 233.62089552238805, "grad_norm": 23.99881935119629, "learning_rate": 9.860952380952382e-06, "loss": 36.8773, "step": 9812 }, { "epoch": 233.644776119403, "grad_norm": 26.899507522583008, "learning_rate": 9.86e-06, "loss": 36.1116, "step": 9813 }, { "epoch": 233.6686567164179, "grad_norm": 23.04312515258789, "learning_rate": 9.859047619047621e-06, "loss": 36.3473, "step": 9814 }, { "epoch": 233.69253731343284, "grad_norm": 27.696046829223633, "learning_rate": 9.858095238095238e-06, "loss": 36.6766, "step": 9815 }, { "epoch": 233.71641791044777, "grad_norm": 22.96893310546875, "learning_rate": 9.857142857142859e-06, "loss": 36.3639, "step": 9816 }, { "epoch": 233.74029850746268, "grad_norm": 27.865476608276367, "learning_rate": 9.856190476190478e-06, "loss": 35.4878, "step": 9817 }, { "epoch": 233.76417910447762, "grad_norm": 24.344972610473633, "learning_rate": 9.855238095238095e-06, "loss": 35.4155, "step": 9818 }, { "epoch": 233.78805970149253, "grad_norm": 26.939016342163086, "learning_rate": 9.854285714285716e-06, "loss": 37.4377, "step": 9819 }, { "epoch": 233.81194029850747, "grad_norm": 22.826608657836914, "learning_rate": 9.853333333333334e-06, "loss": 36.5141, "step": 9820 }, { "epoch": 233.83582089552237, "grad_norm": 25.923465728759766, "learning_rate": 9.852380952380953e-06, "loss": 35.8261, "step": 9821 }, { "epoch": 233.8597014925373, "grad_norm": 24.230087280273438, "learning_rate": 9.851428571428572e-06, "loss": 36.7688, "step": 9822 }, { "epoch": 233.88358208955225, "grad_norm": 27.065654754638672, "learning_rate": 9.850476190476191e-06, "loss": 35.6492, "step": 9823 }, { "epoch": 233.90746268656716, "grad_norm": 21.02733612060547, "learning_rate": 9.84952380952381e-06, "loss": 35.9388, "step": 9824 }, { "epoch": 233.9313432835821, "grad_norm": 26.16006088256836, "learning_rate": 9.848571428571429e-06, "loss": 35.9996, "step": 9825 }, { "epoch": 233.955223880597, "grad_norm": 20.3577938079834, "learning_rate": 9.847619047619048e-06, "loss": 37.1911, "step": 9826 }, { "epoch": 233.97910447761194, "grad_norm": 20.499956130981445, "learning_rate": 9.846666666666668e-06, "loss": 35.8032, "step": 9827 }, { "epoch": 234.0, "grad_norm": 19.88180160522461, "learning_rate": 9.845714285714286e-06, "loss": 31.2912, "step": 9828 }, { "epoch": 234.02388059701494, "grad_norm": 13.999608993530273, "learning_rate": 9.844761904761906e-06, "loss": 35.5007, "step": 9829 }, { "epoch": 234.04776119402985, "grad_norm": 25.21092987060547, "learning_rate": 9.843809523809525e-06, "loss": 35.4806, "step": 9830 }, { "epoch": 234.07164179104478, "grad_norm": 16.646089553833008, "learning_rate": 9.842857142857144e-06, "loss": 36.6435, "step": 9831 }, { "epoch": 234.0955223880597, "grad_norm": 25.66943359375, "learning_rate": 9.841904761904763e-06, "loss": 36.7818, "step": 9832 }, { "epoch": 234.11940298507463, "grad_norm": 20.8841495513916, "learning_rate": 9.840952380952382e-06, "loss": 36.266, "step": 9833 }, { "epoch": 234.14328358208957, "grad_norm": 21.753887176513672, "learning_rate": 9.84e-06, "loss": 35.8203, "step": 9834 }, { "epoch": 234.16716417910447, "grad_norm": 18.985937118530273, "learning_rate": 9.83904761904762e-06, "loss": 34.8694, "step": 9835 }, { "epoch": 234.1910447761194, "grad_norm": 17.199949264526367, "learning_rate": 9.838095238095238e-06, "loss": 37.2323, "step": 9836 }, { "epoch": 234.21492537313432, "grad_norm": 19.559226989746094, "learning_rate": 9.837142857142859e-06, "loss": 37.2003, "step": 9837 }, { "epoch": 234.23880597014926, "grad_norm": 16.026540756225586, "learning_rate": 9.836190476190476e-06, "loss": 35.7033, "step": 9838 }, { "epoch": 234.26268656716417, "grad_norm": 17.020076751708984, "learning_rate": 9.835238095238097e-06, "loss": 36.3054, "step": 9839 }, { "epoch": 234.2865671641791, "grad_norm": 18.18143081665039, "learning_rate": 9.834285714285716e-06, "loss": 36.8279, "step": 9840 }, { "epoch": 234.31044776119404, "grad_norm": 17.629560470581055, "learning_rate": 9.833333333333333e-06, "loss": 35.4263, "step": 9841 }, { "epoch": 234.33432835820895, "grad_norm": 14.195773124694824, "learning_rate": 9.832380952380954e-06, "loss": 36.3135, "step": 9842 }, { "epoch": 234.3582089552239, "grad_norm": 18.136837005615234, "learning_rate": 9.831428571428572e-06, "loss": 36.7588, "step": 9843 }, { "epoch": 234.3820895522388, "grad_norm": 17.47150993347168, "learning_rate": 9.830476190476191e-06, "loss": 36.3127, "step": 9844 }, { "epoch": 234.40597014925373, "grad_norm": 16.70725440979004, "learning_rate": 9.82952380952381e-06, "loss": 37.7066, "step": 9845 }, { "epoch": 234.42985074626867, "grad_norm": 19.580862045288086, "learning_rate": 9.828571428571429e-06, "loss": 36.304, "step": 9846 }, { "epoch": 234.45373134328358, "grad_norm": 20.10016441345215, "learning_rate": 9.827619047619048e-06, "loss": 35.801, "step": 9847 }, { "epoch": 234.47761194029852, "grad_norm": 18.814186096191406, "learning_rate": 9.826666666666667e-06, "loss": 36.9099, "step": 9848 }, { "epoch": 234.50149253731342, "grad_norm": 19.953445434570312, "learning_rate": 9.825714285714286e-06, "loss": 35.3052, "step": 9849 }, { "epoch": 234.52537313432836, "grad_norm": 18.7332763671875, "learning_rate": 9.824761904761906e-06, "loss": 36.9053, "step": 9850 }, { "epoch": 234.54925373134327, "grad_norm": 13.999094009399414, "learning_rate": 9.823809523809524e-06, "loss": 36.9181, "step": 9851 }, { "epoch": 234.5731343283582, "grad_norm": 18.283750534057617, "learning_rate": 9.822857142857144e-06, "loss": 36.444, "step": 9852 }, { "epoch": 234.59701492537314, "grad_norm": 22.197336196899414, "learning_rate": 9.821904761904763e-06, "loss": 35.4112, "step": 9853 }, { "epoch": 234.62089552238805, "grad_norm": 15.777986526489258, "learning_rate": 9.820952380952382e-06, "loss": 35.8148, "step": 9854 }, { "epoch": 234.644776119403, "grad_norm": 17.59519386291504, "learning_rate": 9.820000000000001e-06, "loss": 36.5398, "step": 9855 }, { "epoch": 234.6686567164179, "grad_norm": 19.24283790588379, "learning_rate": 9.81904761904762e-06, "loss": 35.8213, "step": 9856 }, { "epoch": 234.69253731343284, "grad_norm": 15.129947662353516, "learning_rate": 9.818095238095239e-06, "loss": 37.5821, "step": 9857 }, { "epoch": 234.71641791044777, "grad_norm": 19.374385833740234, "learning_rate": 9.81714285714286e-06, "loss": 36.8088, "step": 9858 }, { "epoch": 234.74029850746268, "grad_norm": 18.22612190246582, "learning_rate": 9.816190476190476e-06, "loss": 36.6973, "step": 9859 }, { "epoch": 234.76417910447762, "grad_norm": 16.317365646362305, "learning_rate": 9.815238095238097e-06, "loss": 37.3607, "step": 9860 }, { "epoch": 234.78805970149253, "grad_norm": 16.01597785949707, "learning_rate": 9.814285714285716e-06, "loss": 35.5199, "step": 9861 }, { "epoch": 234.81194029850747, "grad_norm": 17.750478744506836, "learning_rate": 9.813333333333333e-06, "loss": 35.7935, "step": 9862 }, { "epoch": 234.83582089552237, "grad_norm": 13.788301467895508, "learning_rate": 9.812380952380954e-06, "loss": 36.5824, "step": 9863 }, { "epoch": 234.8597014925373, "grad_norm": 17.28419303894043, "learning_rate": 9.811428571428571e-06, "loss": 36.2316, "step": 9864 }, { "epoch": 234.88358208955225, "grad_norm": 15.71476936340332, "learning_rate": 9.810476190476191e-06, "loss": 36.9027, "step": 9865 }, { "epoch": 234.90746268656716, "grad_norm": 20.27968406677246, "learning_rate": 9.80952380952381e-06, "loss": 35.868, "step": 9866 }, { "epoch": 234.9313432835821, "grad_norm": 16.62568473815918, "learning_rate": 9.80857142857143e-06, "loss": 36.1727, "step": 9867 }, { "epoch": 234.955223880597, "grad_norm": 21.846633911132812, "learning_rate": 9.807619047619048e-06, "loss": 35.1444, "step": 9868 }, { "epoch": 234.97910447761194, "grad_norm": 16.76075553894043, "learning_rate": 9.806666666666667e-06, "loss": 35.1602, "step": 9869 }, { "epoch": 235.0, "grad_norm": 16.464860916137695, "learning_rate": 9.805714285714286e-06, "loss": 32.9988, "step": 9870 }, { "epoch": 235.02388059701494, "grad_norm": 22.344356536865234, "learning_rate": 9.804761904761907e-06, "loss": 36.0721, "step": 9871 }, { "epoch": 235.04776119402985, "grad_norm": 16.429410934448242, "learning_rate": 9.803809523809524e-06, "loss": 35.5533, "step": 9872 }, { "epoch": 235.07164179104478, "grad_norm": 26.959598541259766, "learning_rate": 9.802857142857144e-06, "loss": 36.5131, "step": 9873 }, { "epoch": 235.0955223880597, "grad_norm": 19.566802978515625, "learning_rate": 9.801904761904763e-06, "loss": 35.2014, "step": 9874 }, { "epoch": 235.11940298507463, "grad_norm": 22.33953094482422, "learning_rate": 9.800952380952382e-06, "loss": 36.3257, "step": 9875 }, { "epoch": 235.14328358208957, "grad_norm": 19.54528045654297, "learning_rate": 9.800000000000001e-06, "loss": 36.3505, "step": 9876 }, { "epoch": 235.16716417910447, "grad_norm": 19.99598503112793, "learning_rate": 9.79904761904762e-06, "loss": 36.5828, "step": 9877 }, { "epoch": 235.1910447761194, "grad_norm": 22.05472755432129, "learning_rate": 9.798095238095239e-06, "loss": 36.444, "step": 9878 }, { "epoch": 235.21492537313432, "grad_norm": 17.299413681030273, "learning_rate": 9.797142857142858e-06, "loss": 36.8062, "step": 9879 }, { "epoch": 235.23880597014926, "grad_norm": 17.942642211914062, "learning_rate": 9.796190476190477e-06, "loss": 36.4663, "step": 9880 }, { "epoch": 235.26268656716417, "grad_norm": 18.936473846435547, "learning_rate": 9.795238095238097e-06, "loss": 36.2818, "step": 9881 }, { "epoch": 235.2865671641791, "grad_norm": 15.805120468139648, "learning_rate": 9.794285714285714e-06, "loss": 36.8603, "step": 9882 }, { "epoch": 235.31044776119404, "grad_norm": 18.38108253479004, "learning_rate": 9.793333333333333e-06, "loss": 36.0332, "step": 9883 }, { "epoch": 235.33432835820895, "grad_norm": 17.744592666625977, "learning_rate": 9.792380952380954e-06, "loss": 35.8128, "step": 9884 }, { "epoch": 235.3582089552239, "grad_norm": 15.176288604736328, "learning_rate": 9.791428571428571e-06, "loss": 36.1821, "step": 9885 }, { "epoch": 235.3820895522388, "grad_norm": 16.138187408447266, "learning_rate": 9.790476190476192e-06, "loss": 36.0667, "step": 9886 }, { "epoch": 235.40597014925373, "grad_norm": 15.533767700195312, "learning_rate": 9.78952380952381e-06, "loss": 36.667, "step": 9887 }, { "epoch": 235.42985074626867, "grad_norm": 14.583806991577148, "learning_rate": 9.78857142857143e-06, "loss": 35.383, "step": 9888 }, { "epoch": 235.45373134328358, "grad_norm": 19.801340103149414, "learning_rate": 9.787619047619048e-06, "loss": 35.8748, "step": 9889 }, { "epoch": 235.47761194029852, "grad_norm": 16.901988983154297, "learning_rate": 9.786666666666667e-06, "loss": 36.6985, "step": 9890 }, { "epoch": 235.50149253731342, "grad_norm": 17.895591735839844, "learning_rate": 9.785714285714286e-06, "loss": 36.9642, "step": 9891 }, { "epoch": 235.52537313432836, "grad_norm": 21.717445373535156, "learning_rate": 9.784761904761905e-06, "loss": 35.3154, "step": 9892 }, { "epoch": 235.54925373134327, "grad_norm": 15.22280216217041, "learning_rate": 9.783809523809524e-06, "loss": 35.9869, "step": 9893 }, { "epoch": 235.5731343283582, "grad_norm": 16.396968841552734, "learning_rate": 9.782857142857145e-06, "loss": 36.8867, "step": 9894 }, { "epoch": 235.59701492537314, "grad_norm": 19.450927734375, "learning_rate": 9.781904761904762e-06, "loss": 35.575, "step": 9895 }, { "epoch": 235.62089552238805, "grad_norm": 17.347875595092773, "learning_rate": 9.780952380952382e-06, "loss": 35.4163, "step": 9896 }, { "epoch": 235.644776119403, "grad_norm": 16.20336151123047, "learning_rate": 9.780000000000001e-06, "loss": 36.0011, "step": 9897 }, { "epoch": 235.6686567164179, "grad_norm": 17.76625633239746, "learning_rate": 9.77904761904762e-06, "loss": 37.8143, "step": 9898 }, { "epoch": 235.69253731343284, "grad_norm": 16.26557159423828, "learning_rate": 9.778095238095239e-06, "loss": 37.094, "step": 9899 }, { "epoch": 235.71641791044777, "grad_norm": 16.70842170715332, "learning_rate": 9.777142857142858e-06, "loss": 35.9713, "step": 9900 }, { "epoch": 235.74029850746268, "grad_norm": 15.71903133392334, "learning_rate": 9.776190476190477e-06, "loss": 36.8651, "step": 9901 }, { "epoch": 235.76417910447762, "grad_norm": 15.551657676696777, "learning_rate": 9.775238095238096e-06, "loss": 36.7437, "step": 9902 }, { "epoch": 235.78805970149253, "grad_norm": 15.175107955932617, "learning_rate": 9.774285714285715e-06, "loss": 36.5867, "step": 9903 }, { "epoch": 235.81194029850747, "grad_norm": 15.5099458694458, "learning_rate": 9.773333333333335e-06, "loss": 36.4173, "step": 9904 }, { "epoch": 235.83582089552237, "grad_norm": 16.476665496826172, "learning_rate": 9.772380952380952e-06, "loss": 36.1536, "step": 9905 }, { "epoch": 235.8597014925373, "grad_norm": 15.383299827575684, "learning_rate": 9.771428571428571e-06, "loss": 36.9653, "step": 9906 }, { "epoch": 235.88358208955225, "grad_norm": 15.790655136108398, "learning_rate": 9.770476190476192e-06, "loss": 36.1127, "step": 9907 }, { "epoch": 235.90746268656716, "grad_norm": 19.14923858642578, "learning_rate": 9.769523809523809e-06, "loss": 35.8418, "step": 9908 }, { "epoch": 235.9313432835821, "grad_norm": 17.147768020629883, "learning_rate": 9.76857142857143e-06, "loss": 36.0894, "step": 9909 }, { "epoch": 235.955223880597, "grad_norm": 16.384170532226562, "learning_rate": 9.767619047619049e-06, "loss": 35.4527, "step": 9910 }, { "epoch": 235.97910447761194, "grad_norm": 21.327133178710938, "learning_rate": 9.766666666666667e-06, "loss": 36.346, "step": 9911 }, { "epoch": 236.0, "grad_norm": 16.97562599182129, "learning_rate": 9.765714285714286e-06, "loss": 31.1273, "step": 9912 }, { "epoch": 236.02388059701494, "grad_norm": 16.29657554626465, "learning_rate": 9.764761904761905e-06, "loss": 37.1635, "step": 9913 }, { "epoch": 236.04776119402985, "grad_norm": 22.797019958496094, "learning_rate": 9.763809523809524e-06, "loss": 36.2798, "step": 9914 }, { "epoch": 236.07164179104478, "grad_norm": 15.591317176818848, "learning_rate": 9.762857142857145e-06, "loss": 35.9309, "step": 9915 }, { "epoch": 236.0955223880597, "grad_norm": 20.136259078979492, "learning_rate": 9.761904761904762e-06, "loss": 36.8937, "step": 9916 }, { "epoch": 236.11940298507463, "grad_norm": 16.15099334716797, "learning_rate": 9.760952380952383e-06, "loss": 34.3163, "step": 9917 }, { "epoch": 236.14328358208957, "grad_norm": 18.136035919189453, "learning_rate": 9.760000000000001e-06, "loss": 34.7456, "step": 9918 }, { "epoch": 236.16716417910447, "grad_norm": 12.46219253540039, "learning_rate": 9.75904761904762e-06, "loss": 36.1773, "step": 9919 }, { "epoch": 236.1910447761194, "grad_norm": 16.850460052490234, "learning_rate": 9.75809523809524e-06, "loss": 36.8295, "step": 9920 }, { "epoch": 236.21492537313432, "grad_norm": 18.266088485717773, "learning_rate": 9.757142857142858e-06, "loss": 35.9171, "step": 9921 }, { "epoch": 236.23880597014926, "grad_norm": 14.196195602416992, "learning_rate": 9.756190476190477e-06, "loss": 36.9765, "step": 9922 }, { "epoch": 236.26268656716417, "grad_norm": 19.22831153869629, "learning_rate": 9.755238095238096e-06, "loss": 37.2524, "step": 9923 }, { "epoch": 236.2865671641791, "grad_norm": 17.952713012695312, "learning_rate": 9.754285714285715e-06, "loss": 36.3808, "step": 9924 }, { "epoch": 236.31044776119404, "grad_norm": 12.137635231018066, "learning_rate": 9.753333333333335e-06, "loss": 36.627, "step": 9925 }, { "epoch": 236.33432835820895, "grad_norm": 18.9188289642334, "learning_rate": 9.752380952380953e-06, "loss": 36.0876, "step": 9926 }, { "epoch": 236.3582089552239, "grad_norm": 21.2408390045166, "learning_rate": 9.751428571428571e-06, "loss": 34.7637, "step": 9927 }, { "epoch": 236.3820895522388, "grad_norm": 13.691661834716797, "learning_rate": 9.750476190476192e-06, "loss": 36.667, "step": 9928 }, { "epoch": 236.40597014925373, "grad_norm": 17.704147338867188, "learning_rate": 9.74952380952381e-06, "loss": 35.2197, "step": 9929 }, { "epoch": 236.42985074626867, "grad_norm": 18.488739013671875, "learning_rate": 9.74857142857143e-06, "loss": 36.3589, "step": 9930 }, { "epoch": 236.45373134328358, "grad_norm": 16.424970626831055, "learning_rate": 9.747619047619049e-06, "loss": 37.2576, "step": 9931 }, { "epoch": 236.47761194029852, "grad_norm": 13.38017463684082, "learning_rate": 9.746666666666668e-06, "loss": 34.6685, "step": 9932 }, { "epoch": 236.50149253731342, "grad_norm": 19.310014724731445, "learning_rate": 9.745714285714287e-06, "loss": 36.7496, "step": 9933 }, { "epoch": 236.52537313432836, "grad_norm": 17.776432037353516, "learning_rate": 9.744761904761905e-06, "loss": 36.1067, "step": 9934 }, { "epoch": 236.54925373134327, "grad_norm": 17.462303161621094, "learning_rate": 9.743809523809524e-06, "loss": 35.9356, "step": 9935 }, { "epoch": 236.5731343283582, "grad_norm": 12.972521781921387, "learning_rate": 9.742857142857143e-06, "loss": 36.8305, "step": 9936 }, { "epoch": 236.59701492537314, "grad_norm": 19.28449249267578, "learning_rate": 9.741904761904762e-06, "loss": 37.2609, "step": 9937 }, { "epoch": 236.62089552238805, "grad_norm": 15.885763168334961, "learning_rate": 9.740952380952383e-06, "loss": 37.182, "step": 9938 }, { "epoch": 236.644776119403, "grad_norm": 19.846704483032227, "learning_rate": 9.74e-06, "loss": 35.3155, "step": 9939 }, { "epoch": 236.6686567164179, "grad_norm": 15.600286483764648, "learning_rate": 9.73904761904762e-06, "loss": 36.9865, "step": 9940 }, { "epoch": 236.69253731343284, "grad_norm": 15.757110595703125, "learning_rate": 9.73809523809524e-06, "loss": 35.2183, "step": 9941 }, { "epoch": 236.71641791044777, "grad_norm": 16.8973388671875, "learning_rate": 9.737142857142858e-06, "loss": 37.0222, "step": 9942 }, { "epoch": 236.74029850746268, "grad_norm": 16.777618408203125, "learning_rate": 9.736190476190477e-06, "loss": 36.3856, "step": 9943 }, { "epoch": 236.76417910447762, "grad_norm": 18.161388397216797, "learning_rate": 9.735238095238096e-06, "loss": 36.7569, "step": 9944 }, { "epoch": 236.78805970149253, "grad_norm": 16.15582847595215, "learning_rate": 9.734285714285715e-06, "loss": 36.9244, "step": 9945 }, { "epoch": 236.81194029850747, "grad_norm": 18.0284423828125, "learning_rate": 9.733333333333334e-06, "loss": 35.4237, "step": 9946 }, { "epoch": 236.83582089552237, "grad_norm": 16.975326538085938, "learning_rate": 9.732380952380953e-06, "loss": 35.6616, "step": 9947 }, { "epoch": 236.8597014925373, "grad_norm": 14.613351821899414, "learning_rate": 9.731428571428573e-06, "loss": 35.9004, "step": 9948 }, { "epoch": 236.88358208955225, "grad_norm": 18.295223236083984, "learning_rate": 9.73047619047619e-06, "loss": 35.7258, "step": 9949 }, { "epoch": 236.90746268656716, "grad_norm": 15.034687042236328, "learning_rate": 9.72952380952381e-06, "loss": 35.9824, "step": 9950 }, { "epoch": 236.9313432835821, "grad_norm": 17.6257381439209, "learning_rate": 9.72857142857143e-06, "loss": 35.0237, "step": 9951 }, { "epoch": 236.955223880597, "grad_norm": 16.51543426513672, "learning_rate": 9.727619047619047e-06, "loss": 35.6532, "step": 9952 }, { "epoch": 236.97910447761194, "grad_norm": 16.309326171875, "learning_rate": 9.726666666666668e-06, "loss": 36.6841, "step": 9953 }, { "epoch": 237.0, "grad_norm": 13.546600341796875, "learning_rate": 9.725714285714287e-06, "loss": 33.0049, "step": 9954 }, { "epoch": 237.02388059701494, "grad_norm": 15.856393814086914, "learning_rate": 9.724761904761906e-06, "loss": 35.8076, "step": 9955 }, { "epoch": 237.04776119402985, "grad_norm": 17.967824935913086, "learning_rate": 9.723809523809525e-06, "loss": 36.4542, "step": 9956 }, { "epoch": 237.07164179104478, "grad_norm": 18.078475952148438, "learning_rate": 9.722857142857143e-06, "loss": 36.9038, "step": 9957 }, { "epoch": 237.0955223880597, "grad_norm": 14.310022354125977, "learning_rate": 9.721904761904762e-06, "loss": 35.1829, "step": 9958 }, { "epoch": 237.11940298507463, "grad_norm": 27.364797592163086, "learning_rate": 9.720952380952381e-06, "loss": 35.9449, "step": 9959 }, { "epoch": 237.14328358208957, "grad_norm": 17.1212158203125, "learning_rate": 9.72e-06, "loss": 37.6461, "step": 9960 }, { "epoch": 237.16716417910447, "grad_norm": 26.285417556762695, "learning_rate": 9.71904761904762e-06, "loss": 34.547, "step": 9961 }, { "epoch": 237.1910447761194, "grad_norm": 19.969297409057617, "learning_rate": 9.718095238095238e-06, "loss": 35.9812, "step": 9962 }, { "epoch": 237.21492537313432, "grad_norm": 21.263275146484375, "learning_rate": 9.717142857142858e-06, "loss": 36.5746, "step": 9963 }, { "epoch": 237.23880597014926, "grad_norm": 26.24778938293457, "learning_rate": 9.716190476190477e-06, "loss": 35.2941, "step": 9964 }, { "epoch": 237.26268656716417, "grad_norm": 17.686864852905273, "learning_rate": 9.715238095238096e-06, "loss": 35.4527, "step": 9965 }, { "epoch": 237.2865671641791, "grad_norm": 27.805757522583008, "learning_rate": 9.714285714285715e-06, "loss": 36.5016, "step": 9966 }, { "epoch": 237.31044776119404, "grad_norm": 19.451583862304688, "learning_rate": 9.713333333333334e-06, "loss": 35.4458, "step": 9967 }, { "epoch": 237.33432835820895, "grad_norm": 25.279926300048828, "learning_rate": 9.712380952380953e-06, "loss": 35.3307, "step": 9968 }, { "epoch": 237.3582089552239, "grad_norm": 19.426441192626953, "learning_rate": 9.711428571428574e-06, "loss": 35.482, "step": 9969 }, { "epoch": 237.3820895522388, "grad_norm": 22.436826705932617, "learning_rate": 9.71047619047619e-06, "loss": 36.923, "step": 9970 }, { "epoch": 237.40597014925373, "grad_norm": 22.07547950744629, "learning_rate": 9.70952380952381e-06, "loss": 36.1381, "step": 9971 }, { "epoch": 237.42985074626867, "grad_norm": 17.01164436340332, "learning_rate": 9.70857142857143e-06, "loss": 35.5353, "step": 9972 }, { "epoch": 237.45373134328358, "grad_norm": 21.813730239868164, "learning_rate": 9.707619047619047e-06, "loss": 36.1357, "step": 9973 }, { "epoch": 237.47761194029852, "grad_norm": 22.933889389038086, "learning_rate": 9.706666666666668e-06, "loss": 35.8501, "step": 9974 }, { "epoch": 237.50149253731342, "grad_norm": 15.256769180297852, "learning_rate": 9.705714285714287e-06, "loss": 37.1791, "step": 9975 }, { "epoch": 237.52537313432836, "grad_norm": 19.60181427001953, "learning_rate": 9.704761904761906e-06, "loss": 36.2048, "step": 9976 }, { "epoch": 237.54925373134327, "grad_norm": 18.724287033081055, "learning_rate": 9.703809523809525e-06, "loss": 35.2301, "step": 9977 }, { "epoch": 237.5731343283582, "grad_norm": 15.75432014465332, "learning_rate": 9.702857142857144e-06, "loss": 36.4579, "step": 9978 }, { "epoch": 237.59701492537314, "grad_norm": 22.569398880004883, "learning_rate": 9.701904761904763e-06, "loss": 37.6286, "step": 9979 }, { "epoch": 237.62089552238805, "grad_norm": 18.756433486938477, "learning_rate": 9.700952380952381e-06, "loss": 37.0072, "step": 9980 }, { "epoch": 237.644776119403, "grad_norm": 15.693806648254395, "learning_rate": 9.7e-06, "loss": 36.7072, "step": 9981 }, { "epoch": 237.6686567164179, "grad_norm": 21.104143142700195, "learning_rate": 9.699047619047621e-06, "loss": 35.6147, "step": 9982 }, { "epoch": 237.69253731343284, "grad_norm": 18.04296875, "learning_rate": 9.698095238095238e-06, "loss": 36.7233, "step": 9983 }, { "epoch": 237.71641791044777, "grad_norm": 16.26370620727539, "learning_rate": 9.697142857142859e-06, "loss": 35.6375, "step": 9984 }, { "epoch": 237.74029850746268, "grad_norm": 14.69483470916748, "learning_rate": 9.696190476190478e-06, "loss": 35.7502, "step": 9985 }, { "epoch": 237.76417910447762, "grad_norm": 19.199661254882812, "learning_rate": 9.695238095238096e-06, "loss": 36.5614, "step": 9986 }, { "epoch": 237.78805970149253, "grad_norm": 15.763886451721191, "learning_rate": 9.694285714285715e-06, "loss": 36.1778, "step": 9987 }, { "epoch": 237.81194029850747, "grad_norm": 17.95053482055664, "learning_rate": 9.693333333333334e-06, "loss": 36.6272, "step": 9988 }, { "epoch": 237.83582089552237, "grad_norm": 15.827825546264648, "learning_rate": 9.692380952380953e-06, "loss": 36.4755, "step": 9989 }, { "epoch": 237.8597014925373, "grad_norm": 18.18069076538086, "learning_rate": 9.691428571428572e-06, "loss": 36.8826, "step": 9990 }, { "epoch": 237.88358208955225, "grad_norm": 17.14325523376465, "learning_rate": 9.690476190476191e-06, "loss": 37.1303, "step": 9991 }, { "epoch": 237.90746268656716, "grad_norm": 18.050926208496094, "learning_rate": 9.68952380952381e-06, "loss": 36.1544, "step": 9992 }, { "epoch": 237.9313432835821, "grad_norm": 17.636999130249023, "learning_rate": 9.688571428571429e-06, "loss": 36.0903, "step": 9993 }, { "epoch": 237.955223880597, "grad_norm": 15.995038986206055, "learning_rate": 9.687619047619048e-06, "loss": 35.2946, "step": 9994 }, { "epoch": 237.97910447761194, "grad_norm": 16.34441566467285, "learning_rate": 9.686666666666668e-06, "loss": 35.9815, "step": 9995 }, { "epoch": 238.0, "grad_norm": 14.362862586975098, "learning_rate": 9.685714285714285e-06, "loss": 31.6897, "step": 9996 }, { "epoch": 238.02388059701494, "grad_norm": 18.762998580932617, "learning_rate": 9.684761904761906e-06, "loss": 36.6022, "step": 9997 }, { "epoch": 238.04776119402985, "grad_norm": 19.1278133392334, "learning_rate": 9.683809523809525e-06, "loss": 35.4117, "step": 9998 }, { "epoch": 238.07164179104478, "grad_norm": 14.328730583190918, "learning_rate": 9.682857142857144e-06, "loss": 35.1485, "step": 9999 }, { "epoch": 238.0955223880597, "grad_norm": 19.069643020629883, "learning_rate": 9.681904761904763e-06, "loss": 36.3664, "step": 10000 }, { "epoch": 238.11940298507463, "grad_norm": 18.38530158996582, "learning_rate": 9.680952380952382e-06, "loss": 37.0273, "step": 10001 }, { "epoch": 238.14328358208957, "grad_norm": 18.504533767700195, "learning_rate": 9.68e-06, "loss": 35.2123, "step": 10002 }, { "epoch": 238.16716417910447, "grad_norm": 16.1649112701416, "learning_rate": 9.67904761904762e-06, "loss": 37.3847, "step": 10003 }, { "epoch": 238.1910447761194, "grad_norm": 18.605621337890625, "learning_rate": 9.678095238095238e-06, "loss": 36.542, "step": 10004 }, { "epoch": 238.21492537313432, "grad_norm": 18.89537239074707, "learning_rate": 9.677142857142859e-06, "loss": 35.8042, "step": 10005 }, { "epoch": 238.23880597014926, "grad_norm": 16.746604919433594, "learning_rate": 9.676190476190476e-06, "loss": 36.2674, "step": 10006 }, { "epoch": 238.26268656716417, "grad_norm": 20.042264938354492, "learning_rate": 9.675238095238097e-06, "loss": 37.1588, "step": 10007 }, { "epoch": 238.2865671641791, "grad_norm": 17.67850685119629, "learning_rate": 9.674285714285716e-06, "loss": 36.817, "step": 10008 }, { "epoch": 238.31044776119404, "grad_norm": 16.53818130493164, "learning_rate": 9.673333333333334e-06, "loss": 36.0362, "step": 10009 }, { "epoch": 238.33432835820895, "grad_norm": 16.945322036743164, "learning_rate": 9.672380952380953e-06, "loss": 36.4225, "step": 10010 }, { "epoch": 238.3582089552239, "grad_norm": 16.241308212280273, "learning_rate": 9.671428571428572e-06, "loss": 36.6693, "step": 10011 }, { "epoch": 238.3820895522388, "grad_norm": 21.750402450561523, "learning_rate": 9.670476190476191e-06, "loss": 37.7203, "step": 10012 }, { "epoch": 238.40597014925373, "grad_norm": 13.78532600402832, "learning_rate": 9.66952380952381e-06, "loss": 36.0114, "step": 10013 }, { "epoch": 238.42985074626867, "grad_norm": 14.291070938110352, "learning_rate": 9.668571428571429e-06, "loss": 35.7391, "step": 10014 }, { "epoch": 238.45373134328358, "grad_norm": 17.091083526611328, "learning_rate": 9.667619047619048e-06, "loss": 37.235, "step": 10015 }, { "epoch": 238.47761194029852, "grad_norm": 18.903963088989258, "learning_rate": 9.666666666666667e-06, "loss": 35.3213, "step": 10016 }, { "epoch": 238.50149253731342, "grad_norm": 17.843887329101562, "learning_rate": 9.665714285714286e-06, "loss": 35.7046, "step": 10017 }, { "epoch": 238.52537313432836, "grad_norm": 16.645095825195312, "learning_rate": 9.664761904761906e-06, "loss": 36.8381, "step": 10018 }, { "epoch": 238.54925373134327, "grad_norm": 17.44959831237793, "learning_rate": 9.663809523809523e-06, "loss": 35.6872, "step": 10019 }, { "epoch": 238.5731343283582, "grad_norm": NaN, "learning_rate": 9.662857142857144e-06, "loss": 35.9578, "step": 10020 }, { "epoch": 238.59701492537314, "grad_norm": 17.336702346801758, "learning_rate": 9.662857142857144e-06, "loss": 37.0041, "step": 10021 }, { "epoch": 238.62089552238805, "grad_norm": 14.818805694580078, "learning_rate": 9.661904761904763e-06, "loss": 36.8163, "step": 10022 }, { "epoch": 238.644776119403, "grad_norm": 14.351116180419922, "learning_rate": 9.660952380952382e-06, "loss": 34.632, "step": 10023 }, { "epoch": 238.6686567164179, "grad_norm": 16.829465866088867, "learning_rate": 9.66e-06, "loss": 35.9618, "step": 10024 }, { "epoch": 238.69253731343284, "grad_norm": 18.949562072753906, "learning_rate": 9.65904761904762e-06, "loss": 37.1074, "step": 10025 }, { "epoch": 238.71641791044777, "grad_norm": 18.230022430419922, "learning_rate": 9.658095238095238e-06, "loss": 36.8739, "step": 10026 }, { "epoch": 238.74029850746268, "grad_norm": 14.652442932128906, "learning_rate": 9.657142857142859e-06, "loss": 35.2269, "step": 10027 }, { "epoch": 238.76417910447762, "grad_norm": 18.973373413085938, "learning_rate": 9.656190476190476e-06, "loss": 35.2157, "step": 10028 }, { "epoch": 238.78805970149253, "grad_norm": 20.56492042541504, "learning_rate": 9.655238095238097e-06, "loss": 35.1376, "step": 10029 }, { "epoch": 238.81194029850747, "grad_norm": 16.110862731933594, "learning_rate": 9.654285714285716e-06, "loss": 34.2289, "step": 10030 }, { "epoch": 238.83582089552237, "grad_norm": 13.740087509155273, "learning_rate": 9.653333333333335e-06, "loss": 34.809, "step": 10031 }, { "epoch": 238.8597014925373, "grad_norm": 23.522594451904297, "learning_rate": 9.652380952380954e-06, "loss": 36.9026, "step": 10032 }, { "epoch": 238.88358208955225, "grad_norm": 18.54004669189453, "learning_rate": 9.651428571428572e-06, "loss": 35.559, "step": 10033 }, { "epoch": 238.90746268656716, "grad_norm": 13.475213050842285, "learning_rate": 9.650476190476191e-06, "loss": 35.8268, "step": 10034 }, { "epoch": 238.9313432835821, "grad_norm": 16.256486892700195, "learning_rate": 9.64952380952381e-06, "loss": 36.4832, "step": 10035 }, { "epoch": 238.955223880597, "grad_norm": 17.91010284423828, "learning_rate": 9.648571428571429e-06, "loss": 36.0897, "step": 10036 }, { "epoch": 238.97910447761194, "grad_norm": 15.188232421875, "learning_rate": 9.647619047619048e-06, "loss": 35.4762, "step": 10037 }, { "epoch": 239.0, "grad_norm": 16.662385940551758, "learning_rate": 9.646666666666667e-06, "loss": 31.8471, "step": 10038 }, { "epoch": 239.02388059701494, "grad_norm": 17.247777938842773, "learning_rate": 9.645714285714286e-06, "loss": 35.1285, "step": 10039 }, { "epoch": 239.04776119402985, "grad_norm": 18.966320037841797, "learning_rate": 9.644761904761906e-06, "loss": 36.6666, "step": 10040 }, { "epoch": 239.07164179104478, "grad_norm": 23.5268611907959, "learning_rate": 9.643809523809524e-06, "loss": 37.0704, "step": 10041 }, { "epoch": 239.0955223880597, "grad_norm": 12.94433307647705, "learning_rate": 9.642857142857144e-06, "loss": 37.5156, "step": 10042 }, { "epoch": 239.11940298507463, "grad_norm": 24.97050666809082, "learning_rate": 9.641904761904763e-06, "loss": 35.2346, "step": 10043 }, { "epoch": 239.14328358208957, "grad_norm": 19.386783599853516, "learning_rate": 9.640952380952382e-06, "loss": 35.1441, "step": 10044 }, { "epoch": 239.16716417910447, "grad_norm": 17.285106658935547, "learning_rate": 9.640000000000001e-06, "loss": 36.4102, "step": 10045 }, { "epoch": 239.1910447761194, "grad_norm": 21.81467056274414, "learning_rate": 9.63904761904762e-06, "loss": 36.1376, "step": 10046 }, { "epoch": 239.21492537313432, "grad_norm": 18.64617919921875, "learning_rate": 9.638095238095239e-06, "loss": 35.7971, "step": 10047 }, { "epoch": 239.23880597014926, "grad_norm": 16.2796688079834, "learning_rate": 9.637142857142858e-06, "loss": 35.9561, "step": 10048 }, { "epoch": 239.26268656716417, "grad_norm": 20.983346939086914, "learning_rate": 9.636190476190476e-06, "loss": 37.1588, "step": 10049 }, { "epoch": 239.2865671641791, "grad_norm": 18.213701248168945, "learning_rate": 9.635238095238097e-06, "loss": 37.4883, "step": 10050 }, { "epoch": 239.31044776119404, "grad_norm": 19.881954193115234, "learning_rate": 9.634285714285714e-06, "loss": 36.383, "step": 10051 }, { "epoch": 239.33432835820895, "grad_norm": 14.353389739990234, "learning_rate": 9.633333333333335e-06, "loss": 35.5836, "step": 10052 }, { "epoch": 239.3582089552239, "grad_norm": 23.765892028808594, "learning_rate": 9.632380952380954e-06, "loss": 36.0893, "step": 10053 }, { "epoch": 239.3820895522388, "grad_norm": 16.295591354370117, "learning_rate": 9.631428571428573e-06, "loss": 35.1261, "step": 10054 }, { "epoch": 239.40597014925373, "grad_norm": 27.36651039123535, "learning_rate": 9.630476190476192e-06, "loss": 37.2512, "step": 10055 }, { "epoch": 239.42985074626867, "grad_norm": 20.327367782592773, "learning_rate": 9.62952380952381e-06, "loss": 36.1105, "step": 10056 }, { "epoch": 239.45373134328358, "grad_norm": 18.55379295349121, "learning_rate": 9.62857142857143e-06, "loss": 35.7757, "step": 10057 }, { "epoch": 239.47761194029852, "grad_norm": 20.76352310180664, "learning_rate": 9.627619047619048e-06, "loss": 36.8951, "step": 10058 }, { "epoch": 239.50149253731342, "grad_norm": 18.17314910888672, "learning_rate": 9.626666666666667e-06, "loss": 36.5615, "step": 10059 }, { "epoch": 239.52537313432836, "grad_norm": 23.08365249633789, "learning_rate": 9.625714285714286e-06, "loss": 35.4985, "step": 10060 }, { "epoch": 239.54925373134327, "grad_norm": 20.333942413330078, "learning_rate": 9.624761904761905e-06, "loss": 34.9326, "step": 10061 }, { "epoch": 239.5731343283582, "grad_norm": 19.267133712768555, "learning_rate": 9.623809523809524e-06, "loss": 36.3737, "step": 10062 }, { "epoch": 239.59701492537314, "grad_norm": 19.04371452331543, "learning_rate": 9.622857142857144e-06, "loss": 37.6879, "step": 10063 }, { "epoch": 239.62089552238805, "grad_norm": 22.713504791259766, "learning_rate": 9.621904761904762e-06, "loss": 34.4376, "step": 10064 }, { "epoch": 239.644776119403, "grad_norm": 17.133039474487305, "learning_rate": 9.620952380952382e-06, "loss": 37.9597, "step": 10065 }, { "epoch": 239.6686567164179, "grad_norm": 25.80938720703125, "learning_rate": 9.620000000000001e-06, "loss": 34.1879, "step": 10066 }, { "epoch": 239.69253731343284, "grad_norm": 20.448535919189453, "learning_rate": 9.61904761904762e-06, "loss": 36.6585, "step": 10067 }, { "epoch": 239.71641791044777, "grad_norm": 20.890085220336914, "learning_rate": 9.618095238095239e-06, "loss": 35.2319, "step": 10068 }, { "epoch": 239.74029850746268, "grad_norm": 29.210161209106445, "learning_rate": 9.617142857142858e-06, "loss": 35.5395, "step": 10069 }, { "epoch": 239.76417910447762, "grad_norm": 22.1795597076416, "learning_rate": 9.616190476190477e-06, "loss": 35.7438, "step": 10070 }, { "epoch": 239.78805970149253, "grad_norm": 27.816953659057617, "learning_rate": 9.615238095238096e-06, "loss": 36.204, "step": 10071 }, { "epoch": 239.81194029850747, "grad_norm": 26.449193954467773, "learning_rate": 9.614285714285714e-06, "loss": 36.5446, "step": 10072 }, { "epoch": 239.83582089552237, "grad_norm": 27.123445510864258, "learning_rate": 9.613333333333335e-06, "loss": 36.8264, "step": 10073 }, { "epoch": 239.8597014925373, "grad_norm": 17.471189498901367, "learning_rate": 9.612380952380952e-06, "loss": 35.4465, "step": 10074 }, { "epoch": 239.88358208955225, "grad_norm": 22.181690216064453, "learning_rate": 9.611428571428573e-06, "loss": 36.093, "step": 10075 }, { "epoch": 239.90746268656716, "grad_norm": 23.73118019104004, "learning_rate": 9.610476190476192e-06, "loss": 35.5252, "step": 10076 }, { "epoch": 239.9313432835821, "grad_norm": 17.87926483154297, "learning_rate": 9.60952380952381e-06, "loss": 35.8809, "step": 10077 }, { "epoch": 239.955223880597, "grad_norm": 25.186649322509766, "learning_rate": 9.60857142857143e-06, "loss": 35.1261, "step": 10078 }, { "epoch": 239.97910447761194, "grad_norm": 18.682348251342773, "learning_rate": 9.607619047619048e-06, "loss": 36.4747, "step": 10079 }, { "epoch": 240.0, "grad_norm": 14.701883316040039, "learning_rate": 9.606666666666667e-06, "loss": 30.8581, "step": 10080 }, { "epoch": 240.02388059701494, "grad_norm": 24.948392868041992, "learning_rate": 9.605714285714286e-06, "loss": 35.7126, "step": 10081 }, { "epoch": 240.04776119402985, "grad_norm": 16.947065353393555, "learning_rate": 9.604761904761905e-06, "loss": 36.3141, "step": 10082 }, { "epoch": 240.07164179104478, "grad_norm": 21.494142532348633, "learning_rate": 9.603809523809524e-06, "loss": 35.4828, "step": 10083 }, { "epoch": 240.0955223880597, "grad_norm": 26.018396377563477, "learning_rate": 9.602857142857145e-06, "loss": 36.1045, "step": 10084 }, { "epoch": 240.11940298507463, "grad_norm": 16.760507583618164, "learning_rate": 9.601904761904762e-06, "loss": 36.3321, "step": 10085 }, { "epoch": 240.14328358208957, "grad_norm": 28.821687698364258, "learning_rate": 9.600952380952382e-06, "loss": 35.164, "step": 10086 }, { "epoch": 240.16716417910447, "grad_norm": 23.643774032592773, "learning_rate": 9.600000000000001e-06, "loss": 36.3261, "step": 10087 }, { "epoch": 240.1910447761194, "grad_norm": 32.337833404541016, "learning_rate": 9.59904761904762e-06, "loss": 36.7649, "step": 10088 }, { "epoch": 240.21492537313432, "grad_norm": 19.09001350402832, "learning_rate": 9.598095238095239e-06, "loss": 36.0283, "step": 10089 }, { "epoch": 240.23880597014926, "grad_norm": 37.15082931518555, "learning_rate": 9.597142857142858e-06, "loss": 34.712, "step": 10090 }, { "epoch": 240.26268656716417, "grad_norm": 27.156841278076172, "learning_rate": 9.596190476190477e-06, "loss": 36.0739, "step": 10091 }, { "epoch": 240.2865671641791, "grad_norm": 40.636470794677734, "learning_rate": 9.595238095238096e-06, "loss": 36.5028, "step": 10092 }, { "epoch": 240.31044776119404, "grad_norm": 30.511669158935547, "learning_rate": 9.594285714285715e-06, "loss": 36.1824, "step": 10093 }, { "epoch": 240.33432835820895, "grad_norm": 35.35037612915039, "learning_rate": 9.593333333333335e-06, "loss": 36.3874, "step": 10094 }, { "epoch": 240.3582089552239, "grad_norm": 31.171030044555664, "learning_rate": 9.592380952380952e-06, "loss": 36.1055, "step": 10095 }, { "epoch": 240.3820895522388, "grad_norm": 28.244115829467773, "learning_rate": 9.591428571428573e-06, "loss": 36.846, "step": 10096 }, { "epoch": 240.40597014925373, "grad_norm": 25.969768524169922, "learning_rate": 9.590476190476192e-06, "loss": 34.7639, "step": 10097 }, { "epoch": 240.42985074626867, "grad_norm": 32.089637756347656, "learning_rate": 9.58952380952381e-06, "loss": 35.7644, "step": 10098 }, { "epoch": 240.45373134328358, "grad_norm": 26.51710319519043, "learning_rate": 9.58857142857143e-06, "loss": 35.1346, "step": 10099 }, { "epoch": 240.47761194029852, "grad_norm": 32.1282958984375, "learning_rate": 9.587619047619049e-06, "loss": 35.4712, "step": 10100 }, { "epoch": 240.50149253731342, "grad_norm": 25.199325561523438, "learning_rate": 9.586666666666667e-06, "loss": 36.1035, "step": 10101 }, { "epoch": 240.52537313432836, "grad_norm": 35.87451171875, "learning_rate": 9.585714285714286e-06, "loss": 37.1188, "step": 10102 }, { "epoch": 240.54925373134327, "grad_norm": 30.406360626220703, "learning_rate": 9.584761904761905e-06, "loss": 36.0955, "step": 10103 }, { "epoch": 240.5731343283582, "grad_norm": 32.87398147583008, "learning_rate": 9.583809523809524e-06, "loss": 36.6677, "step": 10104 }, { "epoch": 240.59701492537314, "grad_norm": 27.0983829498291, "learning_rate": 9.582857142857143e-06, "loss": 36.1804, "step": 10105 }, { "epoch": 240.62089552238805, "grad_norm": 29.340635299682617, "learning_rate": 9.581904761904762e-06, "loss": 37.3455, "step": 10106 }, { "epoch": 240.644776119403, "grad_norm": 26.483362197875977, "learning_rate": 9.580952380952383e-06, "loss": 36.3505, "step": 10107 }, { "epoch": 240.6686567164179, "grad_norm": 29.381149291992188, "learning_rate": 9.58e-06, "loss": 36.7096, "step": 10108 }, { "epoch": 240.69253731343284, "grad_norm": 25.296951293945312, "learning_rate": 9.57904761904762e-06, "loss": 35.6373, "step": 10109 }, { "epoch": 240.71641791044777, "grad_norm": 33.18749237060547, "learning_rate": 9.57809523809524e-06, "loss": 37.1643, "step": 10110 }, { "epoch": 240.74029850746268, "grad_norm": 27.313159942626953, "learning_rate": 9.577142857142858e-06, "loss": 35.2829, "step": 10111 }, { "epoch": 240.76417910447762, "grad_norm": 29.12482452392578, "learning_rate": 9.576190476190477e-06, "loss": 36.1714, "step": 10112 }, { "epoch": 240.78805970149253, "grad_norm": 26.63406753540039, "learning_rate": 9.575238095238096e-06, "loss": 35.2008, "step": 10113 }, { "epoch": 240.81194029850747, "grad_norm": 26.729719161987305, "learning_rate": 9.574285714285715e-06, "loss": 35.7582, "step": 10114 }, { "epoch": 240.83582089552237, "grad_norm": 23.687339782714844, "learning_rate": 9.573333333333334e-06, "loss": 34.6782, "step": 10115 }, { "epoch": 240.8597014925373, "grad_norm": 31.28934669494629, "learning_rate": 9.572380952380953e-06, "loss": 35.9475, "step": 10116 }, { "epoch": 240.88358208955225, "grad_norm": 27.232389450073242, "learning_rate": 9.571428571428573e-06, "loss": 36.1738, "step": 10117 }, { "epoch": 240.90746268656716, "grad_norm": 33.5478630065918, "learning_rate": 9.57047619047619e-06, "loss": 35.5291, "step": 10118 }, { "epoch": 240.9313432835821, "grad_norm": 31.18778419494629, "learning_rate": 9.569523809523811e-06, "loss": 35.8111, "step": 10119 }, { "epoch": 240.955223880597, "grad_norm": 28.460308074951172, "learning_rate": 9.56857142857143e-06, "loss": 36.3354, "step": 10120 }, { "epoch": 240.97910447761194, "grad_norm": 29.261287689208984, "learning_rate": 9.567619047619049e-06, "loss": 37.0578, "step": 10121 }, { "epoch": 241.0, "grad_norm": 22.982912063598633, "learning_rate": 9.566666666666668e-06, "loss": 31.6339, "step": 10122 }, { "epoch": 241.02388059701494, "grad_norm": 24.988357543945312, "learning_rate": 9.565714285714287e-06, "loss": 36.1846, "step": 10123 }, { "epoch": 241.04776119402985, "grad_norm": 30.265308380126953, "learning_rate": 9.564761904761905e-06, "loss": 35.8522, "step": 10124 }, { "epoch": 241.07164179104478, "grad_norm": 24.690147399902344, "learning_rate": 9.563809523809524e-06, "loss": 35.5594, "step": 10125 }, { "epoch": 241.0955223880597, "grad_norm": 32.83644485473633, "learning_rate": 9.562857142857143e-06, "loss": 35.4214, "step": 10126 }, { "epoch": 241.11940298507463, "grad_norm": 27.488298416137695, "learning_rate": 9.561904761904762e-06, "loss": 37.4046, "step": 10127 }, { "epoch": 241.14328358208957, "grad_norm": 26.66729164123535, "learning_rate": 9.560952380952381e-06, "loss": 36.687, "step": 10128 }, { "epoch": 241.16716417910447, "grad_norm": 27.678674697875977, "learning_rate": 9.56e-06, "loss": 35.4188, "step": 10129 }, { "epoch": 241.1910447761194, "grad_norm": 28.47911262512207, "learning_rate": 9.55904761904762e-06, "loss": 36.3526, "step": 10130 }, { "epoch": 241.21492537313432, "grad_norm": 25.199949264526367, "learning_rate": 9.558095238095238e-06, "loss": 34.3721, "step": 10131 }, { "epoch": 241.23880597014926, "grad_norm": 28.23126983642578, "learning_rate": 9.557142857142858e-06, "loss": 36.4467, "step": 10132 }, { "epoch": 241.26268656716417, "grad_norm": 28.039873123168945, "learning_rate": 9.556190476190477e-06, "loss": 36.2541, "step": 10133 }, { "epoch": 241.2865671641791, "grad_norm": 30.603967666625977, "learning_rate": 9.555238095238096e-06, "loss": 35.7894, "step": 10134 }, { "epoch": 241.31044776119404, "grad_norm": 24.964921951293945, "learning_rate": 9.554285714285715e-06, "loss": 34.9858, "step": 10135 }, { "epoch": 241.33432835820895, "grad_norm": 29.842731475830078, "learning_rate": 9.553333333333334e-06, "loss": 36.6908, "step": 10136 }, { "epoch": 241.3582089552239, "grad_norm": 28.306358337402344, "learning_rate": 9.552380952380953e-06, "loss": 36.9513, "step": 10137 }, { "epoch": 241.3820895522388, "grad_norm": 30.514263153076172, "learning_rate": 9.551428571428573e-06, "loss": 36.1615, "step": 10138 }, { "epoch": 241.40597014925373, "grad_norm": 27.851089477539062, "learning_rate": 9.55047619047619e-06, "loss": 36.9024, "step": 10139 }, { "epoch": 241.42985074626867, "grad_norm": 29.087785720825195, "learning_rate": 9.549523809523811e-06, "loss": 36.2207, "step": 10140 }, { "epoch": 241.45373134328358, "grad_norm": 23.997663497924805, "learning_rate": 9.54857142857143e-06, "loss": 34.5732, "step": 10141 }, { "epoch": 241.47761194029852, "grad_norm": 28.922880172729492, "learning_rate": 9.547619047619049e-06, "loss": 34.9478, "step": 10142 }, { "epoch": 241.50149253731342, "grad_norm": 23.746774673461914, "learning_rate": 9.546666666666668e-06, "loss": 35.9709, "step": 10143 }, { "epoch": 241.52537313432836, "grad_norm": 29.665267944335938, "learning_rate": 9.545714285714287e-06, "loss": 35.6752, "step": 10144 }, { "epoch": 241.54925373134327, "grad_norm": 25.615671157836914, "learning_rate": 9.544761904761906e-06, "loss": 35.5674, "step": 10145 }, { "epoch": 241.5731343283582, "grad_norm": 28.494916915893555, "learning_rate": 9.543809523809525e-06, "loss": 35.2555, "step": 10146 }, { "epoch": 241.59701492537314, "grad_norm": 25.82793426513672, "learning_rate": 9.542857142857143e-06, "loss": 35.0491, "step": 10147 }, { "epoch": 241.62089552238805, "grad_norm": 28.713882446289062, "learning_rate": 9.541904761904762e-06, "loss": 36.6035, "step": 10148 }, { "epoch": 241.644776119403, "grad_norm": 26.348346710205078, "learning_rate": 9.540952380952381e-06, "loss": 34.6533, "step": 10149 }, { "epoch": 241.6686567164179, "grad_norm": 29.28573989868164, "learning_rate": 9.54e-06, "loss": 36.938, "step": 10150 }, { "epoch": 241.69253731343284, "grad_norm": 26.416595458984375, "learning_rate": 9.53904761904762e-06, "loss": 37.1741, "step": 10151 }, { "epoch": 241.71641791044777, "grad_norm": 28.165164947509766, "learning_rate": 9.538095238095238e-06, "loss": 35.7796, "step": 10152 }, { "epoch": 241.74029850746268, "grad_norm": 28.025209426879883, "learning_rate": 9.537142857142859e-06, "loss": 35.9985, "step": 10153 }, { "epoch": 241.76417910447762, "grad_norm": 27.24212074279785, "learning_rate": 9.536190476190477e-06, "loss": 36.0308, "step": 10154 }, { "epoch": 241.78805970149253, "grad_norm": 23.920875549316406, "learning_rate": 9.535238095238096e-06, "loss": 36.5636, "step": 10155 }, { "epoch": 241.81194029850747, "grad_norm": 29.511032104492188, "learning_rate": 9.534285714285715e-06, "loss": 36.862, "step": 10156 }, { "epoch": 241.83582089552237, "grad_norm": 25.657102584838867, "learning_rate": 9.533333333333334e-06, "loss": 35.3501, "step": 10157 }, { "epoch": 241.8597014925373, "grad_norm": 28.469913482666016, "learning_rate": 9.532380952380953e-06, "loss": 35.3241, "step": 10158 }, { "epoch": 241.88358208955225, "grad_norm": 27.132144927978516, "learning_rate": 9.531428571428572e-06, "loss": 35.7612, "step": 10159 }, { "epoch": 241.90746268656716, "grad_norm": 26.227014541625977, "learning_rate": 9.53047619047619e-06, "loss": 36.5643, "step": 10160 }, { "epoch": 241.9313432835821, "grad_norm": 23.205352783203125, "learning_rate": 9.529523809523811e-06, "loss": 37.4714, "step": 10161 }, { "epoch": 241.955223880597, "grad_norm": 32.46830368041992, "learning_rate": 9.528571428571429e-06, "loss": 37.3917, "step": 10162 }, { "epoch": 241.97910447761194, "grad_norm": 26.595823287963867, "learning_rate": 9.52761904761905e-06, "loss": 34.6719, "step": 10163 }, { "epoch": 242.0, "grad_norm": 26.23459815979004, "learning_rate": 9.526666666666668e-06, "loss": 31.4156, "step": 10164 }, { "epoch": 242.02388059701494, "grad_norm": 27.86235237121582, "learning_rate": 9.525714285714287e-06, "loss": 37.0225, "step": 10165 }, { "epoch": 242.04776119402985, "grad_norm": 28.9195613861084, "learning_rate": 9.524761904761906e-06, "loss": 36.7197, "step": 10166 }, { "epoch": 242.07164179104478, "grad_norm": 25.50555419921875, "learning_rate": 9.523809523809525e-06, "loss": 34.8611, "step": 10167 }, { "epoch": 242.0955223880597, "grad_norm": 27.159404754638672, "learning_rate": 9.522857142857144e-06, "loss": 35.2628, "step": 10168 }, { "epoch": 242.11940298507463, "grad_norm": 26.802696228027344, "learning_rate": 9.521904761904763e-06, "loss": 36.4047, "step": 10169 }, { "epoch": 242.14328358208957, "grad_norm": 27.503740310668945, "learning_rate": 9.520952380952381e-06, "loss": 35.6765, "step": 10170 }, { "epoch": 242.16716417910447, "grad_norm": 26.80796241760254, "learning_rate": 9.52e-06, "loss": 35.8367, "step": 10171 }, { "epoch": 242.1910447761194, "grad_norm": 26.51300621032715, "learning_rate": 9.51904761904762e-06, "loss": 35.946, "step": 10172 }, { "epoch": 242.21492537313432, "grad_norm": 23.54167938232422, "learning_rate": 9.518095238095238e-06, "loss": 36.5757, "step": 10173 }, { "epoch": 242.23880597014926, "grad_norm": 28.93527603149414, "learning_rate": 9.517142857142859e-06, "loss": 35.1881, "step": 10174 }, { "epoch": 242.26268656716417, "grad_norm": 23.34199333190918, "learning_rate": 9.516190476190476e-06, "loss": 35.704, "step": 10175 }, { "epoch": 242.2865671641791, "grad_norm": 28.4584903717041, "learning_rate": 9.515238095238097e-06, "loss": 34.5494, "step": 10176 }, { "epoch": 242.31044776119404, "grad_norm": 24.635061264038086, "learning_rate": 9.514285714285715e-06, "loss": 35.07, "step": 10177 }, { "epoch": 242.33432835820895, "grad_norm": 27.54088020324707, "learning_rate": 9.513333333333334e-06, "loss": 36.0655, "step": 10178 }, { "epoch": 242.3582089552239, "grad_norm": 27.176959991455078, "learning_rate": 9.512380952380953e-06, "loss": 37.2354, "step": 10179 }, { "epoch": 242.3820895522388, "grad_norm": 28.35399627685547, "learning_rate": 9.511428571428572e-06, "loss": 36.5617, "step": 10180 }, { "epoch": 242.40597014925373, "grad_norm": 25.509428024291992, "learning_rate": 9.510476190476191e-06, "loss": 36.1441, "step": 10181 }, { "epoch": 242.42985074626867, "grad_norm": 26.95767593383789, "learning_rate": 9.50952380952381e-06, "loss": 35.866, "step": 10182 }, { "epoch": 242.45373134328358, "grad_norm": 27.304424285888672, "learning_rate": 9.508571428571429e-06, "loss": 35.9361, "step": 10183 }, { "epoch": 242.47761194029852, "grad_norm": 29.883323669433594, "learning_rate": 9.50761904761905e-06, "loss": 36.4334, "step": 10184 }, { "epoch": 242.50149253731342, "grad_norm": 25.83658218383789, "learning_rate": 9.506666666666667e-06, "loss": 34.6341, "step": 10185 }, { "epoch": 242.52537313432836, "grad_norm": 26.64058494567871, "learning_rate": 9.505714285714287e-06, "loss": 36.2131, "step": 10186 }, { "epoch": 242.54925373134327, "grad_norm": 24.996538162231445, "learning_rate": 9.504761904761906e-06, "loss": 36.6959, "step": 10187 }, { "epoch": 242.5731343283582, "grad_norm": 30.702699661254883, "learning_rate": 9.503809523809523e-06, "loss": 36.7857, "step": 10188 }, { "epoch": 242.59701492537314, "grad_norm": 25.07971954345703, "learning_rate": 9.502857142857144e-06, "loss": 34.2304, "step": 10189 }, { "epoch": 242.62089552238805, "grad_norm": 33.22471237182617, "learning_rate": 9.501904761904763e-06, "loss": 36.0073, "step": 10190 }, { "epoch": 242.644776119403, "grad_norm": 29.005470275878906, "learning_rate": 9.500952380952382e-06, "loss": 36.4248, "step": 10191 }, { "epoch": 242.6686567164179, "grad_norm": 25.369693756103516, "learning_rate": 9.5e-06, "loss": 36.308, "step": 10192 }, { "epoch": 242.69253731343284, "grad_norm": 23.734210968017578, "learning_rate": 9.49904761904762e-06, "loss": 35.9578, "step": 10193 }, { "epoch": 242.71641791044777, "grad_norm": 26.945390701293945, "learning_rate": 9.498095238095238e-06, "loss": 35.776, "step": 10194 }, { "epoch": 242.74029850746268, "grad_norm": 24.644451141357422, "learning_rate": 9.497142857142859e-06, "loss": 35.684, "step": 10195 }, { "epoch": 242.76417910447762, "grad_norm": 29.59189796447754, "learning_rate": 9.496190476190476e-06, "loss": 35.4032, "step": 10196 }, { "epoch": 242.78805970149253, "grad_norm": 25.79729461669922, "learning_rate": 9.495238095238097e-06, "loss": 37.3598, "step": 10197 }, { "epoch": 242.81194029850747, "grad_norm": 26.13395881652832, "learning_rate": 9.494285714285716e-06, "loss": 35.872, "step": 10198 }, { "epoch": 242.83582089552237, "grad_norm": 22.409400939941406, "learning_rate": 9.493333333333334e-06, "loss": 35.7373, "step": 10199 }, { "epoch": 242.8597014925373, "grad_norm": 29.817716598510742, "learning_rate": 9.492380952380953e-06, "loss": 37.0355, "step": 10200 }, { "epoch": 242.88358208955225, "grad_norm": 25.475627899169922, "learning_rate": 9.491428571428572e-06, "loss": 35.3912, "step": 10201 }, { "epoch": 242.90746268656716, "grad_norm": 29.206342697143555, "learning_rate": 9.490476190476191e-06, "loss": 35.9925, "step": 10202 }, { "epoch": 242.9313432835821, "grad_norm": 26.866357803344727, "learning_rate": 9.48952380952381e-06, "loss": 36.1207, "step": 10203 }, { "epoch": 242.955223880597, "grad_norm": 25.74506378173828, "learning_rate": 9.488571428571429e-06, "loss": 35.8876, "step": 10204 }, { "epoch": 242.97910447761194, "grad_norm": 23.352527618408203, "learning_rate": 9.48761904761905e-06, "loss": 35.6897, "step": 10205 }, { "epoch": 243.0, "grad_norm": 24.392724990844727, "learning_rate": 9.486666666666667e-06, "loss": 32.0426, "step": 10206 }, { "epoch": 243.02388059701494, "grad_norm": 24.063154220581055, "learning_rate": 9.485714285714287e-06, "loss": 35.2103, "step": 10207 }, { "epoch": 243.04776119402985, "grad_norm": 27.244365692138672, "learning_rate": 9.484761904761906e-06, "loss": 35.7363, "step": 10208 }, { "epoch": 243.07164179104478, "grad_norm": 24.47391128540039, "learning_rate": 9.483809523809525e-06, "loss": 35.8727, "step": 10209 }, { "epoch": 243.0955223880597, "grad_norm": 26.44282341003418, "learning_rate": 9.482857142857144e-06, "loss": 35.1092, "step": 10210 }, { "epoch": 243.11940298507463, "grad_norm": 21.45967674255371, "learning_rate": 9.481904761904763e-06, "loss": 36.1031, "step": 10211 }, { "epoch": 243.14328358208957, "grad_norm": 25.74978256225586, "learning_rate": 9.480952380952382e-06, "loss": 36.1324, "step": 10212 }, { "epoch": 243.16716417910447, "grad_norm": 22.984970092773438, "learning_rate": 9.48e-06, "loss": 36.8233, "step": 10213 }, { "epoch": 243.1910447761194, "grad_norm": 26.591062545776367, "learning_rate": 9.47904761904762e-06, "loss": 36.2472, "step": 10214 }, { "epoch": 243.21492537313432, "grad_norm": NaN, "learning_rate": 9.478095238095239e-06, "loss": 40.3855, "step": 10215 }, { "epoch": 243.23880597014926, "grad_norm": 24.373151779174805, "learning_rate": 9.478095238095239e-06, "loss": 36.6809, "step": 10216 }, { "epoch": 243.26268656716417, "grad_norm": 27.12587547302246, "learning_rate": 9.477142857142857e-06, "loss": 35.7168, "step": 10217 }, { "epoch": 243.2865671641791, "grad_norm": 22.10392951965332, "learning_rate": 9.476190476190476e-06, "loss": 37.373, "step": 10218 }, { "epoch": 243.31044776119404, "grad_norm": 25.559600830078125, "learning_rate": 9.475238095238097e-06, "loss": 34.4548, "step": 10219 }, { "epoch": 243.33432835820895, "grad_norm": 22.069833755493164, "learning_rate": 9.474285714285714e-06, "loss": 36.0303, "step": 10220 }, { "epoch": 243.3582089552239, "grad_norm": 23.599863052368164, "learning_rate": 9.473333333333335e-06, "loss": 36.3221, "step": 10221 }, { "epoch": 243.3820895522388, "grad_norm": 22.96292495727539, "learning_rate": 9.472380952380954e-06, "loss": 35.2564, "step": 10222 }, { "epoch": 243.40597014925373, "grad_norm": 23.840822219848633, "learning_rate": 9.471428571428572e-06, "loss": 35.5719, "step": 10223 }, { "epoch": 243.42985074626867, "grad_norm": 20.89339256286621, "learning_rate": 9.470476190476191e-06, "loss": 35.5866, "step": 10224 }, { "epoch": 243.45373134328358, "grad_norm": 23.84319496154785, "learning_rate": 9.46952380952381e-06, "loss": 34.2371, "step": 10225 }, { "epoch": 243.47761194029852, "grad_norm": 20.901281356811523, "learning_rate": 9.46857142857143e-06, "loss": 36.5407, "step": 10226 }, { "epoch": 243.50149253731342, "grad_norm": 25.31196403503418, "learning_rate": 9.467619047619048e-06, "loss": 36.6019, "step": 10227 }, { "epoch": 243.52537313432836, "grad_norm": 20.52994728088379, "learning_rate": 9.466666666666667e-06, "loss": 36.518, "step": 10228 }, { "epoch": 243.54925373134327, "grad_norm": 19.71125602722168, "learning_rate": 9.465714285714288e-06, "loss": 35.2616, "step": 10229 }, { "epoch": 243.5731343283582, "grad_norm": 18.73887825012207, "learning_rate": 9.464761904761905e-06, "loss": 36.5793, "step": 10230 }, { "epoch": 243.59701492537314, "grad_norm": 21.38459587097168, "learning_rate": 9.463809523809525e-06, "loss": 35.9661, "step": 10231 }, { "epoch": 243.62089552238805, "grad_norm": 18.351490020751953, "learning_rate": 9.462857142857144e-06, "loss": 36.1207, "step": 10232 }, { "epoch": 243.644776119403, "grad_norm": 23.784090042114258, "learning_rate": 9.461904761904761e-06, "loss": 35.976, "step": 10233 }, { "epoch": 243.6686567164179, "grad_norm": 20.061128616333008, "learning_rate": 9.460952380952382e-06, "loss": 35.8996, "step": 10234 }, { "epoch": 243.69253731343284, "grad_norm": 18.1326847076416, "learning_rate": 9.460000000000001e-06, "loss": 35.8992, "step": 10235 }, { "epoch": 243.71641791044777, "grad_norm": 20.553115844726562, "learning_rate": 9.45904761904762e-06, "loss": 36.8324, "step": 10236 }, { "epoch": 243.74029850746268, "grad_norm": 17.57961654663086, "learning_rate": 9.458095238095239e-06, "loss": 35.5464, "step": 10237 }, { "epoch": 243.76417910447762, "grad_norm": 20.132938385009766, "learning_rate": 9.457142857142858e-06, "loss": 35.262, "step": 10238 }, { "epoch": 243.78805970149253, "grad_norm": 17.194721221923828, "learning_rate": 9.456190476190476e-06, "loss": 35.5574, "step": 10239 }, { "epoch": 243.81194029850747, "grad_norm": 14.782204627990723, "learning_rate": 9.455238095238095e-06, "loss": 36.5212, "step": 10240 }, { "epoch": 243.83582089552237, "grad_norm": 19.932167053222656, "learning_rate": 9.454285714285714e-06, "loss": 35.4265, "step": 10241 }, { "epoch": 243.8597014925373, "grad_norm": 15.766868591308594, "learning_rate": 9.453333333333335e-06, "loss": 36.4644, "step": 10242 }, { "epoch": 243.88358208955225, "grad_norm": 14.553804397583008, "learning_rate": 9.452380952380952e-06, "loss": 36.093, "step": 10243 }, { "epoch": 243.90746268656716, "grad_norm": 14.554533958435059, "learning_rate": 9.451428571428573e-06, "loss": 35.6284, "step": 10244 }, { "epoch": 243.9313432835821, "grad_norm": 18.88169288635254, "learning_rate": 9.450476190476192e-06, "loss": 37.5024, "step": 10245 }, { "epoch": 243.955223880597, "grad_norm": 17.068729400634766, "learning_rate": 9.44952380952381e-06, "loss": 35.2893, "step": 10246 }, { "epoch": 243.97910447761194, "grad_norm": 19.206130981445312, "learning_rate": 9.44857142857143e-06, "loss": 35.7482, "step": 10247 }, { "epoch": 244.0, "grad_norm": 15.875373840332031, "learning_rate": 9.447619047619048e-06, "loss": 31.5993, "step": 10248 }, { "epoch": 244.02388059701494, "grad_norm": NaN, "learning_rate": 9.446666666666667e-06, "loss": 58.6215, "step": 10249 }, { "epoch": 244.04776119402985, "grad_norm": 18.51722526550293, "learning_rate": 9.446666666666667e-06, "loss": 35.8964, "step": 10250 }, { "epoch": 244.07164179104478, "grad_norm": 19.248676300048828, "learning_rate": 9.445714285714288e-06, "loss": 36.2713, "step": 10251 }, { "epoch": 244.0955223880597, "grad_norm": 17.334125518798828, "learning_rate": 9.444761904761905e-06, "loss": 35.1127, "step": 10252 }, { "epoch": 244.11940298507463, "grad_norm": 21.649171829223633, "learning_rate": 9.443809523809526e-06, "loss": 35.9074, "step": 10253 }, { "epoch": 244.14328358208957, "grad_norm": 16.352853775024414, "learning_rate": 9.442857142857144e-06, "loss": 36.4875, "step": 10254 }, { "epoch": 244.16716417910447, "grad_norm": 21.442800521850586, "learning_rate": 9.441904761904762e-06, "loss": 35.6998, "step": 10255 }, { "epoch": 244.1910447761194, "grad_norm": 19.510009765625, "learning_rate": 9.440952380952382e-06, "loss": 36.013, "step": 10256 }, { "epoch": 244.21492537313432, "grad_norm": 21.225709915161133, "learning_rate": 9.440000000000001e-06, "loss": 34.7438, "step": 10257 }, { "epoch": 244.23880597014926, "grad_norm": 19.475543975830078, "learning_rate": 9.43904761904762e-06, "loss": 35.4845, "step": 10258 }, { "epoch": 244.26268656716417, "grad_norm": 17.805879592895508, "learning_rate": 9.438095238095239e-06, "loss": 35.3803, "step": 10259 }, { "epoch": 244.2865671641791, "grad_norm": 18.243566513061523, "learning_rate": 9.437142857142858e-06, "loss": 35.433, "step": 10260 }, { "epoch": 244.31044776119404, "grad_norm": 19.33938217163086, "learning_rate": 9.436190476190477e-06, "loss": 36.3215, "step": 10261 }, { "epoch": 244.33432835820895, "grad_norm": 18.11855125427246, "learning_rate": 9.435238095238096e-06, "loss": 35.5869, "step": 10262 }, { "epoch": 244.3582089552239, "grad_norm": 20.2562255859375, "learning_rate": 9.434285714285714e-06, "loss": 36.9068, "step": 10263 }, { "epoch": 244.3820895522388, "grad_norm": 16.003639221191406, "learning_rate": 9.433333333333335e-06, "loss": 36.0697, "step": 10264 }, { "epoch": 244.40597014925373, "grad_norm": 23.99402618408203, "learning_rate": 9.432380952380952e-06, "loss": 35.1908, "step": 10265 }, { "epoch": 244.42985074626867, "grad_norm": 17.916046142578125, "learning_rate": 9.431428571428573e-06, "loss": 36.2081, "step": 10266 }, { "epoch": 244.45373134328358, "grad_norm": 18.749805450439453, "learning_rate": 9.430476190476192e-06, "loss": 36.253, "step": 10267 }, { "epoch": 244.47761194029852, "grad_norm": 20.116228103637695, "learning_rate": 9.42952380952381e-06, "loss": 36.5355, "step": 10268 }, { "epoch": 244.50149253731342, "grad_norm": 14.6526517868042, "learning_rate": 9.42857142857143e-06, "loss": 35.943, "step": 10269 }, { "epoch": 244.52537313432836, "grad_norm": 18.03446388244629, "learning_rate": 9.427619047619048e-06, "loss": 36.748, "step": 10270 }, { "epoch": 244.54925373134327, "grad_norm": 18.715557098388672, "learning_rate": 9.426666666666667e-06, "loss": 34.953, "step": 10271 }, { "epoch": 244.5731343283582, "grad_norm": 13.508959770202637, "learning_rate": 9.425714285714286e-06, "loss": 35.8598, "step": 10272 }, { "epoch": 244.59701492537314, "grad_norm": 15.250314712524414, "learning_rate": 9.424761904761905e-06, "loss": 36.003, "step": 10273 }, { "epoch": 244.62089552238805, "grad_norm": 18.034427642822266, "learning_rate": 9.423809523809526e-06, "loss": 36.7388, "step": 10274 }, { "epoch": 244.644776119403, "grad_norm": 16.868148803710938, "learning_rate": 9.422857142857143e-06, "loss": 36.4878, "step": 10275 }, { "epoch": 244.6686567164179, "grad_norm": 16.670799255371094, "learning_rate": 9.421904761904763e-06, "loss": 35.4029, "step": 10276 }, { "epoch": 244.69253731343284, "grad_norm": 18.17951202392578, "learning_rate": 9.420952380952382e-06, "loss": 35.147, "step": 10277 }, { "epoch": 244.71641791044777, "grad_norm": 18.339149475097656, "learning_rate": 9.42e-06, "loss": 36.0372, "step": 10278 }, { "epoch": 244.74029850746268, "grad_norm": 18.232181549072266, "learning_rate": 9.41904761904762e-06, "loss": 36.8727, "step": 10279 }, { "epoch": 244.76417910447762, "grad_norm": 13.441219329833984, "learning_rate": 9.418095238095239e-06, "loss": 35.9545, "step": 10280 }, { "epoch": 244.78805970149253, "grad_norm": 19.54502296447754, "learning_rate": 9.417142857142858e-06, "loss": 36.2901, "step": 10281 }, { "epoch": 244.81194029850747, "grad_norm": 15.229294776916504, "learning_rate": 9.416190476190477e-06, "loss": 36.0402, "step": 10282 }, { "epoch": 244.83582089552237, "grad_norm": 20.756439208984375, "learning_rate": 9.415238095238096e-06, "loss": 37.4198, "step": 10283 }, { "epoch": 244.8597014925373, "grad_norm": 17.38568115234375, "learning_rate": 9.414285714285715e-06, "loss": 35.5644, "step": 10284 }, { "epoch": 244.88358208955225, "grad_norm": 16.86087417602539, "learning_rate": 9.413333333333334e-06, "loss": 35.8171, "step": 10285 }, { "epoch": 244.90746268656716, "grad_norm": 18.812097549438477, "learning_rate": 9.412380952380952e-06, "loss": 35.5254, "step": 10286 }, { "epoch": 244.9313432835821, "grad_norm": 15.785964965820312, "learning_rate": 9.411428571428573e-06, "loss": 35.1337, "step": 10287 }, { "epoch": 244.955223880597, "grad_norm": 16.645219802856445, "learning_rate": 9.41047619047619e-06, "loss": 36.283, "step": 10288 }, { "epoch": 244.97910447761194, "grad_norm": 19.539493560791016, "learning_rate": 9.40952380952381e-06, "loss": 35.3269, "step": 10289 }, { "epoch": 245.0, "grad_norm": 14.891328811645508, "learning_rate": 9.40857142857143e-06, "loss": 30.4666, "step": 10290 }, { "epoch": 245.02388059701494, "grad_norm": 18.36530876159668, "learning_rate": 9.407619047619049e-06, "loss": 36.0385, "step": 10291 }, { "epoch": 245.04776119402985, "grad_norm": 18.795873641967773, "learning_rate": 9.406666666666668e-06, "loss": 34.3661, "step": 10292 }, { "epoch": 245.07164179104478, "grad_norm": 16.63825798034668, "learning_rate": 9.405714285714286e-06, "loss": 35.9693, "step": 10293 }, { "epoch": 245.0955223880597, "grad_norm": 19.72463035583496, "learning_rate": 9.404761904761905e-06, "loss": 37.4437, "step": 10294 }, { "epoch": 245.11940298507463, "grad_norm": 15.989143371582031, "learning_rate": 9.403809523809526e-06, "loss": 35.7794, "step": 10295 }, { "epoch": 245.14328358208957, "grad_norm": 16.804611206054688, "learning_rate": 9.402857142857143e-06, "loss": 35.8952, "step": 10296 }, { "epoch": 245.16716417910447, "grad_norm": 20.517375946044922, "learning_rate": 9.401904761904764e-06, "loss": 35.2546, "step": 10297 }, { "epoch": 245.1910447761194, "grad_norm": 15.119479179382324, "learning_rate": 9.400952380952381e-06, "loss": 36.0732, "step": 10298 }, { "epoch": 245.21492537313432, "grad_norm": 18.243501663208008, "learning_rate": 9.4e-06, "loss": 36.1616, "step": 10299 }, { "epoch": 245.23880597014926, "grad_norm": 19.40731430053711, "learning_rate": 9.39904761904762e-06, "loss": 37.3905, "step": 10300 }, { "epoch": 245.26268656716417, "grad_norm": 15.139904975891113, "learning_rate": 9.398095238095238e-06, "loss": 37.0587, "step": 10301 }, { "epoch": 245.2865671641791, "grad_norm": 17.99896812438965, "learning_rate": 9.397142857142858e-06, "loss": 35.2688, "step": 10302 }, { "epoch": 245.31044776119404, "grad_norm": 22.78380584716797, "learning_rate": 9.396190476190477e-06, "loss": 36.1511, "step": 10303 }, { "epoch": 245.33432835820895, "grad_norm": 15.877861976623535, "learning_rate": 9.395238095238096e-06, "loss": 36.2515, "step": 10304 }, { "epoch": 245.3582089552239, "grad_norm": 22.85430145263672, "learning_rate": 9.394285714285715e-06, "loss": 36.4375, "step": 10305 }, { "epoch": 245.3820895522388, "grad_norm": 19.368839263916016, "learning_rate": 9.393333333333334e-06, "loss": 35.9143, "step": 10306 }, { "epoch": 245.40597014925373, "grad_norm": 14.689555168151855, "learning_rate": 9.392380952380953e-06, "loss": 35.2567, "step": 10307 }, { "epoch": 245.42985074626867, "grad_norm": 29.445722579956055, "learning_rate": 9.391428571428573e-06, "loss": 34.6743, "step": 10308 }, { "epoch": 245.45373134328358, "grad_norm": NaN, "learning_rate": 9.39047619047619e-06, "loss": 64.4562, "step": 10309 }, { "epoch": 245.47761194029852, "grad_norm": 18.444366455078125, "learning_rate": 9.39047619047619e-06, "loss": 35.5516, "step": 10310 }, { "epoch": 245.50149253731342, "grad_norm": 31.422306060791016, "learning_rate": 9.389523809523811e-06, "loss": 36.2244, "step": 10311 }, { "epoch": 245.52537313432836, "grad_norm": 23.68085289001465, "learning_rate": 9.38857142857143e-06, "loss": 34.9389, "step": 10312 }, { "epoch": 245.54925373134327, "grad_norm": 32.45897674560547, "learning_rate": 9.387619047619049e-06, "loss": 36.2821, "step": 10313 }, { "epoch": 245.5731343283582, "grad_norm": 24.41595458984375, "learning_rate": 9.386666666666668e-06, "loss": 35.7378, "step": 10314 }, { "epoch": 245.59701492537314, "grad_norm": 30.536373138427734, "learning_rate": 9.385714285714287e-06, "loss": 36.2082, "step": 10315 }, { "epoch": 245.62089552238805, "grad_norm": 24.87826156616211, "learning_rate": 9.384761904761906e-06, "loss": 35.542, "step": 10316 }, { "epoch": 245.644776119403, "grad_norm": 33.00590896606445, "learning_rate": 9.383809523809524e-06, "loss": 36.2549, "step": 10317 }, { "epoch": 245.6686567164179, "grad_norm": 28.54401397705078, "learning_rate": 9.382857142857143e-06, "loss": 36.9198, "step": 10318 }, { "epoch": 245.69253731343284, "grad_norm": 26.909244537353516, "learning_rate": 9.381904761904764e-06, "loss": 35.9212, "step": 10319 }, { "epoch": 245.71641791044777, "grad_norm": 25.473735809326172, "learning_rate": 9.380952380952381e-06, "loss": 36.0517, "step": 10320 }, { "epoch": 245.74029850746268, "grad_norm": 29.483325958251953, "learning_rate": 9.38e-06, "loss": 35.915, "step": 10321 }, { "epoch": 245.76417910447762, "grad_norm": 25.776201248168945, "learning_rate": 9.37904761904762e-06, "loss": 35.5704, "step": 10322 }, { "epoch": 245.78805970149253, "grad_norm": 31.685558319091797, "learning_rate": 9.378095238095238e-06, "loss": 35.5814, "step": 10323 }, { "epoch": 245.81194029850747, "grad_norm": 28.644330978393555, "learning_rate": 9.377142857142858e-06, "loss": 36.7554, "step": 10324 }, { "epoch": 245.83582089552237, "grad_norm": 31.40442657470703, "learning_rate": 9.376190476190477e-06, "loss": 35.4414, "step": 10325 }, { "epoch": 245.8597014925373, "grad_norm": 27.47274398803711, "learning_rate": 9.375238095238096e-06, "loss": 36.1345, "step": 10326 }, { "epoch": 245.88358208955225, "grad_norm": 31.04277992248535, "learning_rate": 9.374285714285715e-06, "loss": 36.1506, "step": 10327 }, { "epoch": 245.90746268656716, "grad_norm": 27.77751922607422, "learning_rate": 9.373333333333334e-06, "loss": 36.4583, "step": 10328 }, { "epoch": 245.9313432835821, "grad_norm": 30.050209045410156, "learning_rate": 9.372380952380953e-06, "loss": 35.4964, "step": 10329 }, { "epoch": 245.955223880597, "grad_norm": 28.161046981811523, "learning_rate": 9.371428571428572e-06, "loss": 34.7306, "step": 10330 }, { "epoch": 245.97910447761194, "grad_norm": 26.274951934814453, "learning_rate": 9.37047619047619e-06, "loss": 34.5474, "step": 10331 }, { "epoch": 246.0, "grad_norm": 21.592979431152344, "learning_rate": 9.369523809523811e-06, "loss": 30.3718, "step": 10332 }, { "epoch": 246.02388059701494, "grad_norm": 27.67121696472168, "learning_rate": 9.368571428571428e-06, "loss": 35.0143, "step": 10333 }, { "epoch": 246.04776119402985, "grad_norm": 23.754695892333984, "learning_rate": 9.367619047619049e-06, "loss": 35.4082, "step": 10334 }, { "epoch": 246.07164179104478, "grad_norm": 31.148208618164062, "learning_rate": 9.366666666666668e-06, "loss": 35.0931, "step": 10335 }, { "epoch": 246.0955223880597, "grad_norm": 27.08707618713379, "learning_rate": 9.365714285714287e-06, "loss": 34.3186, "step": 10336 }, { "epoch": 246.11940298507463, "grad_norm": 30.51011085510254, "learning_rate": 9.364761904761906e-06, "loss": 34.9747, "step": 10337 }, { "epoch": 246.14328358208957, "grad_norm": 28.628009796142578, "learning_rate": 9.363809523809525e-06, "loss": 36.8224, "step": 10338 }, { "epoch": 246.16716417910447, "grad_norm": 27.875980377197266, "learning_rate": 9.362857142857143e-06, "loss": 35.1735, "step": 10339 }, { "epoch": 246.1910447761194, "grad_norm": 22.788734436035156, "learning_rate": 9.361904761904762e-06, "loss": 35.0024, "step": 10340 }, { "epoch": 246.21492537313432, "grad_norm": 29.667194366455078, "learning_rate": 9.360952380952381e-06, "loss": 36.5179, "step": 10341 }, { "epoch": 246.23880597014926, "grad_norm": 26.609294891357422, "learning_rate": 9.360000000000002e-06, "loss": 36.5552, "step": 10342 }, { "epoch": 246.26268656716417, "grad_norm": 28.798431396484375, "learning_rate": 9.359047619047619e-06, "loss": 35.0698, "step": 10343 }, { "epoch": 246.2865671641791, "grad_norm": 25.941505432128906, "learning_rate": 9.358095238095238e-06, "loss": 36.0461, "step": 10344 }, { "epoch": 246.31044776119404, "grad_norm": 26.68019676208496, "learning_rate": 9.357142857142859e-06, "loss": 35.2952, "step": 10345 }, { "epoch": 246.33432835820895, "grad_norm": 23.037126541137695, "learning_rate": 9.356190476190476e-06, "loss": 35.8529, "step": 10346 }, { "epoch": 246.3582089552239, "grad_norm": 30.258909225463867, "learning_rate": 9.355238095238096e-06, "loss": 35.8508, "step": 10347 }, { "epoch": 246.3820895522388, "grad_norm": 27.253101348876953, "learning_rate": 9.354285714285715e-06, "loss": 35.9056, "step": 10348 }, { "epoch": 246.40597014925373, "grad_norm": 29.530893325805664, "learning_rate": 9.353333333333334e-06, "loss": 35.328, "step": 10349 }, { "epoch": 246.42985074626867, "grad_norm": 28.249046325683594, "learning_rate": 9.352380952380953e-06, "loss": 35.0326, "step": 10350 }, { "epoch": 246.45373134328358, "grad_norm": 29.183799743652344, "learning_rate": 9.351428571428572e-06, "loss": 35.6106, "step": 10351 }, { "epoch": 246.47761194029852, "grad_norm": 26.009546279907227, "learning_rate": 9.35047619047619e-06, "loss": 36.1377, "step": 10352 }, { "epoch": 246.50149253731342, "grad_norm": 29.63404083251953, "learning_rate": 9.34952380952381e-06, "loss": 37.2717, "step": 10353 }, { "epoch": 246.52537313432836, "grad_norm": 26.10556411743164, "learning_rate": 9.348571428571429e-06, "loss": 37.3727, "step": 10354 }, { "epoch": 246.54925373134327, "grad_norm": 28.26673126220703, "learning_rate": 9.34761904761905e-06, "loss": 36.8424, "step": 10355 }, { "epoch": 246.5731343283582, "grad_norm": 25.901973724365234, "learning_rate": 9.346666666666666e-06, "loss": 36.1249, "step": 10356 }, { "epoch": 246.59701492537314, "grad_norm": 28.327436447143555, "learning_rate": 9.345714285714287e-06, "loss": 35.6474, "step": 10357 }, { "epoch": 246.62089552238805, "grad_norm": 26.3010311126709, "learning_rate": 9.344761904761906e-06, "loss": 35.104, "step": 10358 }, { "epoch": 246.644776119403, "grad_norm": 30.107545852661133, "learning_rate": 9.343809523809525e-06, "loss": 36.2501, "step": 10359 }, { "epoch": 246.6686567164179, "grad_norm": 28.717906951904297, "learning_rate": 9.342857142857144e-06, "loss": 35.7715, "step": 10360 }, { "epoch": 246.69253731343284, "grad_norm": 27.056331634521484, "learning_rate": 9.341904761904763e-06, "loss": 35.9851, "step": 10361 }, { "epoch": 246.71641791044777, "grad_norm": 23.271900177001953, "learning_rate": 9.340952380952381e-06, "loss": 36.2637, "step": 10362 }, { "epoch": 246.74029850746268, "grad_norm": 26.731822967529297, "learning_rate": 9.340000000000002e-06, "loss": 34.6707, "step": 10363 }, { "epoch": 246.76417910447762, "grad_norm": 22.142702102661133, "learning_rate": 9.33904761904762e-06, "loss": 36.4722, "step": 10364 }, { "epoch": 246.78805970149253, "grad_norm": 32.12749481201172, "learning_rate": 9.338095238095238e-06, "loss": 35.502, "step": 10365 }, { "epoch": 246.81194029850747, "grad_norm": 25.281442642211914, "learning_rate": 9.337142857142859e-06, "loss": 37.0552, "step": 10366 }, { "epoch": 246.83582089552237, "grad_norm": 26.85059928894043, "learning_rate": 9.336190476190476e-06, "loss": 35.6356, "step": 10367 }, { "epoch": 246.8597014925373, "grad_norm": 24.89397430419922, "learning_rate": 9.335238095238097e-06, "loss": 35.9212, "step": 10368 }, { "epoch": 246.88358208955225, "grad_norm": 29.490575790405273, "learning_rate": 9.334285714285715e-06, "loss": 37.0296, "step": 10369 }, { "epoch": 246.90746268656716, "grad_norm": 27.18348503112793, "learning_rate": 9.333333333333334e-06, "loss": 35.7107, "step": 10370 }, { "epoch": 246.9313432835821, "grad_norm": 25.741382598876953, "learning_rate": 9.332380952380953e-06, "loss": 35.7305, "step": 10371 }, { "epoch": 246.955223880597, "grad_norm": 24.733936309814453, "learning_rate": 9.331428571428572e-06, "loss": 35.7664, "step": 10372 }, { "epoch": 246.97910447761194, "grad_norm": 26.003232955932617, "learning_rate": 9.330476190476191e-06, "loss": 36.4173, "step": 10373 }, { "epoch": 247.0, "grad_norm": 18.96998405456543, "learning_rate": 9.32952380952381e-06, "loss": 30.9039, "step": 10374 }, { "epoch": 247.02388059701494, "grad_norm": 25.966978073120117, "learning_rate": 9.328571428571429e-06, "loss": 35.8479, "step": 10375 }, { "epoch": 247.04776119402985, "grad_norm": 26.212934494018555, "learning_rate": 9.32761904761905e-06, "loss": 36.9688, "step": 10376 }, { "epoch": 247.07164179104478, "grad_norm": 27.39366340637207, "learning_rate": 9.326666666666667e-06, "loss": 36.1078, "step": 10377 }, { "epoch": 247.0955223880597, "grad_norm": 24.970836639404297, "learning_rate": 9.325714285714287e-06, "loss": 36.078, "step": 10378 }, { "epoch": 247.11940298507463, "grad_norm": 25.114280700683594, "learning_rate": 9.324761904761906e-06, "loss": 35.7343, "step": 10379 }, { "epoch": 247.14328358208957, "grad_norm": 22.088726043701172, "learning_rate": 9.323809523809525e-06, "loss": 35.0877, "step": 10380 }, { "epoch": 247.16716417910447, "grad_norm": 30.845657348632812, "learning_rate": 9.322857142857144e-06, "loss": 36.3526, "step": 10381 }, { "epoch": 247.1910447761194, "grad_norm": 22.784645080566406, "learning_rate": 9.321904761904763e-06, "loss": 36.6681, "step": 10382 }, { "epoch": 247.21492537313432, "grad_norm": 29.72661018371582, "learning_rate": 9.320952380952382e-06, "loss": 35.3475, "step": 10383 }, { "epoch": 247.23880597014926, "grad_norm": 26.268980026245117, "learning_rate": 9.32e-06, "loss": 34.6149, "step": 10384 }, { "epoch": 247.26268656716417, "grad_norm": 26.10773468017578, "learning_rate": 9.31904761904762e-06, "loss": 35.4511, "step": 10385 }, { "epoch": 247.2865671641791, "grad_norm": 23.967748641967773, "learning_rate": 9.318095238095238e-06, "loss": 35.0177, "step": 10386 }, { "epoch": 247.31044776119404, "grad_norm": 29.59598159790039, "learning_rate": 9.317142857142857e-06, "loss": 36.5897, "step": 10387 }, { "epoch": 247.33432835820895, "grad_norm": 27.748294830322266, "learning_rate": 9.316190476190476e-06, "loss": 35.9019, "step": 10388 }, { "epoch": 247.3582089552239, "grad_norm": 21.567779541015625, "learning_rate": 9.315238095238097e-06, "loss": 36.0501, "step": 10389 }, { "epoch": 247.3820895522388, "grad_norm": 20.922203063964844, "learning_rate": 9.314285714285714e-06, "loss": 36.001, "step": 10390 }, { "epoch": 247.40597014925373, "grad_norm": 23.449371337890625, "learning_rate": 9.313333333333335e-06, "loss": 36.0758, "step": 10391 }, { "epoch": 247.42985074626867, "grad_norm": 20.626108169555664, "learning_rate": 9.312380952380953e-06, "loss": 35.4947, "step": 10392 }, { "epoch": 247.45373134328358, "grad_norm": 22.764175415039062, "learning_rate": 9.311428571428572e-06, "loss": 36.4139, "step": 10393 }, { "epoch": 247.47761194029852, "grad_norm": NaN, "learning_rate": 9.310476190476191e-06, "loss": 48.6844, "step": 10394 }, { "epoch": 247.50149253731342, "grad_norm": 18.010173797607422, "learning_rate": 9.310476190476191e-06, "loss": 35.2145, "step": 10395 }, { "epoch": 247.52537313432836, "grad_norm": 23.32284927368164, "learning_rate": 9.30952380952381e-06, "loss": 35.7392, "step": 10396 }, { "epoch": 247.54925373134327, "grad_norm": 16.008895874023438, "learning_rate": 9.308571428571429e-06, "loss": 35.3958, "step": 10397 }, { "epoch": 247.5731343283582, "grad_norm": 25.21518325805664, "learning_rate": 9.307619047619048e-06, "loss": 35.2711, "step": 10398 }, { "epoch": 247.59701492537314, "grad_norm": 20.520076751708984, "learning_rate": 9.306666666666667e-06, "loss": 37.6941, "step": 10399 }, { "epoch": 247.62089552238805, "grad_norm": 21.775188446044922, "learning_rate": 9.305714285714287e-06, "loss": 35.9768, "step": 10400 }, { "epoch": 247.644776119403, "grad_norm": 19.160938262939453, "learning_rate": 9.304761904761905e-06, "loss": 35.7068, "step": 10401 }, { "epoch": 247.6686567164179, "grad_norm": 23.554811477661133, "learning_rate": 9.303809523809525e-06, "loss": 35.8011, "step": 10402 }, { "epoch": 247.69253731343284, "grad_norm": 19.529022216796875, "learning_rate": 9.302857142857144e-06, "loss": 35.9624, "step": 10403 }, { "epoch": 247.71641791044777, "grad_norm": 22.15897560119629, "learning_rate": 9.301904761904763e-06, "loss": 34.4005, "step": 10404 }, { "epoch": 247.74029850746268, "grad_norm": 19.75330352783203, "learning_rate": 9.300952380952382e-06, "loss": 36.5212, "step": 10405 }, { "epoch": 247.76417910447762, "grad_norm": 20.824283599853516, "learning_rate": 9.3e-06, "loss": 35.4547, "step": 10406 }, { "epoch": 247.78805970149253, "grad_norm": 17.906557083129883, "learning_rate": 9.29904761904762e-06, "loss": 36.7554, "step": 10407 }, { "epoch": 247.81194029850747, "grad_norm": 25.11722183227539, "learning_rate": 9.29809523809524e-06, "loss": 36.1553, "step": 10408 }, { "epoch": 247.83582089552237, "grad_norm": 18.463071823120117, "learning_rate": 9.297142857142857e-06, "loss": 35.5073, "step": 10409 }, { "epoch": 247.8597014925373, "grad_norm": 23.776803970336914, "learning_rate": 9.296190476190476e-06, "loss": 36.1157, "step": 10410 }, { "epoch": 247.88358208955225, "grad_norm": 17.98375701904297, "learning_rate": 9.295238095238095e-06, "loss": 35.5557, "step": 10411 }, { "epoch": 247.90746268656716, "grad_norm": 21.57294273376465, "learning_rate": 9.294285714285714e-06, "loss": 35.1734, "step": 10412 }, { "epoch": 247.9313432835821, "grad_norm": 18.360673904418945, "learning_rate": 9.293333333333335e-06, "loss": 34.3661, "step": 10413 }, { "epoch": 247.955223880597, "grad_norm": 18.266700744628906, "learning_rate": 9.292380952380952e-06, "loss": 35.3242, "step": 10414 }, { "epoch": 247.97910447761194, "grad_norm": 16.929805755615234, "learning_rate": 9.291428571428572e-06, "loss": 35.7476, "step": 10415 }, { "epoch": 248.0, "grad_norm": 18.62027359008789, "learning_rate": 9.290476190476191e-06, "loss": 32.6399, "step": 10416 }, { "epoch": 248.02388059701494, "grad_norm": 17.48893165588379, "learning_rate": 9.28952380952381e-06, "loss": 34.2671, "step": 10417 }, { "epoch": 248.04776119402985, "grad_norm": 15.929790496826172, "learning_rate": 9.28857142857143e-06, "loss": 36.183, "step": 10418 }, { "epoch": 248.07164179104478, "grad_norm": 20.762374877929688, "learning_rate": 9.287619047619048e-06, "loss": 34.9601, "step": 10419 }, { "epoch": 248.0955223880597, "grad_norm": 17.352806091308594, "learning_rate": 9.286666666666667e-06, "loss": 34.1468, "step": 10420 }, { "epoch": 248.11940298507463, "grad_norm": 17.688629150390625, "learning_rate": 9.285714285714288e-06, "loss": 36.46, "step": 10421 }, { "epoch": 248.14328358208957, "grad_norm": 21.053203582763672, "learning_rate": 9.284761904761905e-06, "loss": 36.585, "step": 10422 }, { "epoch": 248.16716417910447, "grad_norm": 16.51744842529297, "learning_rate": 9.283809523809525e-06, "loss": 34.8284, "step": 10423 }, { "epoch": 248.1910447761194, "grad_norm": 15.983092308044434, "learning_rate": 9.282857142857144e-06, "loss": 35.8418, "step": 10424 }, { "epoch": 248.21492537313432, "grad_norm": 21.639421463012695, "learning_rate": 9.281904761904763e-06, "loss": 34.4315, "step": 10425 }, { "epoch": 248.23880597014926, "grad_norm": 15.690654754638672, "learning_rate": 9.280952380952382e-06, "loss": 35.8647, "step": 10426 }, { "epoch": 248.26268656716417, "grad_norm": 22.966873168945312, "learning_rate": 9.280000000000001e-06, "loss": 35.9926, "step": 10427 }, { "epoch": 248.2865671641791, "grad_norm": 17.881546020507812, "learning_rate": 9.27904761904762e-06, "loss": 35.0961, "step": 10428 }, { "epoch": 248.31044776119404, "grad_norm": 16.167945861816406, "learning_rate": 9.278095238095239e-06, "loss": 35.6078, "step": 10429 }, { "epoch": 248.33432835820895, "grad_norm": 23.028915405273438, "learning_rate": 9.277142857142858e-06, "loss": 36.3881, "step": 10430 }, { "epoch": 248.3582089552239, "grad_norm": 17.872678756713867, "learning_rate": 9.276190476190477e-06, "loss": 36.1854, "step": 10431 }, { "epoch": 248.3820895522388, "grad_norm": 16.718168258666992, "learning_rate": 9.275238095238095e-06, "loss": 36.5898, "step": 10432 }, { "epoch": 248.40597014925373, "grad_norm": 14.91796875, "learning_rate": 9.274285714285714e-06, "loss": 35.5027, "step": 10433 }, { "epoch": 248.42985074626867, "grad_norm": 17.224084854125977, "learning_rate": 9.273333333333335e-06, "loss": 36.627, "step": 10434 }, { "epoch": 248.45373134328358, "grad_norm": 14.895155906677246, "learning_rate": 9.272380952380952e-06, "loss": 36.2135, "step": 10435 }, { "epoch": 248.47761194029852, "grad_norm": 17.61219596862793, "learning_rate": 9.271428571428573e-06, "loss": 36.2376, "step": 10436 }, { "epoch": 248.50149253731342, "grad_norm": 17.631935119628906, "learning_rate": 9.270476190476192e-06, "loss": 36.7248, "step": 10437 }, { "epoch": 248.52537313432836, "grad_norm": 19.364099502563477, "learning_rate": 9.26952380952381e-06, "loss": 36.2454, "step": 10438 }, { "epoch": 248.54925373134327, "grad_norm": 14.401960372924805, "learning_rate": 9.26857142857143e-06, "loss": 36.7114, "step": 10439 }, { "epoch": 248.5731343283582, "grad_norm": 24.623205184936523, "learning_rate": 9.267619047619048e-06, "loss": 37.094, "step": 10440 }, { "epoch": 248.59701492537314, "grad_norm": 19.38271141052246, "learning_rate": 9.266666666666667e-06, "loss": 36.164, "step": 10441 }, { "epoch": 248.62089552238805, "grad_norm": 18.6229305267334, "learning_rate": 9.265714285714286e-06, "loss": 35.5167, "step": 10442 }, { "epoch": 248.644776119403, "grad_norm": 23.795459747314453, "learning_rate": 9.264761904761905e-06, "loss": 36.2195, "step": 10443 }, { "epoch": 248.6686567164179, "grad_norm": 14.197778701782227, "learning_rate": 9.263809523809526e-06, "loss": 34.8214, "step": 10444 }, { "epoch": 248.69253731343284, "grad_norm": 26.66496467590332, "learning_rate": 9.262857142857143e-06, "loss": 34.9951, "step": 10445 }, { "epoch": 248.71641791044777, "grad_norm": 21.354198455810547, "learning_rate": 9.261904761904763e-06, "loss": 36.153, "step": 10446 }, { "epoch": 248.74029850746268, "grad_norm": 24.006046295166016, "learning_rate": 9.260952380952382e-06, "loss": 36.0312, "step": 10447 }, { "epoch": 248.76417910447762, "grad_norm": 19.989986419677734, "learning_rate": 9.260000000000001e-06, "loss": 36.9065, "step": 10448 }, { "epoch": 248.78805970149253, "grad_norm": 21.58749771118164, "learning_rate": 9.25904761904762e-06, "loss": 35.464, "step": 10449 }, { "epoch": 248.81194029850747, "grad_norm": 19.249740600585938, "learning_rate": 9.258095238095239e-06, "loss": 34.9151, "step": 10450 }, { "epoch": 248.83582089552237, "grad_norm": 16.742633819580078, "learning_rate": 9.257142857142858e-06, "loss": 36.4266, "step": 10451 }, { "epoch": 248.8597014925373, "grad_norm": 20.563678741455078, "learning_rate": 9.256190476190477e-06, "loss": 35.2583, "step": 10452 }, { "epoch": 248.88358208955225, "grad_norm": 17.337919235229492, "learning_rate": 9.255238095238096e-06, "loss": 34.6552, "step": 10453 }, { "epoch": 248.90746268656716, "grad_norm": 18.023143768310547, "learning_rate": 9.254285714285714e-06, "loss": 35.7979, "step": 10454 }, { "epoch": 248.9313432835821, "grad_norm": 20.726966857910156, "learning_rate": 9.253333333333333e-06, "loss": 34.7366, "step": 10455 }, { "epoch": 248.955223880597, "grad_norm": 13.486516952514648, "learning_rate": 9.252380952380952e-06, "loss": 35.0271, "step": 10456 }, { "epoch": 248.97910447761194, "grad_norm": 19.241594314575195, "learning_rate": 9.251428571428573e-06, "loss": 36.4078, "step": 10457 }, { "epoch": 249.0, "grad_norm": 18.16057586669922, "learning_rate": 9.25047619047619e-06, "loss": 32.761, "step": 10458 }, { "epoch": 249.02388059701494, "grad_norm": 15.924449920654297, "learning_rate": 9.24952380952381e-06, "loss": 35.9239, "step": 10459 }, { "epoch": 249.04776119402985, "grad_norm": 17.457326889038086, "learning_rate": 9.24857142857143e-06, "loss": 36.6521, "step": 10460 }, { "epoch": 249.07164179104478, "grad_norm": 17.974369049072266, "learning_rate": 9.247619047619048e-06, "loss": 35.8246, "step": 10461 }, { "epoch": 249.0955223880597, "grad_norm": 18.67871856689453, "learning_rate": 9.246666666666667e-06, "loss": 35.4491, "step": 10462 }, { "epoch": 249.11940298507463, "grad_norm": 15.618583679199219, "learning_rate": 9.245714285714286e-06, "loss": 35.8043, "step": 10463 }, { "epoch": 249.14328358208957, "grad_norm": 16.679800033569336, "learning_rate": 9.244761904761905e-06, "loss": 36.6722, "step": 10464 }, { "epoch": 249.16716417910447, "grad_norm": 15.760109901428223, "learning_rate": 9.243809523809526e-06, "loss": 36.4263, "step": 10465 }, { "epoch": 249.1910447761194, "grad_norm": 14.788159370422363, "learning_rate": 9.242857142857143e-06, "loss": 36.8779, "step": 10466 }, { "epoch": 249.21492537313432, "grad_norm": 19.794042587280273, "learning_rate": 9.241904761904764e-06, "loss": 36.17, "step": 10467 }, { "epoch": 249.23880597014926, "grad_norm": 14.545011520385742, "learning_rate": 9.24095238095238e-06, "loss": 34.7776, "step": 10468 }, { "epoch": 249.26268656716417, "grad_norm": 17.406023025512695, "learning_rate": 9.240000000000001e-06, "loss": 35.3433, "step": 10469 }, { "epoch": 249.2865671641791, "grad_norm": 18.526445388793945, "learning_rate": 9.23904761904762e-06, "loss": 36.0128, "step": 10470 }, { "epoch": 249.31044776119404, "grad_norm": 20.413652420043945, "learning_rate": 9.238095238095239e-06, "loss": 36.7404, "step": 10471 }, { "epoch": 249.33432835820895, "grad_norm": 14.878579139709473, "learning_rate": 9.237142857142858e-06, "loss": 34.8491, "step": 10472 }, { "epoch": 249.3582089552239, "grad_norm": 25.159494400024414, "learning_rate": 9.236190476190477e-06, "loss": 35.5789, "step": 10473 }, { "epoch": 249.3820895522388, "grad_norm": 18.520427703857422, "learning_rate": 9.235238095238096e-06, "loss": 35.8261, "step": 10474 }, { "epoch": 249.40597014925373, "grad_norm": 18.790002822875977, "learning_rate": 9.234285714285715e-06, "loss": 34.3875, "step": 10475 }, { "epoch": 249.42985074626867, "grad_norm": 18.690185546875, "learning_rate": 9.233333333333334e-06, "loss": 36.4462, "step": 10476 }, { "epoch": 249.45373134328358, "grad_norm": 18.580333709716797, "learning_rate": 9.232380952380952e-06, "loss": 35.0818, "step": 10477 }, { "epoch": 249.47761194029852, "grad_norm": 16.21982765197754, "learning_rate": 9.231428571428573e-06, "loss": 35.0682, "step": 10478 }, { "epoch": 249.50149253731342, "grad_norm": 20.19671058654785, "learning_rate": 9.23047619047619e-06, "loss": 36.4464, "step": 10479 }, { "epoch": 249.52537313432836, "grad_norm": 15.847772598266602, "learning_rate": 9.229523809523811e-06, "loss": 35.3635, "step": 10480 }, { "epoch": 249.54925373134327, "grad_norm": 22.037715911865234, "learning_rate": 9.22857142857143e-06, "loss": 35.3782, "step": 10481 }, { "epoch": 249.5731343283582, "grad_norm": 16.708955764770508, "learning_rate": 9.227619047619049e-06, "loss": 35.9639, "step": 10482 }, { "epoch": 249.59701492537314, "grad_norm": 20.463565826416016, "learning_rate": 9.226666666666668e-06, "loss": 35.7338, "step": 10483 }, { "epoch": 249.62089552238805, "grad_norm": 20.576095581054688, "learning_rate": 9.225714285714286e-06, "loss": 34.4538, "step": 10484 }, { "epoch": 249.644776119403, "grad_norm": 13.691299438476562, "learning_rate": 9.224761904761905e-06, "loss": 35.6541, "step": 10485 }, { "epoch": 249.6686567164179, "grad_norm": 25.414104461669922, "learning_rate": 9.223809523809524e-06, "loss": 36.3277, "step": 10486 }, { "epoch": 249.69253731343284, "grad_norm": 18.99018096923828, "learning_rate": 9.222857142857143e-06, "loss": 34.9627, "step": 10487 }, { "epoch": 249.71641791044777, "grad_norm": 16.70145606994629, "learning_rate": 9.221904761904764e-06, "loss": 36.5231, "step": 10488 }, { "epoch": 249.74029850746268, "grad_norm": 27.659765243530273, "learning_rate": 9.220952380952381e-06, "loss": 35.7842, "step": 10489 }, { "epoch": 249.76417910447762, "grad_norm": 18.988645553588867, "learning_rate": 9.220000000000002e-06, "loss": 34.4095, "step": 10490 }, { "epoch": 249.78805970149253, "grad_norm": 30.256803512573242, "learning_rate": 9.21904761904762e-06, "loss": 34.4638, "step": 10491 }, { "epoch": 249.81194029850747, "grad_norm": 23.25464630126953, "learning_rate": 9.21809523809524e-06, "loss": 37.1966, "step": 10492 }, { "epoch": 249.83582089552237, "grad_norm": 32.919071197509766, "learning_rate": 9.217142857142858e-06, "loss": 36.1309, "step": 10493 }, { "epoch": 249.8597014925373, "grad_norm": 25.972665786743164, "learning_rate": 9.216190476190477e-06, "loss": 36.472, "step": 10494 }, { "epoch": 249.88358208955225, "grad_norm": 35.995391845703125, "learning_rate": 9.215238095238096e-06, "loss": 35.8965, "step": 10495 }, { "epoch": 249.90746268656716, "grad_norm": 34.837398529052734, "learning_rate": 9.214285714285715e-06, "loss": 36.5224, "step": 10496 }, { "epoch": 249.9313432835821, "grad_norm": 24.088912963867188, "learning_rate": 9.213333333333334e-06, "loss": 35.1959, "step": 10497 }, { "epoch": 249.955223880597, "grad_norm": 23.177581787109375, "learning_rate": 9.212380952380953e-06, "loss": 35.1953, "step": 10498 }, { "epoch": 249.97910447761194, "grad_norm": 28.00490951538086, "learning_rate": 9.211428571428572e-06, "loss": 36.7443, "step": 10499 }, { "epoch": 250.0, "grad_norm": 18.642913818359375, "learning_rate": 9.21047619047619e-06, "loss": 30.3419, "step": 10500 }, { "epoch": 250.0, "step": 10500, "total_flos": 5.161723630445509e+17, "train_loss": 2.888430085136777, "train_runtime": 25668.5876, "train_samples_per_second": 52.126, "train_steps_per_second": 0.409 }, { "epoch": 250.02388059701494, "grad_norm": 28.627452850341797, "learning_rate": 1e-05, "loss": 36.1543, "step": 10501 }, { "epoch": 250.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99908424908425e-06, "loss": 44.2863, "step": 10502 }, { "epoch": 250.07164179104478, "grad_norm": Infinity, "learning_rate": 9.99908424908425e-06, "loss": 42.9037, "step": 10503 }, { "epoch": 250.0955223880597, "grad_norm": 468.29779052734375, "learning_rate": 9.99908424908425e-06, "loss": 43.7593, "step": 10504 }, { "epoch": 250.11940298507463, "grad_norm": 299.20428466796875, "learning_rate": 9.998168498168499e-06, "loss": 40.5097, "step": 10505 }, { "epoch": 250.14328358208957, "grad_norm": 90.93639373779297, "learning_rate": 9.997252747252748e-06, "loss": 37.8938, "step": 10506 }, { "epoch": 250.16716417910447, "grad_norm": 102.78959655761719, "learning_rate": 9.996336996336997e-06, "loss": 37.3469, "step": 10507 }, { "epoch": 250.1910447761194, "grad_norm": 79.3243408203125, "learning_rate": 9.995421245421246e-06, "loss": 35.8218, "step": 10508 }, { "epoch": 250.21492537313432, "grad_norm": 63.47758483886719, "learning_rate": 9.994505494505496e-06, "loss": 35.8017, "step": 10509 }, { "epoch": 250.23880597014926, "grad_norm": 50.43954086303711, "learning_rate": 9.993589743589745e-06, "loss": 37.1732, "step": 10510 }, { "epoch": 250.26268656716417, "grad_norm": 41.410343170166016, "learning_rate": 9.992673992673994e-06, "loss": 37.0164, "step": 10511 }, { "epoch": 250.2865671641791, "grad_norm": 39.2127685546875, "learning_rate": 9.991758241758243e-06, "loss": 36.3307, "step": 10512 }, { "epoch": 250.31044776119404, "grad_norm": 28.172439575195312, "learning_rate": 9.990842490842492e-06, "loss": 36.6386, "step": 10513 }, { "epoch": 250.33432835820895, "grad_norm": 29.20684242248535, "learning_rate": 9.98992673992674e-06, "loss": 36.7574, "step": 10514 }, { "epoch": 250.3582089552239, "grad_norm": 19.41738510131836, "learning_rate": 9.98901098901099e-06, "loss": 35.7028, "step": 10515 }, { "epoch": 250.3820895522388, "grad_norm": 23.410886764526367, "learning_rate": 9.988095238095239e-06, "loss": 36.2899, "step": 10516 }, { "epoch": 250.40597014925373, "grad_norm": 20.149150848388672, "learning_rate": 9.987179487179488e-06, "loss": 36.4283, "step": 10517 }, { "epoch": 250.42985074626867, "grad_norm": 19.36992073059082, "learning_rate": 9.986263736263737e-06, "loss": 34.6911, "step": 10518 }, { "epoch": 250.45373134328358, "grad_norm": 17.414880752563477, "learning_rate": 9.985347985347986e-06, "loss": 36.1586, "step": 10519 }, { "epoch": 250.47761194029852, "grad_norm": 23.79262924194336, "learning_rate": 9.984432234432236e-06, "loss": 36.3096, "step": 10520 }, { "epoch": 250.50149253731342, "grad_norm": 19.86983871459961, "learning_rate": 9.983516483516485e-06, "loss": 35.8956, "step": 10521 }, { "epoch": 250.52537313432836, "grad_norm": 17.115524291992188, "learning_rate": 9.982600732600734e-06, "loss": 37.0792, "step": 10522 }, { "epoch": 250.54925373134327, "grad_norm": 18.628732681274414, "learning_rate": 9.981684981684983e-06, "loss": 35.3367, "step": 10523 }, { "epoch": 250.5731343283582, "grad_norm": 18.738739013671875, "learning_rate": 9.980769230769232e-06, "loss": 36.0787, "step": 10524 }, { "epoch": 250.59701492537314, "grad_norm": 18.18105125427246, "learning_rate": 9.97985347985348e-06, "loss": 35.7178, "step": 10525 }, { "epoch": 250.62089552238805, "grad_norm": 16.89411735534668, "learning_rate": 9.97893772893773e-06, "loss": 35.0889, "step": 10526 }, { "epoch": 250.644776119403, "grad_norm": 16.15926170349121, "learning_rate": 9.978021978021979e-06, "loss": 35.7411, "step": 10527 }, { "epoch": 250.6686567164179, "grad_norm": 21.53275489807129, "learning_rate": 9.977106227106228e-06, "loss": 35.1226, "step": 10528 }, { "epoch": 250.69253731343284, "grad_norm": 18.122953414916992, "learning_rate": 9.976190476190477e-06, "loss": 36.944, "step": 10529 }, { "epoch": 250.71641791044777, "grad_norm": 17.504945755004883, "learning_rate": 9.975274725274726e-06, "loss": 35.4007, "step": 10530 }, { "epoch": 250.74029850746268, "grad_norm": 14.981329917907715, "learning_rate": 9.974358974358974e-06, "loss": 36.2675, "step": 10531 }, { "epoch": 250.76417910447762, "grad_norm": 15.348061561584473, "learning_rate": 9.973443223443225e-06, "loss": 35.8405, "step": 10532 }, { "epoch": 250.78805970149253, "grad_norm": 21.222579956054688, "learning_rate": 9.972527472527474e-06, "loss": 35.9964, "step": 10533 }, { "epoch": 250.81194029850747, "grad_norm": 14.186641693115234, "learning_rate": 9.971611721611723e-06, "loss": 35.1728, "step": 10534 }, { "epoch": 250.83582089552237, "grad_norm": 20.096670150756836, "learning_rate": 9.970695970695972e-06, "loss": 35.2764, "step": 10535 }, { "epoch": 250.8597014925373, "grad_norm": 16.014314651489258, "learning_rate": 9.969780219780221e-06, "loss": 35.7504, "step": 10536 }, { "epoch": 250.88358208955225, "grad_norm": 22.304344177246094, "learning_rate": 9.96886446886447e-06, "loss": 35.7299, "step": 10537 }, { "epoch": 250.90746268656716, "grad_norm": 17.067577362060547, "learning_rate": 9.967948717948719e-06, "loss": 36.3097, "step": 10538 }, { "epoch": 250.9313432835821, "grad_norm": 20.34279441833496, "learning_rate": 9.967032967032968e-06, "loss": 36.7156, "step": 10539 }, { "epoch": 250.955223880597, "grad_norm": 18.29696273803711, "learning_rate": 9.966117216117217e-06, "loss": 34.6629, "step": 10540 }, { "epoch": 250.97910447761194, "grad_norm": 17.84090805053711, "learning_rate": 9.965201465201466e-06, "loss": 35.3662, "step": 10541 }, { "epoch": 251.0, "grad_norm": 14.382659912109375, "learning_rate": 9.964285714285714e-06, "loss": 31.1388, "step": 10542 }, { "epoch": 251.02388059701494, "grad_norm": 16.641250610351562, "learning_rate": 9.963369963369965e-06, "loss": 35.9895, "step": 10543 }, { "epoch": 251.04776119402985, "grad_norm": 24.440488815307617, "learning_rate": 9.962454212454214e-06, "loss": 36.1701, "step": 10544 }, { "epoch": 251.07164179104478, "grad_norm": 15.083442687988281, "learning_rate": 9.961538461538463e-06, "loss": 34.2133, "step": 10545 }, { "epoch": 251.0955223880597, "grad_norm": 18.536592483520508, "learning_rate": 9.960622710622712e-06, "loss": 36.3549, "step": 10546 }, { "epoch": 251.11940298507463, "grad_norm": 21.398738861083984, "learning_rate": 9.959706959706961e-06, "loss": 36.8876, "step": 10547 }, { "epoch": 251.14328358208957, "grad_norm": 19.11338996887207, "learning_rate": 9.95879120879121e-06, "loss": 35.3419, "step": 10548 }, { "epoch": 251.16716417910447, "grad_norm": 14.9404296875, "learning_rate": 9.957875457875459e-06, "loss": 35.8585, "step": 10549 }, { "epoch": 251.1910447761194, "grad_norm": 17.329944610595703, "learning_rate": 9.956959706959708e-06, "loss": 34.8394, "step": 10550 }, { "epoch": 251.21492537313432, "grad_norm": 18.323760986328125, "learning_rate": 9.956043956043957e-06, "loss": 35.0695, "step": 10551 }, { "epoch": 251.23880597014926, "grad_norm": 14.89617919921875, "learning_rate": 9.955128205128206e-06, "loss": 36.0575, "step": 10552 }, { "epoch": 251.26268656716417, "grad_norm": 14.273953437805176, "learning_rate": 9.954212454212454e-06, "loss": 34.5598, "step": 10553 }, { "epoch": 251.2865671641791, "grad_norm": 20.694751739501953, "learning_rate": 9.953296703296705e-06, "loss": 35.7871, "step": 10554 }, { "epoch": 251.31044776119404, "grad_norm": 18.564138412475586, "learning_rate": 9.952380952380954e-06, "loss": 34.7276, "step": 10555 }, { "epoch": 251.33432835820895, "grad_norm": 16.587295532226562, "learning_rate": 9.951465201465203e-06, "loss": 33.4794, "step": 10556 }, { "epoch": 251.3582089552239, "grad_norm": 13.673843383789062, "learning_rate": 9.950549450549452e-06, "loss": 35.7263, "step": 10557 }, { "epoch": 251.3820895522388, "grad_norm": 17.324235916137695, "learning_rate": 9.949633699633701e-06, "loss": 37.2802, "step": 10558 }, { "epoch": 251.40597014925373, "grad_norm": 15.747190475463867, "learning_rate": 9.94871794871795e-06, "loss": 35.5896, "step": 10559 }, { "epoch": 251.42985074626867, "grad_norm": 13.725537300109863, "learning_rate": 9.947802197802199e-06, "loss": 34.8298, "step": 10560 }, { "epoch": 251.45373134328358, "grad_norm": 15.41905689239502, "learning_rate": 9.946886446886448e-06, "loss": 35.1748, "step": 10561 }, { "epoch": 251.47761194029852, "grad_norm": 14.468822479248047, "learning_rate": 9.945970695970697e-06, "loss": 36.4486, "step": 10562 }, { "epoch": 251.50149253731342, "grad_norm": 24.43714141845703, "learning_rate": 9.945054945054946e-06, "loss": 35.5891, "step": 10563 }, { "epoch": 251.52537313432836, "grad_norm": 15.711543083190918, "learning_rate": 9.944139194139194e-06, "loss": 34.2064, "step": 10564 }, { "epoch": 251.54925373134327, "grad_norm": 18.085830688476562, "learning_rate": 9.943223443223443e-06, "loss": 35.3084, "step": 10565 }, { "epoch": 251.5731343283582, "grad_norm": 26.0734920501709, "learning_rate": 9.942307692307694e-06, "loss": 36.4838, "step": 10566 }, { "epoch": 251.59701492537314, "grad_norm": 16.947580337524414, "learning_rate": 9.941391941391943e-06, "loss": 35.6721, "step": 10567 }, { "epoch": 251.62089552238805, "grad_norm": 14.086678504943848, "learning_rate": 9.940476190476192e-06, "loss": 35.9191, "step": 10568 }, { "epoch": 251.644776119403, "grad_norm": 21.768564224243164, "learning_rate": 9.939560439560441e-06, "loss": 37.1093, "step": 10569 }, { "epoch": 251.6686567164179, "grad_norm": 19.308162689208984, "learning_rate": 9.93864468864469e-06, "loss": 35.1927, "step": 10570 }, { "epoch": 251.69253731343284, "grad_norm": 13.51604175567627, "learning_rate": 9.937728937728939e-06, "loss": 36.3524, "step": 10571 }, { "epoch": 251.71641791044777, "grad_norm": 23.234386444091797, "learning_rate": 9.936813186813188e-06, "loss": 36.2885, "step": 10572 }, { "epoch": 251.74029850746268, "grad_norm": 20.840883255004883, "learning_rate": 9.935897435897437e-06, "loss": 35.4507, "step": 10573 }, { "epoch": 251.76417910447762, "grad_norm": 17.30851173400879, "learning_rate": 9.934981684981686e-06, "loss": 36.5154, "step": 10574 }, { "epoch": 251.78805970149253, "grad_norm": 20.508922576904297, "learning_rate": 9.934065934065935e-06, "loss": 37.1845, "step": 10575 }, { "epoch": 251.81194029850747, "grad_norm": 16.55733871459961, "learning_rate": 9.933150183150183e-06, "loss": 35.43, "step": 10576 }, { "epoch": 251.83582089552237, "grad_norm": 19.661312103271484, "learning_rate": 9.932234432234434e-06, "loss": 37.1997, "step": 10577 }, { "epoch": 251.8597014925373, "grad_norm": 19.369338989257812, "learning_rate": 9.931318681318683e-06, "loss": 35.6278, "step": 10578 }, { "epoch": 251.88358208955225, "grad_norm": 19.126426696777344, "learning_rate": 9.930402930402932e-06, "loss": 36.139, "step": 10579 }, { "epoch": 251.90746268656716, "grad_norm": 15.560017585754395, "learning_rate": 9.929487179487181e-06, "loss": 35.9964, "step": 10580 }, { "epoch": 251.9313432835821, "grad_norm": 19.47745704650879, "learning_rate": 9.92857142857143e-06, "loss": 34.6067, "step": 10581 }, { "epoch": 251.955223880597, "grad_norm": 17.10700225830078, "learning_rate": 9.927655677655679e-06, "loss": 35.3225, "step": 10582 }, { "epoch": 251.97910447761194, "grad_norm": 17.476547241210938, "learning_rate": 9.926739926739928e-06, "loss": 36.3405, "step": 10583 }, { "epoch": 252.0, "grad_norm": 13.041790962219238, "learning_rate": 9.925824175824177e-06, "loss": 30.8011, "step": 10584 }, { "epoch": 252.02388059701494, "grad_norm": 17.96108627319336, "learning_rate": 9.924908424908426e-06, "loss": 35.7446, "step": 10585 }, { "epoch": 252.04776119402985, "grad_norm": 16.00111198425293, "learning_rate": 9.923992673992675e-06, "loss": 36.71, "step": 10586 }, { "epoch": 252.07164179104478, "grad_norm": 17.1088809967041, "learning_rate": 9.923076923076923e-06, "loss": 34.772, "step": 10587 }, { "epoch": 252.0955223880597, "grad_norm": 14.14958667755127, "learning_rate": 9.922161172161174e-06, "loss": 36.1205, "step": 10588 }, { "epoch": 252.11940298507463, "grad_norm": 15.669458389282227, "learning_rate": 9.921245421245423e-06, "loss": 35.1513, "step": 10589 }, { "epoch": 252.14328358208957, "grad_norm": 13.559708595275879, "learning_rate": 9.920329670329672e-06, "loss": 36.3405, "step": 10590 }, { "epoch": 252.16716417910447, "grad_norm": 14.526890754699707, "learning_rate": 9.919413919413921e-06, "loss": 35.3845, "step": 10591 }, { "epoch": 252.1910447761194, "grad_norm": 15.401045799255371, "learning_rate": 9.91849816849817e-06, "loss": 34.7524, "step": 10592 }, { "epoch": 252.21492537313432, "grad_norm": 15.31092357635498, "learning_rate": 9.917582417582419e-06, "loss": 36.2405, "step": 10593 }, { "epoch": 252.23880597014926, "grad_norm": 17.768455505371094, "learning_rate": 9.916666666666668e-06, "loss": 36.9562, "step": 10594 }, { "epoch": 252.26268656716417, "grad_norm": 16.205974578857422, "learning_rate": 9.915750915750917e-06, "loss": 34.8221, "step": 10595 }, { "epoch": 252.2865671641791, "grad_norm": 17.758411407470703, "learning_rate": 9.914835164835166e-06, "loss": 35.4166, "step": 10596 }, { "epoch": 252.31044776119404, "grad_norm": 18.2944393157959, "learning_rate": 9.913919413919415e-06, "loss": 36.4242, "step": 10597 }, { "epoch": 252.33432835820895, "grad_norm": 15.548382759094238, "learning_rate": 9.913003663003663e-06, "loss": 36.1634, "step": 10598 }, { "epoch": 252.3582089552239, "grad_norm": NaN, "learning_rate": 9.912087912087912e-06, "loss": 39.2695, "step": 10599 }, { "epoch": 252.3820895522388, "grad_norm": 22.2453670501709, "learning_rate": 9.912087912087912e-06, "loss": 35.3513, "step": 10600 }, { "epoch": 252.40597014925373, "grad_norm": 14.400615692138672, "learning_rate": 9.911172161172163e-06, "loss": 35.7981, "step": 10601 }, { "epoch": 252.42985074626867, "grad_norm": 22.20725440979004, "learning_rate": 9.910256410256412e-06, "loss": 35.2674, "step": 10602 }, { "epoch": 252.45373134328358, "grad_norm": 19.966716766357422, "learning_rate": 9.909340659340661e-06, "loss": 35.7884, "step": 10603 }, { "epoch": 252.47761194029852, "grad_norm": 18.020273208618164, "learning_rate": 9.90842490842491e-06, "loss": 35.4788, "step": 10604 }, { "epoch": 252.50149253731342, "grad_norm": 14.49180793762207, "learning_rate": 9.907509157509159e-06, "loss": 35.6622, "step": 10605 }, { "epoch": 252.52537313432836, "grad_norm": 20.081396102905273, "learning_rate": 9.906593406593408e-06, "loss": 36.1306, "step": 10606 }, { "epoch": 252.54925373134327, "grad_norm": 16.541526794433594, "learning_rate": 9.905677655677657e-06, "loss": 35.7649, "step": 10607 }, { "epoch": 252.5731343283582, "grad_norm": 18.83778190612793, "learning_rate": 9.904761904761906e-06, "loss": 35.8396, "step": 10608 }, { "epoch": 252.59701492537314, "grad_norm": 16.112417221069336, "learning_rate": 9.903846153846155e-06, "loss": 36.2038, "step": 10609 }, { "epoch": 252.62089552238805, "grad_norm": 17.835330963134766, "learning_rate": 9.902930402930403e-06, "loss": 36.0932, "step": 10610 }, { "epoch": 252.644776119403, "grad_norm": 18.30726432800293, "learning_rate": 9.902014652014652e-06, "loss": 36.0771, "step": 10611 }, { "epoch": 252.6686567164179, "grad_norm": 15.045116424560547, "learning_rate": 9.901098901098903e-06, "loss": 36.3511, "step": 10612 }, { "epoch": 252.69253731343284, "grad_norm": 14.947028160095215, "learning_rate": 9.900183150183152e-06, "loss": 35.7482, "step": 10613 }, { "epoch": 252.71641791044777, "grad_norm": 14.970281600952148, "learning_rate": 9.899267399267401e-06, "loss": 35.0967, "step": 10614 }, { "epoch": 252.74029850746268, "grad_norm": 14.05617618560791, "learning_rate": 9.89835164835165e-06, "loss": 34.6361, "step": 10615 }, { "epoch": 252.76417910447762, "grad_norm": 18.122276306152344, "learning_rate": 9.897435897435899e-06, "loss": 35.2779, "step": 10616 }, { "epoch": 252.78805970149253, "grad_norm": 16.886306762695312, "learning_rate": 9.896520146520148e-06, "loss": 35.1227, "step": 10617 }, { "epoch": 252.81194029850747, "grad_norm": 21.922508239746094, "learning_rate": 9.895604395604397e-06, "loss": 35.4356, "step": 10618 }, { "epoch": 252.83582089552237, "grad_norm": 14.450441360473633, "learning_rate": 9.894688644688646e-06, "loss": 35.8054, "step": 10619 }, { "epoch": 252.8597014925373, "grad_norm": 18.629562377929688, "learning_rate": 9.893772893772895e-06, "loss": 34.0044, "step": 10620 }, { "epoch": 252.88358208955225, "grad_norm": 19.719114303588867, "learning_rate": 9.892857142857143e-06, "loss": 35.2392, "step": 10621 }, { "epoch": 252.90746268656716, "grad_norm": 16.302021026611328, "learning_rate": 9.891941391941392e-06, "loss": 36.1957, "step": 10622 }, { "epoch": 252.9313432835821, "grad_norm": 17.12664031982422, "learning_rate": 9.891025641025643e-06, "loss": 35.7172, "step": 10623 }, { "epoch": 252.955223880597, "grad_norm": 14.867701530456543, "learning_rate": 9.890109890109892e-06, "loss": 35.5574, "step": 10624 }, { "epoch": 252.97910447761194, "grad_norm": NaN, "learning_rate": 9.889194139194141e-06, "loss": 57.8029, "step": 10625 }, { "epoch": 253.0, "grad_norm": 16.28997230529785, "learning_rate": 9.889194139194141e-06, "loss": 31.8982, "step": 10626 }, { "epoch": 253.02388059701494, "grad_norm": 20.980283737182617, "learning_rate": 9.88827838827839e-06, "loss": 36.4026, "step": 10627 }, { "epoch": 253.04776119402985, "grad_norm": 14.806510925292969, "learning_rate": 9.887362637362639e-06, "loss": 35.5898, "step": 10628 }, { "epoch": 253.07164179104478, "grad_norm": 14.85124397277832, "learning_rate": 9.886446886446888e-06, "loss": 34.7019, "step": 10629 }, { "epoch": 253.0955223880597, "grad_norm": 16.852712631225586, "learning_rate": 9.885531135531137e-06, "loss": 35.713, "step": 10630 }, { "epoch": 253.11940298507463, "grad_norm": 18.0716552734375, "learning_rate": 9.884615384615386e-06, "loss": 36.6052, "step": 10631 }, { "epoch": 253.14328358208957, "grad_norm": 16.551170349121094, "learning_rate": 9.883699633699635e-06, "loss": 35.315, "step": 10632 }, { "epoch": 253.16716417910447, "grad_norm": 17.68378448486328, "learning_rate": 9.882783882783884e-06, "loss": 35.2735, "step": 10633 }, { "epoch": 253.1910447761194, "grad_norm": 13.689247131347656, "learning_rate": 9.881868131868132e-06, "loss": 35.1588, "step": 10634 }, { "epoch": 253.21492537313432, "grad_norm": 17.445785522460938, "learning_rate": 9.880952380952381e-06, "loss": 36.4603, "step": 10635 }, { "epoch": 253.23880597014926, "grad_norm": 14.128791809082031, "learning_rate": 9.880036630036632e-06, "loss": 35.8233, "step": 10636 }, { "epoch": 253.26268656716417, "grad_norm": 20.628108978271484, "learning_rate": 9.879120879120881e-06, "loss": 36.1753, "step": 10637 }, { "epoch": 253.2865671641791, "grad_norm": 17.83094596862793, "learning_rate": 9.87820512820513e-06, "loss": 34.08, "step": 10638 }, { "epoch": 253.31044776119404, "grad_norm": 16.69542694091797, "learning_rate": 9.877289377289379e-06, "loss": 35.9817, "step": 10639 }, { "epoch": 253.33432835820895, "grad_norm": 22.935531616210938, "learning_rate": 9.876373626373628e-06, "loss": 35.6918, "step": 10640 }, { "epoch": 253.3582089552239, "grad_norm": 17.162960052490234, "learning_rate": 9.875457875457877e-06, "loss": 35.6679, "step": 10641 }, { "epoch": 253.3820895522388, "grad_norm": 19.26456642150879, "learning_rate": 9.874542124542126e-06, "loss": 35.9056, "step": 10642 }, { "epoch": 253.40597014925373, "grad_norm": 16.83243751525879, "learning_rate": 9.873626373626375e-06, "loss": 35.8743, "step": 10643 }, { "epoch": 253.42985074626867, "grad_norm": 16.80619239807129, "learning_rate": 9.872710622710624e-06, "loss": 35.5985, "step": 10644 }, { "epoch": 253.45373134328358, "grad_norm": 17.081470489501953, "learning_rate": 9.871794871794872e-06, "loss": 36.2124, "step": 10645 }, { "epoch": 253.47761194029852, "grad_norm": 16.921592712402344, "learning_rate": 9.870879120879121e-06, "loss": 37.2168, "step": 10646 }, { "epoch": 253.50149253731342, "grad_norm": 19.568693161010742, "learning_rate": 9.869963369963372e-06, "loss": 35.4296, "step": 10647 }, { "epoch": 253.52537313432836, "grad_norm": 15.584951400756836, "learning_rate": 9.869047619047621e-06, "loss": 34.499, "step": 10648 }, { "epoch": 253.54925373134327, "grad_norm": NaN, "learning_rate": 9.86813186813187e-06, "loss": 51.2525, "step": 10649 }, { "epoch": 253.5731343283582, "grad_norm": 20.872446060180664, "learning_rate": 9.86813186813187e-06, "loss": 36.0167, "step": 10650 }, { "epoch": 253.59701492537314, "grad_norm": 15.182371139526367, "learning_rate": 9.867216117216119e-06, "loss": 35.4729, "step": 10651 }, { "epoch": 253.62089552238805, "grad_norm": 16.24883270263672, "learning_rate": 9.866300366300368e-06, "loss": 35.0087, "step": 10652 }, { "epoch": 253.644776119403, "grad_norm": 17.47576904296875, "learning_rate": 9.865384615384617e-06, "loss": 36.4064, "step": 10653 }, { "epoch": 253.6686567164179, "grad_norm": 14.746021270751953, "learning_rate": 9.864468864468866e-06, "loss": 34.953, "step": 10654 }, { "epoch": 253.69253731343284, "grad_norm": 17.191213607788086, "learning_rate": 9.863553113553115e-06, "loss": 35.8077, "step": 10655 }, { "epoch": 253.71641791044777, "grad_norm": 18.858600616455078, "learning_rate": 9.862637362637364e-06, "loss": 35.475, "step": 10656 }, { "epoch": 253.74029850746268, "grad_norm": 15.895038604736328, "learning_rate": 9.861721611721612e-06, "loss": 35.7695, "step": 10657 }, { "epoch": 253.76417910447762, "grad_norm": 16.461393356323242, "learning_rate": 9.860805860805861e-06, "loss": 34.7558, "step": 10658 }, { "epoch": 253.78805970149253, "grad_norm": 13.94180679321289, "learning_rate": 9.859890109890112e-06, "loss": 35.5977, "step": 10659 }, { "epoch": 253.81194029850747, "grad_norm": 19.791231155395508, "learning_rate": 9.858974358974361e-06, "loss": 34.6555, "step": 10660 }, { "epoch": 253.83582089552237, "grad_norm": 23.466222763061523, "learning_rate": 9.85805860805861e-06, "loss": 36.2696, "step": 10661 }, { "epoch": 253.8597014925373, "grad_norm": 13.160076141357422, "learning_rate": 9.857142857142859e-06, "loss": 34.7341, "step": 10662 }, { "epoch": 253.88358208955225, "grad_norm": 22.34493637084961, "learning_rate": 9.856227106227108e-06, "loss": 36.6567, "step": 10663 }, { "epoch": 253.90746268656716, "grad_norm": 19.96533203125, "learning_rate": 9.855311355311357e-06, "loss": 35.8252, "step": 10664 }, { "epoch": 253.9313432835821, "grad_norm": 14.463510513305664, "learning_rate": 9.854395604395606e-06, "loss": 35.7169, "step": 10665 }, { "epoch": 253.955223880597, "grad_norm": 16.703460693359375, "learning_rate": 9.853479853479855e-06, "loss": 36.2886, "step": 10666 }, { "epoch": 253.97910447761194, "grad_norm": 18.7047119140625, "learning_rate": 9.852564102564104e-06, "loss": 34.8688, "step": 10667 }, { "epoch": 254.0, "grad_norm": 18.250211715698242, "learning_rate": 9.851648351648352e-06, "loss": 30.4847, "step": 10668 }, { "epoch": 254.02388059701494, "grad_norm": 15.180882453918457, "learning_rate": 9.850732600732601e-06, "loss": 35.606, "step": 10669 }, { "epoch": 254.04776119402985, "grad_norm": 21.814985275268555, "learning_rate": 9.84981684981685e-06, "loss": 34.8113, "step": 10670 }, { "epoch": 254.07164179104478, "grad_norm": 19.73859405517578, "learning_rate": 9.848901098901101e-06, "loss": 35.4863, "step": 10671 }, { "epoch": 254.0955223880597, "grad_norm": 16.870332717895508, "learning_rate": 9.84798534798535e-06, "loss": 33.621, "step": 10672 }, { "epoch": 254.11940298507463, "grad_norm": 13.403437614440918, "learning_rate": 9.847069597069599e-06, "loss": 35.928, "step": 10673 }, { "epoch": 254.14328358208957, "grad_norm": 22.546737670898438, "learning_rate": 9.846153846153848e-06, "loss": 34.8652, "step": 10674 }, { "epoch": 254.16716417910447, "grad_norm": 21.375059127807617, "learning_rate": 9.845238095238097e-06, "loss": 35.008, "step": 10675 }, { "epoch": 254.1910447761194, "grad_norm": 13.61618423461914, "learning_rate": 9.844322344322346e-06, "loss": 35.4534, "step": 10676 }, { "epoch": 254.21492537313432, "grad_norm": 30.130348205566406, "learning_rate": 9.843406593406593e-06, "loss": 35.8521, "step": 10677 }, { "epoch": 254.23880597014926, "grad_norm": 21.24388313293457, "learning_rate": 9.842490842490844e-06, "loss": 36.396, "step": 10678 }, { "epoch": 254.26268656716417, "grad_norm": 24.324691772460938, "learning_rate": 9.841575091575092e-06, "loss": 35.4676, "step": 10679 }, { "epoch": 254.2865671641791, "grad_norm": 20.920507431030273, "learning_rate": 9.840659340659341e-06, "loss": 34.6309, "step": 10680 }, { "epoch": 254.31044776119404, "grad_norm": 14.969414710998535, "learning_rate": 9.83974358974359e-06, "loss": 35.4426, "step": 10681 }, { "epoch": 254.33432835820895, "grad_norm": 29.528079986572266, "learning_rate": 9.83882783882784e-06, "loss": 34.5512, "step": 10682 }, { "epoch": 254.3582089552239, "grad_norm": 18.79596519470215, "learning_rate": 9.837912087912088e-06, "loss": 34.5028, "step": 10683 }, { "epoch": 254.3820895522388, "grad_norm": 30.32623291015625, "learning_rate": 9.836996336996337e-06, "loss": 36.0936, "step": 10684 }, { "epoch": 254.40597014925373, "grad_norm": 19.43465232849121, "learning_rate": 9.836080586080586e-06, "loss": 35.9299, "step": 10685 }, { "epoch": 254.42985074626867, "grad_norm": 29.898040771484375, "learning_rate": 9.835164835164835e-06, "loss": 36.6122, "step": 10686 }, { "epoch": 254.45373134328358, "grad_norm": 19.775339126586914, "learning_rate": 9.834249084249084e-06, "loss": 35.9243, "step": 10687 }, { "epoch": 254.47761194029852, "grad_norm": 21.716243743896484, "learning_rate": 9.833333333333333e-06, "loss": 36.7411, "step": 10688 }, { "epoch": 254.50149253731342, "grad_norm": 26.77560806274414, "learning_rate": 9.832417582417582e-06, "loss": 35.0157, "step": 10689 }, { "epoch": 254.52537313432836, "grad_norm": 17.745500564575195, "learning_rate": 9.831501831501832e-06, "loss": 36.6571, "step": 10690 }, { "epoch": 254.54925373134327, "grad_norm": 35.65312194824219, "learning_rate": 9.830586080586081e-06, "loss": 35.8319, "step": 10691 }, { "epoch": 254.5731343283582, "grad_norm": 24.51959991455078, "learning_rate": 9.82967032967033e-06, "loss": 36.0061, "step": 10692 }, { "epoch": 254.59701492537314, "grad_norm": 32.02620315551758, "learning_rate": 9.82875457875458e-06, "loss": 36.1562, "step": 10693 }, { "epoch": 254.62089552238805, "grad_norm": 22.318777084350586, "learning_rate": 9.827838827838828e-06, "loss": 35.8546, "step": 10694 }, { "epoch": 254.644776119403, "grad_norm": 35.524166107177734, "learning_rate": 9.826923076923077e-06, "loss": 36.0162, "step": 10695 }, { "epoch": 254.6686567164179, "grad_norm": 23.13291358947754, "learning_rate": 9.826007326007326e-06, "loss": 34.7835, "step": 10696 }, { "epoch": 254.69253731343284, "grad_norm": 44.42383575439453, "learning_rate": 9.825091575091575e-06, "loss": 36.2586, "step": 10697 }, { "epoch": 254.71641791044777, "grad_norm": 36.46686935424805, "learning_rate": 9.824175824175824e-06, "loss": 35.597, "step": 10698 }, { "epoch": 254.74029850746268, "grad_norm": 28.54291534423828, "learning_rate": 9.823260073260073e-06, "loss": 35.1425, "step": 10699 }, { "epoch": 254.76417910447762, "grad_norm": 30.905284881591797, "learning_rate": 9.822344322344322e-06, "loss": 35.0142, "step": 10700 }, { "epoch": 254.78805970149253, "grad_norm": 25.988637924194336, "learning_rate": 9.821428571428573e-06, "loss": 35.7779, "step": 10701 }, { "epoch": 254.81194029850747, "grad_norm": 23.147994995117188, "learning_rate": 9.820512820512821e-06, "loss": 35.113, "step": 10702 }, { "epoch": 254.83582089552237, "grad_norm": 33.8663215637207, "learning_rate": 9.81959706959707e-06, "loss": 36.0861, "step": 10703 }, { "epoch": 254.8597014925373, "grad_norm": 27.69142723083496, "learning_rate": 9.81868131868132e-06, "loss": 34.5732, "step": 10704 }, { "epoch": 254.88358208955225, "grad_norm": 31.73871612548828, "learning_rate": 9.817765567765568e-06, "loss": 35.9929, "step": 10705 }, { "epoch": 254.90746268656716, "grad_norm": 32.973655700683594, "learning_rate": 9.816849816849817e-06, "loss": 36.3537, "step": 10706 }, { "epoch": 254.9313432835821, "grad_norm": 26.514671325683594, "learning_rate": 9.815934065934066e-06, "loss": 35.5262, "step": 10707 }, { "epoch": 254.955223880597, "grad_norm": 26.40163803100586, "learning_rate": 9.815018315018315e-06, "loss": 35.4639, "step": 10708 }, { "epoch": 254.97910447761194, "grad_norm": 31.693086624145508, "learning_rate": 9.814102564102564e-06, "loss": 36.5741, "step": 10709 }, { "epoch": 255.0, "grad_norm": 25.91537857055664, "learning_rate": 9.813186813186813e-06, "loss": 31.1786, "step": 10710 }, { "epoch": 255.02388059701494, "grad_norm": 28.340864181518555, "learning_rate": 9.812271062271062e-06, "loss": 35.6379, "step": 10711 }, { "epoch": 255.04776119402985, "grad_norm": 24.879518508911133, "learning_rate": 9.811355311355313e-06, "loss": 35.3954, "step": 10712 }, { "epoch": 255.07164179104478, "grad_norm": 35.86328887939453, "learning_rate": 9.810439560439561e-06, "loss": 35.1415, "step": 10713 }, { "epoch": 255.0955223880597, "grad_norm": 26.792383193969727, "learning_rate": 9.80952380952381e-06, "loss": 35.8137, "step": 10714 }, { "epoch": 255.11940298507463, "grad_norm": 34.05632400512695, "learning_rate": 9.80860805860806e-06, "loss": 36.3998, "step": 10715 }, { "epoch": 255.14328358208957, "grad_norm": 31.903276443481445, "learning_rate": 9.807692307692308e-06, "loss": 35.251, "step": 10716 }, { "epoch": 255.16716417910447, "grad_norm": 26.982019424438477, "learning_rate": 9.806776556776557e-06, "loss": 35.2743, "step": 10717 }, { "epoch": 255.1910447761194, "grad_norm": 25.058006286621094, "learning_rate": 9.805860805860806e-06, "loss": 35.5574, "step": 10718 }, { "epoch": 255.21492537313432, "grad_norm": 29.534523010253906, "learning_rate": 9.804945054945055e-06, "loss": 36.2936, "step": 10719 }, { "epoch": 255.23880597014926, "grad_norm": 23.080780029296875, "learning_rate": 9.804029304029304e-06, "loss": 36.2085, "step": 10720 }, { "epoch": 255.26268656716417, "grad_norm": 35.26456069946289, "learning_rate": 9.803113553113553e-06, "loss": 36.2345, "step": 10721 }, { "epoch": 255.2865671641791, "grad_norm": 31.185991287231445, "learning_rate": 9.802197802197802e-06, "loss": 36.1327, "step": 10722 }, { "epoch": 255.31044776119404, "grad_norm": 28.98785400390625, "learning_rate": 9.801282051282053e-06, "loss": 34.582, "step": 10723 }, { "epoch": 255.33432835820895, "grad_norm": 28.52055549621582, "learning_rate": 9.800366300366301e-06, "loss": 35.0338, "step": 10724 }, { "epoch": 255.3582089552239, "grad_norm": 28.13873291015625, "learning_rate": 9.79945054945055e-06, "loss": 35.6373, "step": 10725 }, { "epoch": 255.3820895522388, "grad_norm": 22.614482879638672, "learning_rate": 9.7985347985348e-06, "loss": 34.2453, "step": 10726 }, { "epoch": 255.40597014925373, "grad_norm": 31.850677490234375, "learning_rate": 9.797619047619048e-06, "loss": 35.9406, "step": 10727 }, { "epoch": 255.42985074626867, "grad_norm": 28.57715606689453, "learning_rate": 9.796703296703297e-06, "loss": 36.612, "step": 10728 }, { "epoch": 255.45373134328358, "grad_norm": 30.37266731262207, "learning_rate": 9.795787545787546e-06, "loss": 35.3086, "step": 10729 }, { "epoch": 255.47761194029852, "grad_norm": 28.511348724365234, "learning_rate": 9.794871794871795e-06, "loss": 35.2757, "step": 10730 }, { "epoch": 255.50149253731342, "grad_norm": 27.241514205932617, "learning_rate": 9.793956043956044e-06, "loss": 34.9542, "step": 10731 }, { "epoch": 255.52537313432836, "grad_norm": 25.99595069885254, "learning_rate": 9.793040293040293e-06, "loss": 35.4933, "step": 10732 }, { "epoch": 255.54925373134327, "grad_norm": 31.940399169921875, "learning_rate": 9.792124542124542e-06, "loss": 36.7967, "step": 10733 }, { "epoch": 255.5731343283582, "grad_norm": 26.10555076599121, "learning_rate": 9.79120879120879e-06, "loss": 35.4873, "step": 10734 }, { "epoch": 255.59701492537314, "grad_norm": 30.710800170898438, "learning_rate": 9.790293040293041e-06, "loss": 35.6772, "step": 10735 }, { "epoch": 255.62089552238805, "grad_norm": 26.41657829284668, "learning_rate": 9.78937728937729e-06, "loss": 35.5839, "step": 10736 }, { "epoch": 255.644776119403, "grad_norm": 27.801651000976562, "learning_rate": 9.78846153846154e-06, "loss": 36.1724, "step": 10737 }, { "epoch": 255.6686567164179, "grad_norm": 25.25094223022461, "learning_rate": 9.787545787545788e-06, "loss": 34.6832, "step": 10738 }, { "epoch": 255.69253731343284, "grad_norm": 32.438541412353516, "learning_rate": 9.786630036630037e-06, "loss": 34.7333, "step": 10739 }, { "epoch": 255.71641791044777, "grad_norm": 24.55843734741211, "learning_rate": 9.785714285714286e-06, "loss": 34.6326, "step": 10740 }, { "epoch": 255.74029850746268, "grad_norm": 33.32674789428711, "learning_rate": 9.784798534798535e-06, "loss": 35.8815, "step": 10741 }, { "epoch": 255.76417910447762, "grad_norm": 30.69706916809082, "learning_rate": 9.783882783882784e-06, "loss": 36.6207, "step": 10742 }, { "epoch": 255.78805970149253, "grad_norm": 26.328582763671875, "learning_rate": 9.782967032967033e-06, "loss": 36.0933, "step": 10743 }, { "epoch": 255.81194029850747, "grad_norm": 24.8414306640625, "learning_rate": 9.782051282051282e-06, "loss": 35.9427, "step": 10744 }, { "epoch": 255.83582089552237, "grad_norm": 27.940202713012695, "learning_rate": 9.781135531135531e-06, "loss": 34.602, "step": 10745 }, { "epoch": 255.8597014925373, "grad_norm": 25.440942764282227, "learning_rate": 9.780219780219781e-06, "loss": 35.254, "step": 10746 }, { "epoch": 255.88358208955225, "grad_norm": 32.52458572387695, "learning_rate": 9.77930402930403e-06, "loss": 35.3009, "step": 10747 }, { "epoch": 255.90746268656716, "grad_norm": 27.224624633789062, "learning_rate": 9.77838827838828e-06, "loss": 34.3565, "step": 10748 }, { "epoch": 255.9313432835821, "grad_norm": 26.033937454223633, "learning_rate": 9.777472527472528e-06, "loss": 36.2242, "step": 10749 }, { "epoch": 255.955223880597, "grad_norm": 24.01490020751953, "learning_rate": 9.776556776556777e-06, "loss": 35.5857, "step": 10750 }, { "epoch": 255.97910447761194, "grad_norm": 25.998130798339844, "learning_rate": 9.775641025641026e-06, "loss": 34.538, "step": 10751 }, { "epoch": 256.0, "grad_norm": 19.31838607788086, "learning_rate": 9.774725274725275e-06, "loss": 30.682, "step": 10752 }, { "epoch": 256.0238805970149, "grad_norm": 31.648189544677734, "learning_rate": 9.773809523809524e-06, "loss": 36.0167, "step": 10753 }, { "epoch": 256.0477611940299, "grad_norm": 27.066003799438477, "learning_rate": 9.772893772893773e-06, "loss": 36.0605, "step": 10754 }, { "epoch": 256.0716417910448, "grad_norm": 29.421175003051758, "learning_rate": 9.771978021978022e-06, "loss": 36.7364, "step": 10755 }, { "epoch": 256.0955223880597, "grad_norm": 25.722335815429688, "learning_rate": 9.771062271062271e-06, "loss": 35.1385, "step": 10756 }, { "epoch": 256.1194029850746, "grad_norm": 28.203012466430664, "learning_rate": 9.770146520146521e-06, "loss": 34.9666, "step": 10757 }, { "epoch": 256.14328358208957, "grad_norm": 26.54973793029785, "learning_rate": 9.76923076923077e-06, "loss": 35.6997, "step": 10758 }, { "epoch": 256.1671641791045, "grad_norm": 28.533681869506836, "learning_rate": 9.76831501831502e-06, "loss": 35.9888, "step": 10759 }, { "epoch": 256.1910447761194, "grad_norm": 26.133970260620117, "learning_rate": 9.767399267399268e-06, "loss": 35.8334, "step": 10760 }, { "epoch": 256.21492537313435, "grad_norm": 26.8007755279541, "learning_rate": 9.766483516483517e-06, "loss": 35.0449, "step": 10761 }, { "epoch": 256.23880597014926, "grad_norm": 25.353805541992188, "learning_rate": 9.765567765567766e-06, "loss": 35.4466, "step": 10762 }, { "epoch": 256.26268656716417, "grad_norm": 31.411041259765625, "learning_rate": 9.764652014652015e-06, "loss": 34.9599, "step": 10763 }, { "epoch": 256.28656716417913, "grad_norm": 26.66668701171875, "learning_rate": 9.763736263736264e-06, "loss": 34.7922, "step": 10764 }, { "epoch": 256.31044776119404, "grad_norm": 28.99530029296875, "learning_rate": 9.762820512820513e-06, "loss": 35.2831, "step": 10765 }, { "epoch": 256.33432835820895, "grad_norm": 24.208465576171875, "learning_rate": 9.761904761904762e-06, "loss": 36.117, "step": 10766 }, { "epoch": 256.35820895522386, "grad_norm": 30.489770889282227, "learning_rate": 9.760989010989011e-06, "loss": 35.5068, "step": 10767 }, { "epoch": 256.3820895522388, "grad_norm": 25.3145751953125, "learning_rate": 9.76007326007326e-06, "loss": 35.2146, "step": 10768 }, { "epoch": 256.40597014925373, "grad_norm": 29.982032775878906, "learning_rate": 9.75915750915751e-06, "loss": 34.6297, "step": 10769 }, { "epoch": 256.42985074626864, "grad_norm": 26.358388900756836, "learning_rate": 9.75824175824176e-06, "loss": 34.6239, "step": 10770 }, { "epoch": 256.4537313432836, "grad_norm": 28.4139404296875, "learning_rate": 9.757326007326008e-06, "loss": 34.3071, "step": 10771 }, { "epoch": 256.4776119402985, "grad_norm": 26.507587432861328, "learning_rate": 9.756410256410257e-06, "loss": 34.3401, "step": 10772 }, { "epoch": 256.5014925373134, "grad_norm": 25.461633682250977, "learning_rate": 9.755494505494506e-06, "loss": 34.5249, "step": 10773 }, { "epoch": 256.52537313432833, "grad_norm": 24.861305236816406, "learning_rate": 9.754578754578755e-06, "loss": 36.0731, "step": 10774 }, { "epoch": 256.5492537313433, "grad_norm": 25.40403938293457, "learning_rate": 9.753663003663004e-06, "loss": 35.6725, "step": 10775 }, { "epoch": 256.5731343283582, "grad_norm": 21.132911682128906, "learning_rate": 9.752747252747253e-06, "loss": 35.4077, "step": 10776 }, { "epoch": 256.5970149253731, "grad_norm": 30.389541625976562, "learning_rate": 9.751831501831502e-06, "loss": 34.7391, "step": 10777 }, { "epoch": 256.6208955223881, "grad_norm": 26.128162384033203, "learning_rate": 9.750915750915751e-06, "loss": 35.3478, "step": 10778 }, { "epoch": 256.644776119403, "grad_norm": 23.610139846801758, "learning_rate": 9.75e-06, "loss": 36.2763, "step": 10779 }, { "epoch": 256.6686567164179, "grad_norm": 24.85352897644043, "learning_rate": 9.74908424908425e-06, "loss": 35.2415, "step": 10780 }, { "epoch": 256.6925373134328, "grad_norm": 21.928003311157227, "learning_rate": 9.7481684981685e-06, "loss": 34.296, "step": 10781 }, { "epoch": 256.7164179104478, "grad_norm": 19.506946563720703, "learning_rate": 9.747252747252748e-06, "loss": 36.9694, "step": 10782 }, { "epoch": 256.7402985074627, "grad_norm": 25.680326461791992, "learning_rate": 9.746336996336997e-06, "loss": 37.0831, "step": 10783 }, { "epoch": 256.7641791044776, "grad_norm": 20.675579071044922, "learning_rate": 9.745421245421246e-06, "loss": 34.5955, "step": 10784 }, { "epoch": 256.78805970149256, "grad_norm": 23.569568634033203, "learning_rate": 9.744505494505495e-06, "loss": 35.6532, "step": 10785 }, { "epoch": 256.81194029850747, "grad_norm": 21.853796005249023, "learning_rate": 9.743589743589744e-06, "loss": 34.6575, "step": 10786 }, { "epoch": 256.8358208955224, "grad_norm": 24.558605194091797, "learning_rate": 9.742673992673993e-06, "loss": 36.6856, "step": 10787 }, { "epoch": 256.85970149253734, "grad_norm": 20.775253295898438, "learning_rate": 9.741758241758242e-06, "loss": 35.0564, "step": 10788 }, { "epoch": 256.88358208955225, "grad_norm": 23.337413787841797, "learning_rate": 9.740842490842491e-06, "loss": 36.3046, "step": 10789 }, { "epoch": 256.90746268656716, "grad_norm": 21.221128463745117, "learning_rate": 9.73992673992674e-06, "loss": 35.5655, "step": 10790 }, { "epoch": 256.93134328358207, "grad_norm": 22.46784019470215, "learning_rate": 9.73901098901099e-06, "loss": 35.6197, "step": 10791 }, { "epoch": 256.95522388059703, "grad_norm": 21.595149993896484, "learning_rate": 9.73809523809524e-06, "loss": 36.4712, "step": 10792 }, { "epoch": 256.97910447761194, "grad_norm": 26.207984924316406, "learning_rate": 9.737179487179488e-06, "loss": 36.3833, "step": 10793 }, { "epoch": 257.0, "grad_norm": 19.580183029174805, "learning_rate": 9.736263736263737e-06, "loss": 31.4192, "step": 10794 }, { "epoch": 257.0238805970149, "grad_norm": 25.709213256835938, "learning_rate": 9.735347985347986e-06, "loss": 36.315, "step": 10795 }, { "epoch": 257.0477611940299, "grad_norm": 21.611906051635742, "learning_rate": 9.734432234432235e-06, "loss": 36.14, "step": 10796 }, { "epoch": 257.0716417910448, "grad_norm": 25.615612030029297, "learning_rate": 9.733516483516484e-06, "loss": 33.9203, "step": 10797 }, { "epoch": 257.0955223880597, "grad_norm": 23.026264190673828, "learning_rate": 9.732600732600733e-06, "loss": 35.9109, "step": 10798 }, { "epoch": 257.1194029850746, "grad_norm": 24.4227352142334, "learning_rate": 9.731684981684982e-06, "loss": 35.716, "step": 10799 }, { "epoch": 257.14328358208957, "grad_norm": 22.758514404296875, "learning_rate": 9.730769230769231e-06, "loss": 36.1396, "step": 10800 }, { "epoch": 257.1671641791045, "grad_norm": 23.67724609375, "learning_rate": 9.72985347985348e-06, "loss": 34.4643, "step": 10801 }, { "epoch": 257.1910447761194, "grad_norm": 24.518661499023438, "learning_rate": 9.728937728937729e-06, "loss": 35.7916, "step": 10802 }, { "epoch": 257.21492537313435, "grad_norm": 17.434823989868164, "learning_rate": 9.72802197802198e-06, "loss": 36.2359, "step": 10803 }, { "epoch": 257.23880597014926, "grad_norm": 18.463014602661133, "learning_rate": 9.727106227106228e-06, "loss": 36.2663, "step": 10804 }, { "epoch": 257.26268656716417, "grad_norm": 17.706605911254883, "learning_rate": 9.726190476190477e-06, "loss": 36.4413, "step": 10805 }, { "epoch": 257.28656716417913, "grad_norm": 17.663585662841797, "learning_rate": 9.725274725274726e-06, "loss": 34.5734, "step": 10806 }, { "epoch": 257.31044776119404, "grad_norm": 18.207059860229492, "learning_rate": 9.724358974358975e-06, "loss": 35.1563, "step": 10807 }, { "epoch": 257.33432835820895, "grad_norm": 16.37620735168457, "learning_rate": 9.723443223443224e-06, "loss": 35.1207, "step": 10808 }, { "epoch": 257.35820895522386, "grad_norm": 22.57314682006836, "learning_rate": 9.722527472527473e-06, "loss": 35.2246, "step": 10809 }, { "epoch": 257.3820895522388, "grad_norm": 14.920421600341797, "learning_rate": 9.721611721611722e-06, "loss": 34.9203, "step": 10810 }, { "epoch": 257.40597014925373, "grad_norm": 27.248626708984375, "learning_rate": 9.720695970695971e-06, "loss": 36.1843, "step": 10811 }, { "epoch": 257.42985074626864, "grad_norm": 19.605594635009766, "learning_rate": 9.71978021978022e-06, "loss": 35.979, "step": 10812 }, { "epoch": 257.4537313432836, "grad_norm": 24.189802169799805, "learning_rate": 9.718864468864469e-06, "loss": 36.6211, "step": 10813 }, { "epoch": 257.4776119402985, "grad_norm": 21.813461303710938, "learning_rate": 9.71794871794872e-06, "loss": 35.1964, "step": 10814 }, { "epoch": 257.5014925373134, "grad_norm": 22.19347381591797, "learning_rate": 9.717032967032968e-06, "loss": 36.1637, "step": 10815 }, { "epoch": 257.52537313432833, "grad_norm": 18.67477035522461, "learning_rate": 9.716117216117217e-06, "loss": 35.0804, "step": 10816 }, { "epoch": 257.5492537313433, "grad_norm": 20.382328033447266, "learning_rate": 9.715201465201466e-06, "loss": 35.3635, "step": 10817 }, { "epoch": 257.5731343283582, "grad_norm": 17.92677116394043, "learning_rate": 9.714285714285715e-06, "loss": 36.0351, "step": 10818 }, { "epoch": 257.5970149253731, "grad_norm": 19.9560604095459, "learning_rate": 9.713369963369964e-06, "loss": 35.8682, "step": 10819 }, { "epoch": 257.6208955223881, "grad_norm": 16.868568420410156, "learning_rate": 9.712454212454213e-06, "loss": 36.0979, "step": 10820 }, { "epoch": 257.644776119403, "grad_norm": 18.30596923828125, "learning_rate": 9.711538461538462e-06, "loss": 34.9613, "step": 10821 }, { "epoch": 257.6686567164179, "grad_norm": 15.170578002929688, "learning_rate": 9.710622710622711e-06, "loss": 35.3382, "step": 10822 }, { "epoch": 257.6925373134328, "grad_norm": 15.260246276855469, "learning_rate": 9.70970695970696e-06, "loss": 34.6027, "step": 10823 }, { "epoch": 257.7164179104478, "grad_norm": 16.107736587524414, "learning_rate": 9.708791208791209e-06, "loss": 35.1339, "step": 10824 }, { "epoch": 257.7402985074627, "grad_norm": 17.758358001708984, "learning_rate": 9.70787545787546e-06, "loss": 35.9333, "step": 10825 }, { "epoch": 257.7641791044776, "grad_norm": 19.96453094482422, "learning_rate": 9.706959706959708e-06, "loss": 35.0148, "step": 10826 }, { "epoch": 257.78805970149256, "grad_norm": 16.72601318359375, "learning_rate": 9.706043956043957e-06, "loss": 34.5997, "step": 10827 }, { "epoch": 257.81194029850747, "grad_norm": 14.448206901550293, "learning_rate": 9.705128205128206e-06, "loss": 35.8951, "step": 10828 }, { "epoch": 257.8358208955224, "grad_norm": 16.692916870117188, "learning_rate": 9.704212454212455e-06, "loss": 34.6624, "step": 10829 }, { "epoch": 257.85970149253734, "grad_norm": 16.81743812561035, "learning_rate": 9.703296703296704e-06, "loss": 35.085, "step": 10830 }, { "epoch": 257.88358208955225, "grad_norm": 21.635976791381836, "learning_rate": 9.702380952380953e-06, "loss": 34.591, "step": 10831 }, { "epoch": 257.90746268656716, "grad_norm": 14.481380462646484, "learning_rate": 9.701465201465202e-06, "loss": 35.0912, "step": 10832 }, { "epoch": 257.93134328358207, "grad_norm": 20.64571189880371, "learning_rate": 9.700549450549451e-06, "loss": 34.6963, "step": 10833 }, { "epoch": 257.95522388059703, "grad_norm": 17.591278076171875, "learning_rate": 9.6996336996337e-06, "loss": 35.2977, "step": 10834 }, { "epoch": 257.97910447761194, "grad_norm": 16.043319702148438, "learning_rate": 9.698717948717949e-06, "loss": 36.273, "step": 10835 }, { "epoch": 258.0, "grad_norm": 18.798046112060547, "learning_rate": 9.697802197802198e-06, "loss": 31.4153, "step": 10836 }, { "epoch": 258.0238805970149, "grad_norm": 13.94228458404541, "learning_rate": 9.696886446886448e-06, "loss": 34.8859, "step": 10837 }, { "epoch": 258.0477611940299, "grad_norm": 18.440574645996094, "learning_rate": 9.695970695970697e-06, "loss": 35.0648, "step": 10838 }, { "epoch": 258.0716417910448, "grad_norm": 19.227081298828125, "learning_rate": 9.695054945054946e-06, "loss": 35.5322, "step": 10839 }, { "epoch": 258.0955223880597, "grad_norm": 17.29041290283203, "learning_rate": 9.694139194139195e-06, "loss": 36.2281, "step": 10840 }, { "epoch": 258.1194029850746, "grad_norm": 18.552640914916992, "learning_rate": 9.693223443223444e-06, "loss": 34.8531, "step": 10841 }, { "epoch": 258.14328358208957, "grad_norm": 17.46238136291504, "learning_rate": 9.692307692307693e-06, "loss": 36.5649, "step": 10842 }, { "epoch": 258.1671641791045, "grad_norm": 19.12788200378418, "learning_rate": 9.691391941391942e-06, "loss": 36.102, "step": 10843 }, { "epoch": 258.1910447761194, "grad_norm": 15.855060577392578, "learning_rate": 9.690476190476191e-06, "loss": 34.5336, "step": 10844 }, { "epoch": 258.21492537313435, "grad_norm": 16.86677360534668, "learning_rate": 9.68956043956044e-06, "loss": 36.6046, "step": 10845 }, { "epoch": 258.23880597014926, "grad_norm": 13.7073392868042, "learning_rate": 9.688644688644689e-06, "loss": 34.4662, "step": 10846 }, { "epoch": 258.26268656716417, "grad_norm": 18.736238479614258, "learning_rate": 9.687728937728938e-06, "loss": 34.877, "step": 10847 }, { "epoch": 258.28656716417913, "grad_norm": 15.149215698242188, "learning_rate": 9.686813186813188e-06, "loss": 35.4903, "step": 10848 }, { "epoch": 258.31044776119404, "grad_norm": 18.4781436920166, "learning_rate": 9.685897435897437e-06, "loss": 36.1894, "step": 10849 }, { "epoch": 258.33432835820895, "grad_norm": 21.694055557250977, "learning_rate": 9.684981684981686e-06, "loss": 35.5048, "step": 10850 }, { "epoch": 258.35820895522386, "grad_norm": 16.5482120513916, "learning_rate": 9.684065934065935e-06, "loss": 34.999, "step": 10851 }, { "epoch": 258.3820895522388, "grad_norm": 15.136181831359863, "learning_rate": 9.683150183150184e-06, "loss": 35.1913, "step": 10852 }, { "epoch": 258.40597014925373, "grad_norm": 17.934253692626953, "learning_rate": 9.682234432234433e-06, "loss": 35.9427, "step": 10853 }, { "epoch": 258.42985074626864, "grad_norm": 17.596426010131836, "learning_rate": 9.681318681318682e-06, "loss": 35.4773, "step": 10854 }, { "epoch": 258.4537313432836, "grad_norm": 18.374597549438477, "learning_rate": 9.680402930402931e-06, "loss": 35.7086, "step": 10855 }, { "epoch": 258.4776119402985, "grad_norm": 19.101961135864258, "learning_rate": 9.67948717948718e-06, "loss": 35.4414, "step": 10856 }, { "epoch": 258.5014925373134, "grad_norm": 15.79054069519043, "learning_rate": 9.678571428571429e-06, "loss": 34.6681, "step": 10857 }, { "epoch": 258.52537313432833, "grad_norm": 16.025163650512695, "learning_rate": 9.677655677655678e-06, "loss": 36.0638, "step": 10858 }, { "epoch": 258.5492537313433, "grad_norm": 14.512070655822754, "learning_rate": 9.676739926739928e-06, "loss": 35.4172, "step": 10859 }, { "epoch": 258.5731343283582, "grad_norm": 16.589923858642578, "learning_rate": 9.675824175824177e-06, "loss": 36.0823, "step": 10860 }, { "epoch": 258.5970149253731, "grad_norm": 18.830434799194336, "learning_rate": 9.674908424908426e-06, "loss": 36.0289, "step": 10861 }, { "epoch": 258.6208955223881, "grad_norm": 14.532703399658203, "learning_rate": 9.673992673992675e-06, "loss": 34.5007, "step": 10862 }, { "epoch": 258.644776119403, "grad_norm": 16.893735885620117, "learning_rate": 9.673076923076924e-06, "loss": 36.3404, "step": 10863 }, { "epoch": 258.6686567164179, "grad_norm": 19.440937042236328, "learning_rate": 9.672161172161173e-06, "loss": 35.9148, "step": 10864 }, { "epoch": 258.6925373134328, "grad_norm": 16.368810653686523, "learning_rate": 9.671245421245422e-06, "loss": 35.4302, "step": 10865 }, { "epoch": 258.7164179104478, "grad_norm": 15.688422203063965, "learning_rate": 9.670329670329671e-06, "loss": 35.8679, "step": 10866 }, { "epoch": 258.7402985074627, "grad_norm": 22.185697555541992, "learning_rate": 9.66941391941392e-06, "loss": 34.4393, "step": 10867 }, { "epoch": 258.7641791044776, "grad_norm": 16.52676773071289, "learning_rate": 9.668498168498169e-06, "loss": 35.5621, "step": 10868 }, { "epoch": 258.78805970149256, "grad_norm": 19.543874740600586, "learning_rate": 9.667582417582418e-06, "loss": 35.0672, "step": 10869 }, { "epoch": 258.81194029850747, "grad_norm": 21.124961853027344, "learning_rate": 9.666666666666667e-06, "loss": 35.4156, "step": 10870 }, { "epoch": 258.8358208955224, "grad_norm": 16.53673553466797, "learning_rate": 9.665750915750917e-06, "loss": 35.6703, "step": 10871 }, { "epoch": 258.85970149253734, "grad_norm": 24.19443702697754, "learning_rate": 9.664835164835166e-06, "loss": 36.3566, "step": 10872 }, { "epoch": 258.88358208955225, "grad_norm": 18.795320510864258, "learning_rate": 9.663919413919415e-06, "loss": 35.2381, "step": 10873 }, { "epoch": 258.90746268656716, "grad_norm": 16.57818031311035, "learning_rate": 9.663003663003664e-06, "loss": 35.2277, "step": 10874 }, { "epoch": 258.93134328358207, "grad_norm": 19.244136810302734, "learning_rate": 9.662087912087913e-06, "loss": 34.7622, "step": 10875 }, { "epoch": 258.95522388059703, "grad_norm": 16.864898681640625, "learning_rate": 9.661172161172162e-06, "loss": 35.8717, "step": 10876 }, { "epoch": 258.97910447761194, "grad_norm": 16.22411346435547, "learning_rate": 9.660256410256411e-06, "loss": 34.1409, "step": 10877 }, { "epoch": 259.0, "grad_norm": NaN, "learning_rate": 9.65934065934066e-06, "loss": 26.7277, "step": 10878 }, { "epoch": 259.0238805970149, "grad_norm": 17.742145538330078, "learning_rate": 9.65934065934066e-06, "loss": 34.9852, "step": 10879 }, { "epoch": 259.0477611940299, "grad_norm": 20.953575134277344, "learning_rate": 9.658424908424909e-06, "loss": 34.5062, "step": 10880 }, { "epoch": 259.0716417910448, "grad_norm": 14.631403923034668, "learning_rate": 9.657509157509158e-06, "loss": 35.8458, "step": 10881 }, { "epoch": 259.0955223880597, "grad_norm": 22.979354858398438, "learning_rate": 9.656593406593407e-06, "loss": 35.386, "step": 10882 }, { "epoch": 259.1194029850746, "grad_norm": 20.7153377532959, "learning_rate": 9.655677655677657e-06, "loss": 35.166, "step": 10883 }, { "epoch": 259.14328358208957, "grad_norm": 19.420682907104492, "learning_rate": 9.654761904761906e-06, "loss": 36.1378, "step": 10884 }, { "epoch": 259.1671641791045, "grad_norm": 23.77364730834961, "learning_rate": 9.653846153846155e-06, "loss": 35.5886, "step": 10885 }, { "epoch": 259.1910447761194, "grad_norm": 21.49879264831543, "learning_rate": 9.652930402930404e-06, "loss": 35.2042, "step": 10886 }, { "epoch": 259.21492537313435, "grad_norm": 17.184059143066406, "learning_rate": 9.652014652014653e-06, "loss": 34.5951, "step": 10887 }, { "epoch": 259.23880597014926, "grad_norm": 22.123655319213867, "learning_rate": 9.651098901098902e-06, "loss": 36.4969, "step": 10888 }, { "epoch": 259.26268656716417, "grad_norm": 20.89151954650879, "learning_rate": 9.650183150183151e-06, "loss": 35.1259, "step": 10889 }, { "epoch": 259.28656716417913, "grad_norm": 16.542654037475586, "learning_rate": 9.6492673992674e-06, "loss": 34.7581, "step": 10890 }, { "epoch": 259.31044776119404, "grad_norm": 18.11886978149414, "learning_rate": 9.648351648351649e-06, "loss": 35.1464, "step": 10891 }, { "epoch": 259.33432835820895, "grad_norm": 18.424983978271484, "learning_rate": 9.647435897435898e-06, "loss": 35.9606, "step": 10892 }, { "epoch": 259.35820895522386, "grad_norm": 17.356115341186523, "learning_rate": 9.646520146520147e-06, "loss": 34.916, "step": 10893 }, { "epoch": 259.3820895522388, "grad_norm": 19.047100067138672, "learning_rate": 9.645604395604397e-06, "loss": 35.1984, "step": 10894 }, { "epoch": 259.40597014925373, "grad_norm": 23.9052791595459, "learning_rate": 9.644688644688646e-06, "loss": 35.7599, "step": 10895 }, { "epoch": 259.42985074626864, "grad_norm": 15.961407661437988, "learning_rate": 9.643772893772895e-06, "loss": 35.9203, "step": 10896 }, { "epoch": 259.4537313432836, "grad_norm": 19.9751033782959, "learning_rate": 9.642857142857144e-06, "loss": 35.1501, "step": 10897 }, { "epoch": 259.4776119402985, "grad_norm": 19.248432159423828, "learning_rate": 9.641941391941393e-06, "loss": 35.7066, "step": 10898 }, { "epoch": 259.5014925373134, "grad_norm": 16.800792694091797, "learning_rate": 9.641025641025642e-06, "loss": 35.9651, "step": 10899 }, { "epoch": 259.52537313432833, "grad_norm": 14.706574440002441, "learning_rate": 9.640109890109891e-06, "loss": 35.5949, "step": 10900 }, { "epoch": 259.5492537313433, "grad_norm": 19.912813186645508, "learning_rate": 9.63919413919414e-06, "loss": 36.1239, "step": 10901 }, { "epoch": 259.5731343283582, "grad_norm": 16.731714248657227, "learning_rate": 9.638278388278389e-06, "loss": 35.5973, "step": 10902 }, { "epoch": 259.5970149253731, "grad_norm": 15.84050178527832, "learning_rate": 9.637362637362638e-06, "loss": 35.244, "step": 10903 }, { "epoch": 259.6208955223881, "grad_norm": 15.029394149780273, "learning_rate": 9.636446886446887e-06, "loss": 35.456, "step": 10904 }, { "epoch": 259.644776119403, "grad_norm": 17.010866165161133, "learning_rate": 9.635531135531136e-06, "loss": 34.9923, "step": 10905 }, { "epoch": 259.6686567164179, "grad_norm": 14.604995727539062, "learning_rate": 9.634615384615386e-06, "loss": 35.0997, "step": 10906 }, { "epoch": 259.6925373134328, "grad_norm": 15.063443183898926, "learning_rate": 9.633699633699635e-06, "loss": 37.2093, "step": 10907 }, { "epoch": 259.7164179104478, "grad_norm": 17.577592849731445, "learning_rate": 9.632783882783884e-06, "loss": 35.4733, "step": 10908 }, { "epoch": 259.7402985074627, "grad_norm": 14.357279777526855, "learning_rate": 9.631868131868133e-06, "loss": 35.1247, "step": 10909 }, { "epoch": 259.7641791044776, "grad_norm": 18.57563591003418, "learning_rate": 9.630952380952382e-06, "loss": 36.0164, "step": 10910 }, { "epoch": 259.78805970149256, "grad_norm": 15.390028953552246, "learning_rate": 9.630036630036631e-06, "loss": 35.0736, "step": 10911 }, { "epoch": 259.81194029850747, "grad_norm": 20.42272186279297, "learning_rate": 9.62912087912088e-06, "loss": 36.8684, "step": 10912 }, { "epoch": 259.8358208955224, "grad_norm": 15.395928382873535, "learning_rate": 9.628205128205129e-06, "loss": 35.2272, "step": 10913 }, { "epoch": 259.85970149253734, "grad_norm": NaN, "learning_rate": 9.627289377289378e-06, "loss": 30.2606, "step": 10914 }, { "epoch": 259.88358208955225, "grad_norm": 25.252601623535156, "learning_rate": 9.627289377289378e-06, "loss": 35.1984, "step": 10915 }, { "epoch": 259.90746268656716, "grad_norm": 20.136913299560547, "learning_rate": 9.626373626373627e-06, "loss": 35.1247, "step": 10916 }, { "epoch": 259.93134328358207, "grad_norm": 17.569561004638672, "learning_rate": 9.625457875457876e-06, "loss": 34.2426, "step": 10917 }, { "epoch": 259.95522388059703, "grad_norm": 25.293659210205078, "learning_rate": 9.624542124542126e-06, "loss": 36.038, "step": 10918 }, { "epoch": 259.97910447761194, "grad_norm": 16.823022842407227, "learning_rate": 9.623626373626375e-06, "loss": 35.0342, "step": 10919 }, { "epoch": 260.0, "grad_norm": 19.249588012695312, "learning_rate": 9.622710622710624e-06, "loss": 29.7084, "step": 10920 }, { "epoch": 260.0, "step": 10920, "total_flos": 5.3683595927778586e+17, "train_loss": 1.370590279303191, "train_runtime": 12821.5252, "train_samples_per_second": 108.53, "train_steps_per_second": 0.852 }, { "epoch": 260.0238805970149, "grad_norm": 21.460094451904297, "learning_rate": 1e-05, "loss": 36.1349, "step": 10921 }, { "epoch": 260.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999149659863946e-06, "loss": 40.2284, "step": 10922 }, { "epoch": 260.0716417910448, "grad_norm": Infinity, "learning_rate": 9.999149659863946e-06, "loss": 41.4672, "step": 10923 }, { "epoch": 260.0955223880597, "grad_norm": 292.49517822265625, "learning_rate": 9.999149659863946e-06, "loss": 40.1311, "step": 10924 }, { "epoch": 260.1194029850746, "grad_norm": 132.06431579589844, "learning_rate": 9.998299319727893e-06, "loss": 39.675, "step": 10925 }, { "epoch": 260.14328358208957, "grad_norm": 93.1929702758789, "learning_rate": 9.997448979591836e-06, "loss": 38.0186, "step": 10926 }, { "epoch": 260.1671641791045, "grad_norm": 48.46129608154297, "learning_rate": 9.996598639455783e-06, "loss": 37.4163, "step": 10927 }, { "epoch": 260.1910447761194, "grad_norm": 57.307220458984375, "learning_rate": 9.995748299319729e-06, "loss": 35.9804, "step": 10928 }, { "epoch": 260.21492537313435, "grad_norm": 42.48515319824219, "learning_rate": 9.994897959183675e-06, "loss": 37.5594, "step": 10929 }, { "epoch": 260.23880597014926, "grad_norm": 46.436134338378906, "learning_rate": 9.99404761904762e-06, "loss": 36.9654, "step": 10930 }, { "epoch": 260.26268656716417, "grad_norm": 42.507808685302734, "learning_rate": 9.993197278911566e-06, "loss": 36.8308, "step": 10931 }, { "epoch": 260.28656716417913, "grad_norm": 28.198928833007812, "learning_rate": 9.992346938775511e-06, "loss": 36.0199, "step": 10932 }, { "epoch": 260.31044776119404, "grad_norm": 36.01531982421875, "learning_rate": 9.991496598639456e-06, "loss": 35.7276, "step": 10933 }, { "epoch": 260.33432835820895, "grad_norm": 27.35661506652832, "learning_rate": 9.990646258503403e-06, "loss": 36.5345, "step": 10934 }, { "epoch": 260.35820895522386, "grad_norm": 23.625049591064453, "learning_rate": 9.989795918367348e-06, "loss": 36.4087, "step": 10935 }, { "epoch": 260.3820895522388, "grad_norm": 21.105745315551758, "learning_rate": 9.988945578231294e-06, "loss": 35.4079, "step": 10936 }, { "epoch": 260.40597014925373, "grad_norm": 22.002107620239258, "learning_rate": 9.988095238095239e-06, "loss": 35.2193, "step": 10937 }, { "epoch": 260.42985074626864, "grad_norm": 18.042898178100586, "learning_rate": 9.987244897959184e-06, "loss": 35.8775, "step": 10938 }, { "epoch": 260.4537313432836, "grad_norm": 22.857986450195312, "learning_rate": 9.98639455782313e-06, "loss": 35.2346, "step": 10939 }, { "epoch": 260.4776119402985, "grad_norm": 20.077600479125977, "learning_rate": 9.985544217687076e-06, "loss": 34.0219, "step": 10940 }, { "epoch": 260.5014925373134, "grad_norm": 18.646535873413086, "learning_rate": 9.984693877551021e-06, "loss": 34.3711, "step": 10941 }, { "epoch": 260.52537313432833, "grad_norm": 14.77027416229248, "learning_rate": 9.983843537414966e-06, "loss": 31.8077, "step": 10942 }, { "epoch": 260.5492537313433, "grad_norm": 14.842784881591797, "learning_rate": 9.982993197278913e-06, "loss": 34.9431, "step": 10943 }, { "epoch": 260.5731343283582, "grad_norm": 17.824871063232422, "learning_rate": 9.982142857142858e-06, "loss": 35.4375, "step": 10944 }, { "epoch": 260.5970149253731, "grad_norm": 15.629002571105957, "learning_rate": 9.981292517006804e-06, "loss": 35.4708, "step": 10945 }, { "epoch": 260.6208955223881, "grad_norm": 14.504796981811523, "learning_rate": 9.980442176870749e-06, "loss": 35.8836, "step": 10946 }, { "epoch": 260.644776119403, "grad_norm": 14.599526405334473, "learning_rate": 9.979591836734694e-06, "loss": 34.7322, "step": 10947 }, { "epoch": 260.6686567164179, "grad_norm": 14.47644329071045, "learning_rate": 9.978741496598641e-06, "loss": 34.6886, "step": 10948 }, { "epoch": 260.6925373134328, "grad_norm": 16.613548278808594, "learning_rate": 9.977891156462586e-06, "loss": 36.2553, "step": 10949 }, { "epoch": 260.7164179104478, "grad_norm": 14.148774147033691, "learning_rate": 9.977040816326531e-06, "loss": 34.3585, "step": 10950 }, { "epoch": 260.7402985074627, "grad_norm": 19.07725715637207, "learning_rate": 9.976190476190477e-06, "loss": 34.6957, "step": 10951 }, { "epoch": 260.7641791044776, "grad_norm": 25.885623931884766, "learning_rate": 9.975340136054422e-06, "loss": 37.466, "step": 10952 }, { "epoch": 260.78805970149256, "grad_norm": 17.284061431884766, "learning_rate": 9.974489795918369e-06, "loss": 36.3699, "step": 10953 }, { "epoch": 260.81194029850747, "grad_norm": 16.200647354125977, "learning_rate": 9.973639455782314e-06, "loss": 35.8758, "step": 10954 }, { "epoch": 260.8358208955224, "grad_norm": 15.058037757873535, "learning_rate": 9.972789115646259e-06, "loss": 35.9636, "step": 10955 }, { "epoch": 260.85970149253734, "grad_norm": 14.795926094055176, "learning_rate": 9.971938775510204e-06, "loss": 36.0804, "step": 10956 }, { "epoch": 260.88358208955225, "grad_norm": 18.57579803466797, "learning_rate": 9.97108843537415e-06, "loss": 35.1295, "step": 10957 }, { "epoch": 260.90746268656716, "grad_norm": 15.663594245910645, "learning_rate": 9.970238095238096e-06, "loss": 34.9655, "step": 10958 }, { "epoch": 260.93134328358207, "grad_norm": 13.628890991210938, "learning_rate": 9.969387755102042e-06, "loss": 34.7612, "step": 10959 }, { "epoch": 260.95522388059703, "grad_norm": 14.406618118286133, "learning_rate": 9.968537414965987e-06, "loss": 33.267, "step": 10960 }, { "epoch": 260.97910447761194, "grad_norm": 21.73625373840332, "learning_rate": 9.967687074829932e-06, "loss": 36.1063, "step": 10961 }, { "epoch": 261.0, "grad_norm": 15.970307350158691, "learning_rate": 9.966836734693879e-06, "loss": 32.4093, "step": 10962 }, { "epoch": 261.0238805970149, "grad_norm": 15.53166675567627, "learning_rate": 9.965986394557824e-06, "loss": 35.7108, "step": 10963 }, { "epoch": 261.0477611940299, "grad_norm": 18.41387939453125, "learning_rate": 9.96513605442177e-06, "loss": 34.8578, "step": 10964 }, { "epoch": 261.0716417910448, "grad_norm": 17.519445419311523, "learning_rate": 9.964285714285714e-06, "loss": 34.715, "step": 10965 }, { "epoch": 261.0955223880597, "grad_norm": 14.596212387084961, "learning_rate": 9.96343537414966e-06, "loss": 36.0262, "step": 10966 }, { "epoch": 261.1194029850746, "grad_norm": 16.763545989990234, "learning_rate": 9.962585034013607e-06, "loss": 35.9315, "step": 10967 }, { "epoch": 261.14328358208957, "grad_norm": 18.176021575927734, "learning_rate": 9.961734693877552e-06, "loss": 36.1361, "step": 10968 }, { "epoch": 261.1671641791045, "grad_norm": 23.32509422302246, "learning_rate": 9.960884353741499e-06, "loss": 35.1856, "step": 10969 }, { "epoch": 261.1910447761194, "grad_norm": 14.871478080749512, "learning_rate": 9.960034013605442e-06, "loss": 36.0057, "step": 10970 }, { "epoch": 261.21492537313435, "grad_norm": 12.681305885314941, "learning_rate": 9.959183673469387e-06, "loss": 34.8678, "step": 10971 }, { "epoch": 261.23880597014926, "grad_norm": 16.676115036010742, "learning_rate": 9.958333333333334e-06, "loss": 35.8609, "step": 10972 }, { "epoch": 261.26268656716417, "grad_norm": 20.324012756347656, "learning_rate": 9.95748299319728e-06, "loss": 34.4661, "step": 10973 }, { "epoch": 261.28656716417913, "grad_norm": 18.328187942504883, "learning_rate": 9.956632653061226e-06, "loss": 34.5163, "step": 10974 }, { "epoch": 261.31044776119404, "grad_norm": 13.199163436889648, "learning_rate": 9.955782312925172e-06, "loss": 34.7459, "step": 10975 }, { "epoch": 261.33432835820895, "grad_norm": 23.660144805908203, "learning_rate": 9.954931972789117e-06, "loss": 34.9794, "step": 10976 }, { "epoch": 261.35820895522386, "grad_norm": 22.1052188873291, "learning_rate": 9.954081632653062e-06, "loss": 35.8654, "step": 10977 }, { "epoch": 261.3820895522388, "grad_norm": 14.417381286621094, "learning_rate": 9.953231292517007e-06, "loss": 35.0759, "step": 10978 }, { "epoch": 261.40597014925373, "grad_norm": 18.374778747558594, "learning_rate": 9.952380952380954e-06, "loss": 35.8716, "step": 10979 }, { "epoch": 261.42985074626864, "grad_norm": 17.47286605834961, "learning_rate": 9.9515306122449e-06, "loss": 35.3625, "step": 10980 }, { "epoch": 261.4537313432836, "grad_norm": 16.161006927490234, "learning_rate": 9.950680272108844e-06, "loss": 34.0341, "step": 10981 }, { "epoch": 261.4776119402985, "grad_norm": 15.4244966506958, "learning_rate": 9.94982993197279e-06, "loss": 35.6073, "step": 10982 }, { "epoch": 261.5014925373134, "grad_norm": 18.360742568969727, "learning_rate": 9.948979591836737e-06, "loss": 35.9493, "step": 10983 }, { "epoch": 261.52537313432833, "grad_norm": 24.266454696655273, "learning_rate": 9.948129251700682e-06, "loss": 35.0714, "step": 10984 }, { "epoch": 261.5492537313433, "grad_norm": 15.144146919250488, "learning_rate": 9.947278911564627e-06, "loss": 36.047, "step": 10985 }, { "epoch": 261.5731343283582, "grad_norm": 15.100632667541504, "learning_rate": 9.946428571428572e-06, "loss": 33.6753, "step": 10986 }, { "epoch": 261.5970149253731, "grad_norm": 18.780982971191406, "learning_rate": 9.945578231292517e-06, "loss": 36.2126, "step": 10987 }, { "epoch": 261.6208955223881, "grad_norm": 15.762274742126465, "learning_rate": 9.944727891156464e-06, "loss": 33.9035, "step": 10988 }, { "epoch": 261.644776119403, "grad_norm": 16.120458602905273, "learning_rate": 9.94387755102041e-06, "loss": 34.6882, "step": 10989 }, { "epoch": 261.6686567164179, "grad_norm": 15.893648147583008, "learning_rate": 9.943027210884355e-06, "loss": 36.0207, "step": 10990 }, { "epoch": 261.6925373134328, "grad_norm": 19.185096740722656, "learning_rate": 9.9421768707483e-06, "loss": 36.1851, "step": 10991 }, { "epoch": 261.7164179104478, "grad_norm": 16.304075241088867, "learning_rate": 9.941326530612245e-06, "loss": 35.5564, "step": 10992 }, { "epoch": 261.7402985074627, "grad_norm": 17.55813980102539, "learning_rate": 9.940476190476192e-06, "loss": 35.186, "step": 10993 }, { "epoch": 261.7641791044776, "grad_norm": 13.970108032226562, "learning_rate": 9.939625850340137e-06, "loss": 35.7754, "step": 10994 }, { "epoch": 261.78805970149256, "grad_norm": 17.354642868041992, "learning_rate": 9.938775510204082e-06, "loss": 35.0764, "step": 10995 }, { "epoch": 261.81194029850747, "grad_norm": 17.21601676940918, "learning_rate": 9.937925170068028e-06, "loss": 35.5803, "step": 10996 }, { "epoch": 261.8358208955224, "grad_norm": 16.45388412475586, "learning_rate": 9.937074829931974e-06, "loss": 35.6118, "step": 10997 }, { "epoch": 261.85970149253734, "grad_norm": 16.805095672607422, "learning_rate": 9.93622448979592e-06, "loss": 35.6055, "step": 10998 }, { "epoch": 261.88358208955225, "grad_norm": 16.22078514099121, "learning_rate": 9.935374149659865e-06, "loss": 35.8316, "step": 10999 }, { "epoch": 261.90746268656716, "grad_norm": 13.935564994812012, "learning_rate": 9.93452380952381e-06, "loss": 35.6217, "step": 11000 }, { "epoch": 261.93134328358207, "grad_norm": 17.076021194458008, "learning_rate": 9.933673469387755e-06, "loss": 34.7995, "step": 11001 }, { "epoch": 261.95522388059703, "grad_norm": 20.567550659179688, "learning_rate": 9.932823129251702e-06, "loss": 35.4546, "step": 11002 }, { "epoch": 261.97910447761194, "grad_norm": 14.342740058898926, "learning_rate": 9.931972789115647e-06, "loss": 35.7293, "step": 11003 }, { "epoch": 262.0, "grad_norm": 14.136404991149902, "learning_rate": 9.931122448979593e-06, "loss": 31.2726, "step": 11004 }, { "epoch": 262.0238805970149, "grad_norm": 14.922279357910156, "learning_rate": 9.930272108843538e-06, "loss": 34.9358, "step": 11005 }, { "epoch": 262.0477611940299, "grad_norm": 16.061193466186523, "learning_rate": 9.929421768707483e-06, "loss": 35.2566, "step": 11006 }, { "epoch": 262.0716417910448, "grad_norm": 15.24997615814209, "learning_rate": 9.92857142857143e-06, "loss": 35.074, "step": 11007 }, { "epoch": 262.0955223880597, "grad_norm": 15.471017837524414, "learning_rate": 9.927721088435375e-06, "loss": 35.4505, "step": 11008 }, { "epoch": 262.1194029850746, "grad_norm": 16.509521484375, "learning_rate": 9.92687074829932e-06, "loss": 34.2916, "step": 11009 }, { "epoch": 262.14328358208957, "grad_norm": 24.300935745239258, "learning_rate": 9.926020408163265e-06, "loss": 35.0017, "step": 11010 }, { "epoch": 262.1671641791045, "grad_norm": 15.514004707336426, "learning_rate": 9.92517006802721e-06, "loss": 34.9278, "step": 11011 }, { "epoch": 262.1910447761194, "grad_norm": 14.313405990600586, "learning_rate": 9.924319727891158e-06, "loss": 35.0097, "step": 11012 }, { "epoch": 262.21492537313435, "grad_norm": 19.78252410888672, "learning_rate": 9.923469387755103e-06, "loss": 34.6696, "step": 11013 }, { "epoch": 262.23880597014926, "grad_norm": 20.762428283691406, "learning_rate": 9.922619047619048e-06, "loss": 34.8702, "step": 11014 }, { "epoch": 262.26268656716417, "grad_norm": 15.121173858642578, "learning_rate": 9.921768707482993e-06, "loss": 35.0737, "step": 11015 }, { "epoch": 262.28656716417913, "grad_norm": 14.178095817565918, "learning_rate": 9.92091836734694e-06, "loss": 35.1259, "step": 11016 }, { "epoch": 262.31044776119404, "grad_norm": 17.63243865966797, "learning_rate": 9.920068027210885e-06, "loss": 34.6101, "step": 11017 }, { "epoch": 262.33432835820895, "grad_norm": 18.282089233398438, "learning_rate": 9.91921768707483e-06, "loss": 35.0133, "step": 11018 }, { "epoch": 262.35820895522386, "grad_norm": 15.828527450561523, "learning_rate": 9.918367346938776e-06, "loss": 35.33, "step": 11019 }, { "epoch": 262.3820895522388, "grad_norm": 17.393463134765625, "learning_rate": 9.91751700680272e-06, "loss": 36.4822, "step": 11020 }, { "epoch": 262.40597014925373, "grad_norm": 18.632503509521484, "learning_rate": 9.916666666666668e-06, "loss": 35.8496, "step": 11021 }, { "epoch": 262.42985074626864, "grad_norm": 14.081930160522461, "learning_rate": 9.915816326530613e-06, "loss": 34.9148, "step": 11022 }, { "epoch": 262.4537313432836, "grad_norm": 18.444381713867188, "learning_rate": 9.91496598639456e-06, "loss": 35.4705, "step": 11023 }, { "epoch": 262.4776119402985, "grad_norm": 14.789766311645508, "learning_rate": 9.914115646258505e-06, "loss": 35.4245, "step": 11024 }, { "epoch": 262.5014925373134, "grad_norm": 16.575105667114258, "learning_rate": 9.913265306122449e-06, "loss": 34.9822, "step": 11025 }, { "epoch": 262.52537313432833, "grad_norm": 19.523876190185547, "learning_rate": 9.912414965986395e-06, "loss": 36.3229, "step": 11026 }, { "epoch": 262.5492537313433, "grad_norm": 16.93731117248535, "learning_rate": 9.91156462585034e-06, "loss": 35.6036, "step": 11027 }, { "epoch": 262.5731343283582, "grad_norm": 15.929445266723633, "learning_rate": 9.910714285714288e-06, "loss": 35.3657, "step": 11028 }, { "epoch": 262.5970149253731, "grad_norm": 15.706140518188477, "learning_rate": 9.909863945578233e-06, "loss": 34.9803, "step": 11029 }, { "epoch": 262.6208955223881, "grad_norm": 14.913352012634277, "learning_rate": 9.909013605442178e-06, "loss": 35.1816, "step": 11030 }, { "epoch": 262.644776119403, "grad_norm": 14.16916275024414, "learning_rate": 9.908163265306123e-06, "loss": 35.0046, "step": 11031 }, { "epoch": 262.6686567164179, "grad_norm": 16.5507755279541, "learning_rate": 9.907312925170068e-06, "loss": 35.28, "step": 11032 }, { "epoch": 262.6925373134328, "grad_norm": 18.04994773864746, "learning_rate": 9.906462585034015e-06, "loss": 35.4641, "step": 11033 }, { "epoch": 262.7164179104478, "grad_norm": 13.117817878723145, "learning_rate": 9.90561224489796e-06, "loss": 36.5712, "step": 11034 }, { "epoch": 262.7402985074627, "grad_norm": 18.238929748535156, "learning_rate": 9.904761904761906e-06, "loss": 34.8052, "step": 11035 }, { "epoch": 262.7641791044776, "grad_norm": 15.109965324401855, "learning_rate": 9.90391156462585e-06, "loss": 35.5693, "step": 11036 }, { "epoch": 262.78805970149256, "grad_norm": 16.457304000854492, "learning_rate": 9.903061224489798e-06, "loss": 34.7483, "step": 11037 }, { "epoch": 262.81194029850747, "grad_norm": 17.961917877197266, "learning_rate": 9.902210884353743e-06, "loss": 35.2234, "step": 11038 }, { "epoch": 262.8358208955224, "grad_norm": 16.238235473632812, "learning_rate": 9.901360544217688e-06, "loss": 35.4148, "step": 11039 }, { "epoch": 262.85970149253734, "grad_norm": 17.8552188873291, "learning_rate": 9.900510204081633e-06, "loss": 34.473, "step": 11040 }, { "epoch": 262.88358208955225, "grad_norm": 20.533002853393555, "learning_rate": 9.899659863945579e-06, "loss": 35.4556, "step": 11041 }, { "epoch": 262.90746268656716, "grad_norm": 18.42131805419922, "learning_rate": 9.898809523809525e-06, "loss": 35.7505, "step": 11042 }, { "epoch": 262.93134328358207, "grad_norm": 18.14691925048828, "learning_rate": 9.89795918367347e-06, "loss": 35.2995, "step": 11043 }, { "epoch": 262.95522388059703, "grad_norm": 15.01701831817627, "learning_rate": 9.897108843537416e-06, "loss": 36.1057, "step": 11044 }, { "epoch": 262.97910447761194, "grad_norm": 16.602943420410156, "learning_rate": 9.896258503401361e-06, "loss": 35.7632, "step": 11045 }, { "epoch": 263.0, "grad_norm": 12.878350257873535, "learning_rate": 9.895408163265306e-06, "loss": 32.1618, "step": 11046 }, { "epoch": 263.0238805970149, "grad_norm": 16.082096099853516, "learning_rate": 9.894557823129253e-06, "loss": 35.2716, "step": 11047 }, { "epoch": 263.0477611940299, "grad_norm": 14.401817321777344, "learning_rate": 9.893707482993198e-06, "loss": 35.612, "step": 11048 }, { "epoch": 263.0716417910448, "grad_norm": 15.765049934387207, "learning_rate": 9.892857142857143e-06, "loss": 36.6397, "step": 11049 }, { "epoch": 263.0955223880597, "grad_norm": 17.880720138549805, "learning_rate": 9.892006802721089e-06, "loss": 36.4781, "step": 11050 }, { "epoch": 263.1194029850746, "grad_norm": 13.4547119140625, "learning_rate": 9.891156462585036e-06, "loss": 35.9454, "step": 11051 }, { "epoch": 263.14328358208957, "grad_norm": 18.206336975097656, "learning_rate": 9.89030612244898e-06, "loss": 34.6142, "step": 11052 }, { "epoch": 263.1671641791045, "grad_norm": 15.568166732788086, "learning_rate": 9.889455782312926e-06, "loss": 35.4857, "step": 11053 }, { "epoch": 263.1910447761194, "grad_norm": 18.235918045043945, "learning_rate": 9.888605442176871e-06, "loss": 34.8914, "step": 11054 }, { "epoch": 263.21492537313435, "grad_norm": 17.768962860107422, "learning_rate": 9.887755102040816e-06, "loss": 34.9301, "step": 11055 }, { "epoch": 263.23880597014926, "grad_norm": 17.910951614379883, "learning_rate": 9.886904761904763e-06, "loss": 36.5989, "step": 11056 }, { "epoch": 263.26268656716417, "grad_norm": 16.65070915222168, "learning_rate": 9.886054421768708e-06, "loss": 34.838, "step": 11057 }, { "epoch": 263.28656716417913, "grad_norm": 16.371315002441406, "learning_rate": 9.885204081632654e-06, "loss": 34.5002, "step": 11058 }, { "epoch": 263.31044776119404, "grad_norm": 16.746990203857422, "learning_rate": 9.884353741496599e-06, "loss": 34.7129, "step": 11059 }, { "epoch": 263.33432835820895, "grad_norm": 16.800657272338867, "learning_rate": 9.883503401360544e-06, "loss": 35.568, "step": 11060 }, { "epoch": 263.35820895522386, "grad_norm": 16.81734848022461, "learning_rate": 9.882653061224491e-06, "loss": 35.0892, "step": 11061 }, { "epoch": 263.3820895522388, "grad_norm": 14.651569366455078, "learning_rate": 9.881802721088436e-06, "loss": 35.1448, "step": 11062 }, { "epoch": 263.40597014925373, "grad_norm": 20.921037673950195, "learning_rate": 9.880952380952381e-06, "loss": 36.1377, "step": 11063 }, { "epoch": 263.42985074626864, "grad_norm": 14.959305763244629, "learning_rate": 9.880102040816327e-06, "loss": 33.6204, "step": 11064 }, { "epoch": 263.4537313432836, "grad_norm": 16.67865562438965, "learning_rate": 9.879251700680272e-06, "loss": 35.7841, "step": 11065 }, { "epoch": 263.4776119402985, "grad_norm": 18.072956085205078, "learning_rate": 9.878401360544219e-06, "loss": 36.6932, "step": 11066 }, { "epoch": 263.5014925373134, "grad_norm": 13.375307083129883, "learning_rate": 9.877551020408164e-06, "loss": 34.0739, "step": 11067 }, { "epoch": 263.52537313432833, "grad_norm": 18.288284301757812, "learning_rate": 9.87670068027211e-06, "loss": 36.3153, "step": 11068 }, { "epoch": 263.5492537313433, "grad_norm": 14.23848819732666, "learning_rate": 9.875850340136054e-06, "loss": 34.0307, "step": 11069 }, { "epoch": 263.5731343283582, "grad_norm": 16.712451934814453, "learning_rate": 9.875000000000001e-06, "loss": 35.2859, "step": 11070 }, { "epoch": 263.5970149253731, "grad_norm": 17.808401107788086, "learning_rate": 9.874149659863946e-06, "loss": 35.2393, "step": 11071 }, { "epoch": 263.6208955223881, "grad_norm": 20.396392822265625, "learning_rate": 9.873299319727892e-06, "loss": 36.3273, "step": 11072 }, { "epoch": 263.644776119403, "grad_norm": 15.763962745666504, "learning_rate": 9.872448979591838e-06, "loss": 34.1094, "step": 11073 }, { "epoch": 263.6686567164179, "grad_norm": 19.09672737121582, "learning_rate": 9.871598639455782e-06, "loss": 35.4983, "step": 11074 }, { "epoch": 263.6925373134328, "grad_norm": 22.557619094848633, "learning_rate": 9.870748299319729e-06, "loss": 34.8201, "step": 11075 }, { "epoch": 263.7164179104478, "grad_norm": 15.600252151489258, "learning_rate": 9.869897959183674e-06, "loss": 35.1453, "step": 11076 }, { "epoch": 263.7402985074627, "grad_norm": 14.198424339294434, "learning_rate": 9.869047619047621e-06, "loss": 34.9486, "step": 11077 }, { "epoch": 263.7641791044776, "grad_norm": 17.77661895751953, "learning_rate": 9.868197278911566e-06, "loss": 35.6436, "step": 11078 }, { "epoch": 263.78805970149256, "grad_norm": 15.402034759521484, "learning_rate": 9.867346938775511e-06, "loss": 35.532, "step": 11079 }, { "epoch": 263.81194029850747, "grad_norm": 16.1749267578125, "learning_rate": 9.866496598639457e-06, "loss": 35.2346, "step": 11080 }, { "epoch": 263.8358208955224, "grad_norm": 17.312959671020508, "learning_rate": 9.865646258503402e-06, "loss": 35.1054, "step": 11081 }, { "epoch": 263.85970149253734, "grad_norm": 20.187667846679688, "learning_rate": 9.864795918367349e-06, "loss": 36.0356, "step": 11082 }, { "epoch": 263.88358208955225, "grad_norm": 19.60118865966797, "learning_rate": 9.863945578231294e-06, "loss": 35.1906, "step": 11083 }, { "epoch": 263.90746268656716, "grad_norm": 15.168771743774414, "learning_rate": 9.863095238095239e-06, "loss": 35.4974, "step": 11084 }, { "epoch": 263.93134328358207, "grad_norm": 18.66791343688965, "learning_rate": 9.862244897959184e-06, "loss": 34.9326, "step": 11085 }, { "epoch": 263.95522388059703, "grad_norm": 20.811931610107422, "learning_rate": 9.86139455782313e-06, "loss": 35.0817, "step": 11086 }, { "epoch": 263.97910447761194, "grad_norm": 17.739748001098633, "learning_rate": 9.860544217687076e-06, "loss": 34.4644, "step": 11087 }, { "epoch": 264.0, "grad_norm": 13.603070259094238, "learning_rate": 9.859693877551022e-06, "loss": 29.6223, "step": 11088 }, { "epoch": 264.0238805970149, "grad_norm": 19.97246551513672, "learning_rate": 9.858843537414967e-06, "loss": 35.4455, "step": 11089 }, { "epoch": 264.0477611940299, "grad_norm": 18.857791900634766, "learning_rate": 9.857993197278912e-06, "loss": 35.2367, "step": 11090 }, { "epoch": 264.0716417910448, "grad_norm": 22.370830535888672, "learning_rate": 9.857142857142859e-06, "loss": 34.594, "step": 11091 }, { "epoch": 264.0955223880597, "grad_norm": 16.556453704833984, "learning_rate": 9.856292517006804e-06, "loss": 34.8722, "step": 11092 }, { "epoch": 264.1194029850746, "grad_norm": 16.963144302368164, "learning_rate": 9.85544217687075e-06, "loss": 35.4546, "step": 11093 }, { "epoch": 264.14328358208957, "grad_norm": 19.128211975097656, "learning_rate": 9.854591836734694e-06, "loss": 35.3999, "step": 11094 }, { "epoch": 264.1671641791045, "grad_norm": NaN, "learning_rate": 9.85374149659864e-06, "loss": 31.2429, "step": 11095 }, { "epoch": 264.1910447761194, "grad_norm": 16.205318450927734, "learning_rate": 9.85374149659864e-06, "loss": 33.6277, "step": 11096 }, { "epoch": 264.21492537313435, "grad_norm": 21.333288192749023, "learning_rate": 9.852891156462587e-06, "loss": 33.896, "step": 11097 }, { "epoch": 264.23880597014926, "grad_norm": 17.041810989379883, "learning_rate": 9.852040816326532e-06, "loss": 35.4154, "step": 11098 }, { "epoch": 264.26268656716417, "grad_norm": 15.609241485595703, "learning_rate": 9.851190476190477e-06, "loss": 34.4953, "step": 11099 }, { "epoch": 264.28656716417913, "grad_norm": 25.065488815307617, "learning_rate": 9.850340136054422e-06, "loss": 35.8314, "step": 11100 }, { "epoch": 264.31044776119404, "grad_norm": 17.720497131347656, "learning_rate": 9.849489795918367e-06, "loss": 36.2945, "step": 11101 }, { "epoch": 264.33432835820895, "grad_norm": 22.21006202697754, "learning_rate": 9.848639455782314e-06, "loss": 35.1814, "step": 11102 }, { "epoch": 264.35820895522386, "grad_norm": 19.061445236206055, "learning_rate": 9.84778911564626e-06, "loss": 34.9758, "step": 11103 }, { "epoch": 264.3820895522388, "grad_norm": 26.431421279907227, "learning_rate": 9.846938775510205e-06, "loss": 34.7944, "step": 11104 }, { "epoch": 264.40597014925373, "grad_norm": 17.299617767333984, "learning_rate": 9.84608843537415e-06, "loss": 35.0264, "step": 11105 }, { "epoch": 264.42985074626864, "grad_norm": 26.452072143554688, "learning_rate": 9.845238095238097e-06, "loss": 35.9682, "step": 11106 }, { "epoch": 264.4537313432836, "grad_norm": 19.976699829101562, "learning_rate": 9.844387755102042e-06, "loss": 36.4401, "step": 11107 }, { "epoch": 264.4776119402985, "grad_norm": 24.28655242919922, "learning_rate": 9.843537414965987e-06, "loss": 35.165, "step": 11108 }, { "epoch": 264.5014925373134, "grad_norm": 23.443822860717773, "learning_rate": 9.842687074829932e-06, "loss": 34.8307, "step": 11109 }, { "epoch": 264.52537313432833, "grad_norm": 18.813709259033203, "learning_rate": 9.841836734693878e-06, "loss": 36.4762, "step": 11110 }, { "epoch": 264.5492537313433, "grad_norm": 27.69648551940918, "learning_rate": 9.840986394557824e-06, "loss": 34.9634, "step": 11111 }, { "epoch": 264.5731343283582, "grad_norm": 19.616514205932617, "learning_rate": 9.84013605442177e-06, "loss": 35.0395, "step": 11112 }, { "epoch": 264.5970149253731, "grad_norm": 22.776201248168945, "learning_rate": 9.839285714285715e-06, "loss": 35.9003, "step": 11113 }, { "epoch": 264.6208955223881, "grad_norm": 25.816699981689453, "learning_rate": 9.83843537414966e-06, "loss": 34.4998, "step": 11114 }, { "epoch": 264.644776119403, "grad_norm": 15.887907981872559, "learning_rate": 9.837585034013605e-06, "loss": 35.5271, "step": 11115 }, { "epoch": 264.6686567164179, "grad_norm": 31.09943199157715, "learning_rate": 9.836734693877552e-06, "loss": 35.6997, "step": 11116 }, { "epoch": 264.6925373134328, "grad_norm": 19.370866775512695, "learning_rate": 9.835884353741497e-06, "loss": 35.66, "step": 11117 }, { "epoch": 264.7164179104478, "grad_norm": 36.458927154541016, "learning_rate": 9.835034013605444e-06, "loss": 36.1021, "step": 11118 }, { "epoch": 264.7402985074627, "grad_norm": 17.150787353515625, "learning_rate": 9.834183673469388e-06, "loss": 33.9725, "step": 11119 }, { "epoch": 264.7641791044776, "grad_norm": 31.396713256835938, "learning_rate": 9.833333333333333e-06, "loss": 34.9248, "step": 11120 }, { "epoch": 264.78805970149256, "grad_norm": 17.050716400146484, "learning_rate": 9.83248299319728e-06, "loss": 34.7095, "step": 11121 }, { "epoch": 264.81194029850747, "grad_norm": 33.8112678527832, "learning_rate": 9.831632653061225e-06, "loss": 34.9646, "step": 11122 }, { "epoch": 264.8358208955224, "grad_norm": 24.080162048339844, "learning_rate": 9.830782312925172e-06, "loss": 36.9544, "step": 11123 }, { "epoch": 264.85970149253734, "grad_norm": 28.9388370513916, "learning_rate": 9.829931972789115e-06, "loss": 35.5073, "step": 11124 }, { "epoch": 264.88358208955225, "grad_norm": 20.167919158935547, "learning_rate": 9.829081632653062e-06, "loss": 34.4186, "step": 11125 }, { "epoch": 264.90746268656716, "grad_norm": 35.84519958496094, "learning_rate": 9.828231292517008e-06, "loss": 35.6437, "step": 11126 }, { "epoch": 264.93134328358207, "grad_norm": 23.3156795501709, "learning_rate": 9.827380952380953e-06, "loss": 36.2353, "step": 11127 }, { "epoch": 264.95522388059703, "grad_norm": 39.56398391723633, "learning_rate": 9.8265306122449e-06, "loss": 34.7052, "step": 11128 }, { "epoch": 264.97910447761194, "grad_norm": 34.080101013183594, "learning_rate": 9.825680272108845e-06, "loss": 34.9117, "step": 11129 }, { "epoch": 265.0, "grad_norm": 30.904067993164062, "learning_rate": 9.82482993197279e-06, "loss": 29.7723, "step": 11130 }, { "epoch": 265.0238805970149, "grad_norm": 33.645423889160156, "learning_rate": 9.823979591836735e-06, "loss": 34.4562, "step": 11131 }, { "epoch": 265.0477611940299, "grad_norm": 27.822898864746094, "learning_rate": 9.823129251700682e-06, "loss": 35.9515, "step": 11132 }, { "epoch": 265.0716417910448, "grad_norm": 28.682424545288086, "learning_rate": 9.822278911564627e-06, "loss": 34.6865, "step": 11133 }, { "epoch": 265.0955223880597, "grad_norm": 31.294862747192383, "learning_rate": 9.821428571428573e-06, "loss": 35.8039, "step": 11134 }, { "epoch": 265.1194029850746, "grad_norm": 25.919233322143555, "learning_rate": 9.820578231292518e-06, "loss": 36.8074, "step": 11135 }, { "epoch": 265.14328358208957, "grad_norm": 33.744144439697266, "learning_rate": 9.819727891156463e-06, "loss": 35.3448, "step": 11136 }, { "epoch": 265.1671641791045, "grad_norm": NaN, "learning_rate": 9.81887755102041e-06, "loss": 47.9396, "step": 11137 }, { "epoch": 265.1910447761194, "grad_norm": 26.96946144104004, "learning_rate": 9.81887755102041e-06, "loss": 34.2474, "step": 11138 }, { "epoch": 265.21492537313435, "grad_norm": 33.9120979309082, "learning_rate": 9.818027210884355e-06, "loss": 35.1802, "step": 11139 }, { "epoch": 265.23880597014926, "grad_norm": 31.594274520874023, "learning_rate": 9.8171768707483e-06, "loss": 34.7878, "step": 11140 }, { "epoch": 265.26268656716417, "grad_norm": 29.770864486694336, "learning_rate": 9.816326530612245e-06, "loss": 35.0956, "step": 11141 }, { "epoch": 265.28656716417913, "grad_norm": 27.344839096069336, "learning_rate": 9.81547619047619e-06, "loss": 35.9117, "step": 11142 }, { "epoch": 265.31044776119404, "grad_norm": 34.17720413208008, "learning_rate": 9.814625850340137e-06, "loss": 35.0078, "step": 11143 }, { "epoch": 265.33432835820895, "grad_norm": 26.976036071777344, "learning_rate": 9.813775510204083e-06, "loss": 35.4266, "step": 11144 }, { "epoch": 265.35820895522386, "grad_norm": 32.93250274658203, "learning_rate": 9.812925170068028e-06, "loss": 34.8151, "step": 11145 }, { "epoch": 265.3820895522388, "grad_norm": 29.532920837402344, "learning_rate": 9.812074829931973e-06, "loss": 34.8695, "step": 11146 }, { "epoch": 265.40597014925373, "grad_norm": 31.906003952026367, "learning_rate": 9.81122448979592e-06, "loss": 35.2919, "step": 11147 }, { "epoch": 265.42985074626864, "grad_norm": 29.873435974121094, "learning_rate": 9.810374149659865e-06, "loss": 35.4161, "step": 11148 }, { "epoch": 265.4537313432836, "grad_norm": 32.02743911743164, "learning_rate": 9.80952380952381e-06, "loss": 33.8069, "step": 11149 }, { "epoch": 265.4776119402985, "grad_norm": 30.20789909362793, "learning_rate": 9.808673469387756e-06, "loss": 34.9376, "step": 11150 }, { "epoch": 265.5014925373134, "grad_norm": 30.224260330200195, "learning_rate": 9.8078231292517e-06, "loss": 35.2841, "step": 11151 }, { "epoch": 265.52537313432833, "grad_norm": 27.322105407714844, "learning_rate": 9.806972789115648e-06, "loss": 34.8103, "step": 11152 }, { "epoch": 265.5492537313433, "grad_norm": 32.584136962890625, "learning_rate": 9.806122448979593e-06, "loss": 35.3109, "step": 11153 }, { "epoch": 265.5731343283582, "grad_norm": 28.13442611694336, "learning_rate": 9.805272108843538e-06, "loss": 35.7694, "step": 11154 }, { "epoch": 265.5970149253731, "grad_norm": 32.61115264892578, "learning_rate": 9.804421768707483e-06, "loss": 34.7084, "step": 11155 }, { "epoch": 265.6208955223881, "grad_norm": 28.902996063232422, "learning_rate": 9.803571428571428e-06, "loss": 35.6423, "step": 11156 }, { "epoch": 265.644776119403, "grad_norm": 29.57887840270996, "learning_rate": 9.802721088435375e-06, "loss": 35.4606, "step": 11157 }, { "epoch": 265.6686567164179, "grad_norm": 27.093605041503906, "learning_rate": 9.80187074829932e-06, "loss": 35.4729, "step": 11158 }, { "epoch": 265.6925373134328, "grad_norm": 29.557764053344727, "learning_rate": 9.801020408163266e-06, "loss": 34.46, "step": 11159 }, { "epoch": 265.7164179104478, "grad_norm": 26.04657745361328, "learning_rate": 9.800170068027211e-06, "loss": 36.1655, "step": 11160 }, { "epoch": 265.7402985074627, "grad_norm": 34.16873550415039, "learning_rate": 9.799319727891158e-06, "loss": 34.7134, "step": 11161 }, { "epoch": 265.7641791044776, "grad_norm": 31.1507568359375, "learning_rate": 9.798469387755103e-06, "loss": 36.5413, "step": 11162 }, { "epoch": 265.78805970149256, "grad_norm": 28.49561882019043, "learning_rate": 9.797619047619048e-06, "loss": 34.321, "step": 11163 }, { "epoch": 265.81194029850747, "grad_norm": 25.886430740356445, "learning_rate": 9.796768707482993e-06, "loss": 35.7351, "step": 11164 }, { "epoch": 265.8358208955224, "grad_norm": 31.567081451416016, "learning_rate": 9.795918367346939e-06, "loss": 35.9788, "step": 11165 }, { "epoch": 265.85970149253734, "grad_norm": 27.389211654663086, "learning_rate": 9.795068027210886e-06, "loss": 35.588, "step": 11166 }, { "epoch": 265.88358208955225, "grad_norm": 31.836196899414062, "learning_rate": 9.79421768707483e-06, "loss": 33.7717, "step": 11167 }, { "epoch": 265.90746268656716, "grad_norm": 29.477245330810547, "learning_rate": 9.793367346938776e-06, "loss": 36.452, "step": 11168 }, { "epoch": 265.93134328358207, "grad_norm": 27.781171798706055, "learning_rate": 9.792517006802721e-06, "loss": 34.3081, "step": 11169 }, { "epoch": 265.95522388059703, "grad_norm": 27.0958194732666, "learning_rate": 9.791666666666666e-06, "loss": 35.0101, "step": 11170 }, { "epoch": 265.97910447761194, "grad_norm": 29.674766540527344, "learning_rate": 9.790816326530613e-06, "loss": 34.9771, "step": 11171 }, { "epoch": 266.0, "grad_norm": 23.849777221679688, "learning_rate": 9.789965986394558e-06, "loss": 29.2235, "step": 11172 }, { "epoch": 266.0238805970149, "grad_norm": 27.19853401184082, "learning_rate": 9.789115646258505e-06, "loss": 34.1162, "step": 11173 }, { "epoch": 266.0477611940299, "grad_norm": 24.806442260742188, "learning_rate": 9.78826530612245e-06, "loss": 34.4189, "step": 11174 }, { "epoch": 266.0716417910448, "grad_norm": 32.09627914428711, "learning_rate": 9.787414965986394e-06, "loss": 36.0046, "step": 11175 }, { "epoch": 266.0955223880597, "grad_norm": 26.7542667388916, "learning_rate": 9.786564625850341e-06, "loss": 34.3425, "step": 11176 }, { "epoch": 266.1194029850746, "grad_norm": 29.392253875732422, "learning_rate": 9.785714285714286e-06, "loss": 35.3791, "step": 11177 }, { "epoch": 266.14328358208957, "grad_norm": 26.908035278320312, "learning_rate": 9.784863945578233e-06, "loss": 34.5689, "step": 11178 }, { "epoch": 266.1671641791045, "grad_norm": 30.832035064697266, "learning_rate": 9.784013605442178e-06, "loss": 35.0826, "step": 11179 }, { "epoch": 266.1910447761194, "grad_norm": 25.285953521728516, "learning_rate": 9.783163265306123e-06, "loss": 36.279, "step": 11180 }, { "epoch": 266.21492537313435, "grad_norm": 29.02312660217285, "learning_rate": 9.782312925170069e-06, "loss": 34.9367, "step": 11181 }, { "epoch": 266.23880597014926, "grad_norm": 25.359874725341797, "learning_rate": 9.781462585034014e-06, "loss": 35.1541, "step": 11182 }, { "epoch": 266.26268656716417, "grad_norm": 31.250812530517578, "learning_rate": 9.78061224489796e-06, "loss": 34.5048, "step": 11183 }, { "epoch": 266.28656716417913, "grad_norm": 25.30484962463379, "learning_rate": 9.779761904761906e-06, "loss": 34.3538, "step": 11184 }, { "epoch": 266.31044776119404, "grad_norm": 29.979816436767578, "learning_rate": 9.778911564625851e-06, "loss": 36.5315, "step": 11185 }, { "epoch": 266.33432835820895, "grad_norm": 26.533626556396484, "learning_rate": 9.778061224489796e-06, "loss": 36.219, "step": 11186 }, { "epoch": 266.35820895522386, "grad_norm": 28.2723388671875, "learning_rate": 9.777210884353743e-06, "loss": 36.0145, "step": 11187 }, { "epoch": 266.3820895522388, "grad_norm": 25.69159698486328, "learning_rate": 9.776360544217688e-06, "loss": 34.5542, "step": 11188 }, { "epoch": 266.40597014925373, "grad_norm": 30.758480072021484, "learning_rate": 9.775510204081634e-06, "loss": 35.2351, "step": 11189 }, { "epoch": 266.42985074626864, "grad_norm": 28.381084442138672, "learning_rate": 9.774659863945579e-06, "loss": 34.902, "step": 11190 }, { "epoch": 266.4537313432836, "grad_norm": 31.15910530090332, "learning_rate": 9.773809523809524e-06, "loss": 35.0499, "step": 11191 }, { "epoch": 266.4776119402985, "grad_norm": 27.630773544311523, "learning_rate": 9.772959183673471e-06, "loss": 34.4043, "step": 11192 }, { "epoch": 266.5014925373134, "grad_norm": 30.58504867553711, "learning_rate": 9.772108843537416e-06, "loss": 36.0267, "step": 11193 }, { "epoch": 266.52537313432833, "grad_norm": 28.479476928710938, "learning_rate": 9.771258503401361e-06, "loss": 34.5769, "step": 11194 }, { "epoch": 266.5492537313433, "grad_norm": 27.64145851135254, "learning_rate": 9.770408163265307e-06, "loss": 34.2969, "step": 11195 }, { "epoch": 266.5731343283582, "grad_norm": 26.412824630737305, "learning_rate": 9.769557823129252e-06, "loss": 35.1859, "step": 11196 }, { "epoch": 266.5970149253731, "grad_norm": 31.01604461669922, "learning_rate": 9.768707482993199e-06, "loss": 34.4392, "step": 11197 }, { "epoch": 266.6208955223881, "grad_norm": 27.325464248657227, "learning_rate": 9.767857142857144e-06, "loss": 36.2125, "step": 11198 }, { "epoch": 266.644776119403, "grad_norm": 29.46001625061035, "learning_rate": 9.767006802721089e-06, "loss": 34.6586, "step": 11199 }, { "epoch": 266.6686567164179, "grad_norm": 26.013404846191406, "learning_rate": 9.766156462585034e-06, "loss": 36.9322, "step": 11200 }, { "epoch": 266.6925373134328, "grad_norm": 31.534645080566406, "learning_rate": 9.765306122448981e-06, "loss": 35.9709, "step": 11201 }, { "epoch": 266.7164179104478, "grad_norm": 27.22188377380371, "learning_rate": 9.764455782312926e-06, "loss": 34.5973, "step": 11202 }, { "epoch": 266.7402985074627, "grad_norm": 29.313804626464844, "learning_rate": 9.763605442176872e-06, "loss": 36.2248, "step": 11203 }, { "epoch": 266.7641791044776, "grad_norm": 24.632644653320312, "learning_rate": 9.762755102040817e-06, "loss": 35.0668, "step": 11204 }, { "epoch": 266.78805970149256, "grad_norm": 28.769880294799805, "learning_rate": 9.761904761904762e-06, "loss": 35.0193, "step": 11205 }, { "epoch": 266.81194029850747, "grad_norm": 25.255563735961914, "learning_rate": 9.761054421768709e-06, "loss": 35.7949, "step": 11206 }, { "epoch": 266.8358208955224, "grad_norm": 28.24742317199707, "learning_rate": 9.760204081632654e-06, "loss": 34.5309, "step": 11207 }, { "epoch": 266.85970149253734, "grad_norm": 24.634931564331055, "learning_rate": 9.7593537414966e-06, "loss": 34.8172, "step": 11208 }, { "epoch": 266.88358208955225, "grad_norm": 31.981687545776367, "learning_rate": 9.758503401360544e-06, "loss": 33.5913, "step": 11209 }, { "epoch": 266.90746268656716, "grad_norm": 27.23674964904785, "learning_rate": 9.75765306122449e-06, "loss": 34.5798, "step": 11210 }, { "epoch": 266.93134328358207, "grad_norm": 27.958499908447266, "learning_rate": 9.756802721088437e-06, "loss": 36.5225, "step": 11211 }, { "epoch": 266.95522388059703, "grad_norm": 28.824146270751953, "learning_rate": 9.755952380952382e-06, "loss": 34.5472, "step": 11212 }, { "epoch": 266.97910447761194, "grad_norm": NaN, "learning_rate": 9.755102040816327e-06, "loss": 35.4718, "step": 11213 }, { "epoch": 267.0, "grad_norm": 24.083078384399414, "learning_rate": 9.755102040816327e-06, "loss": 30.3082, "step": 11214 }, { "epoch": 267.0238805970149, "grad_norm": 24.65122413635254, "learning_rate": 9.754251700680272e-06, "loss": 35.0222, "step": 11215 }, { "epoch": 267.0477611940299, "grad_norm": 29.230562210083008, "learning_rate": 9.753401360544217e-06, "loss": 36.0623, "step": 11216 }, { "epoch": 267.0716417910448, "grad_norm": NaN, "learning_rate": 9.752551020408164e-06, "loss": 38.9229, "step": 11217 }, { "epoch": 267.0955223880597, "grad_norm": 22.877544403076172, "learning_rate": 9.752551020408164e-06, "loss": 35.772, "step": 11218 }, { "epoch": 267.1194029850746, "grad_norm": 33.116233825683594, "learning_rate": 9.75170068027211e-06, "loss": 35.0121, "step": 11219 }, { "epoch": 267.14328358208957, "grad_norm": 29.101282119750977, "learning_rate": 9.750850340136055e-06, "loss": 34.8035, "step": 11220 }, { "epoch": 267.1671641791045, "grad_norm": 28.55680274963379, "learning_rate": 9.75e-06, "loss": 34.4214, "step": 11221 }, { "epoch": 267.1910447761194, "grad_norm": 28.01104164123535, "learning_rate": 9.749149659863947e-06, "loss": 34.9526, "step": 11222 }, { "epoch": 267.21492537313435, "grad_norm": 26.790014266967773, "learning_rate": 9.748299319727892e-06, "loss": 34.9913, "step": 11223 }, { "epoch": 267.23880597014926, "grad_norm": 21.205453872680664, "learning_rate": 9.747448979591837e-06, "loss": 34.7966, "step": 11224 }, { "epoch": 267.26268656716417, "grad_norm": 30.262779235839844, "learning_rate": 9.746598639455784e-06, "loss": 34.7111, "step": 11225 }, { "epoch": 267.28656716417913, "grad_norm": 23.356658935546875, "learning_rate": 9.745748299319728e-06, "loss": 36.2362, "step": 11226 }, { "epoch": 267.31044776119404, "grad_norm": 32.315345764160156, "learning_rate": 9.744897959183674e-06, "loss": 34.9263, "step": 11227 }, { "epoch": 267.33432835820895, "grad_norm": 30.408292770385742, "learning_rate": 9.74404761904762e-06, "loss": 35.8779, "step": 11228 }, { "epoch": 267.35820895522386, "grad_norm": 22.527212142944336, "learning_rate": 9.743197278911567e-06, "loss": 34.943, "step": 11229 }, { "epoch": 267.3820895522388, "grad_norm": 24.883535385131836, "learning_rate": 9.742346938775512e-06, "loss": 34.7995, "step": 11230 }, { "epoch": 267.40597014925373, "grad_norm": 26.914213180541992, "learning_rate": 9.741496598639457e-06, "loss": 36.486, "step": 11231 }, { "epoch": 267.42985074626864, "grad_norm": 20.010807037353516, "learning_rate": 9.740646258503402e-06, "loss": 34.7843, "step": 11232 }, { "epoch": 267.4537313432836, "grad_norm": 33.46001434326172, "learning_rate": 9.739795918367347e-06, "loss": 35.9539, "step": 11233 }, { "epoch": 267.4776119402985, "grad_norm": 26.157451629638672, "learning_rate": 9.738945578231294e-06, "loss": 35.0678, "step": 11234 }, { "epoch": 267.5014925373134, "grad_norm": 35.644073486328125, "learning_rate": 9.73809523809524e-06, "loss": 35.87, "step": 11235 }, { "epoch": 267.52537313432833, "grad_norm": 31.433055877685547, "learning_rate": 9.737244897959185e-06, "loss": 34.5191, "step": 11236 }, { "epoch": 267.5492537313433, "grad_norm": 24.154205322265625, "learning_rate": 9.73639455782313e-06, "loss": 35.0339, "step": 11237 }, { "epoch": 267.5731343283582, "grad_norm": 23.054718017578125, "learning_rate": 9.735544217687075e-06, "loss": 35.1328, "step": 11238 }, { "epoch": 267.5970149253731, "grad_norm": 29.09781265258789, "learning_rate": 9.734693877551022e-06, "loss": 35.2278, "step": 11239 }, { "epoch": 267.6208955223881, "grad_norm": 19.367177963256836, "learning_rate": 9.733843537414967e-06, "loss": 34.4716, "step": 11240 }, { "epoch": 267.644776119403, "grad_norm": 32.98915481567383, "learning_rate": 9.732993197278912e-06, "loss": 34.7553, "step": 11241 }, { "epoch": 267.6686567164179, "grad_norm": 29.198795318603516, "learning_rate": 9.732142857142858e-06, "loss": 35.211, "step": 11242 }, { "epoch": 267.6925373134328, "grad_norm": 29.72975730895996, "learning_rate": 9.731292517006804e-06, "loss": 34.3863, "step": 11243 }, { "epoch": 267.7164179104478, "grad_norm": 24.961580276489258, "learning_rate": 9.73044217687075e-06, "loss": 34.9152, "step": 11244 }, { "epoch": 267.7402985074627, "grad_norm": 28.849666595458984, "learning_rate": 9.729591836734695e-06, "loss": 35.3199, "step": 11245 }, { "epoch": 267.7641791044776, "grad_norm": 24.429973602294922, "learning_rate": 9.72874149659864e-06, "loss": 35.2488, "step": 11246 }, { "epoch": 267.78805970149256, "grad_norm": 32.53376007080078, "learning_rate": 9.727891156462585e-06, "loss": 34.7199, "step": 11247 }, { "epoch": 267.81194029850747, "grad_norm": 25.328937530517578, "learning_rate": 9.727040816326532e-06, "loss": 34.8706, "step": 11248 }, { "epoch": 267.8358208955224, "grad_norm": 25.075822830200195, "learning_rate": 9.726190476190477e-06, "loss": 34.7175, "step": 11249 }, { "epoch": 267.85970149253734, "grad_norm": 23.769784927368164, "learning_rate": 9.725340136054422e-06, "loss": 35.775, "step": 11250 }, { "epoch": 267.88358208955225, "grad_norm": 25.97585105895996, "learning_rate": 9.724489795918368e-06, "loss": 34.1371, "step": 11251 }, { "epoch": 267.90746268656716, "grad_norm": 22.495651245117188, "learning_rate": 9.723639455782313e-06, "loss": 35.7473, "step": 11252 }, { "epoch": 267.93134328358207, "grad_norm": 25.758033752441406, "learning_rate": 9.72278911564626e-06, "loss": 35.7567, "step": 11253 }, { "epoch": 267.95522388059703, "grad_norm": NaN, "learning_rate": 9.721938775510205e-06, "loss": 48.6902, "step": 11254 }, { "epoch": 267.97910447761194, "grad_norm": 21.118587493896484, "learning_rate": 9.721938775510205e-06, "loss": 34.6988, "step": 11255 }, { "epoch": 268.0, "grad_norm": 23.438156127929688, "learning_rate": 9.72108843537415e-06, "loss": 31.1245, "step": 11256 }, { "epoch": 268.0238805970149, "grad_norm": 22.095630645751953, "learning_rate": 9.720238095238095e-06, "loss": 35.0346, "step": 11257 }, { "epoch": 268.0477611940299, "grad_norm": 19.394258499145508, "learning_rate": 9.719387755102042e-06, "loss": 35.3207, "step": 11258 }, { "epoch": 268.0716417910448, "grad_norm": 20.894695281982422, "learning_rate": 9.718537414965987e-06, "loss": 34.0132, "step": 11259 }, { "epoch": 268.0955223880597, "grad_norm": 19.614351272583008, "learning_rate": 9.717687074829933e-06, "loss": 34.6297, "step": 11260 }, { "epoch": 268.1194029850746, "grad_norm": 24.058088302612305, "learning_rate": 9.716836734693878e-06, "loss": 35.1796, "step": 11261 }, { "epoch": 268.14328358208957, "grad_norm": 17.5927734375, "learning_rate": 9.715986394557823e-06, "loss": 35.7046, "step": 11262 }, { "epoch": 268.1671641791045, "grad_norm": 21.853776931762695, "learning_rate": 9.71513605442177e-06, "loss": 35.5309, "step": 11263 }, { "epoch": 268.1910447761194, "grad_norm": 17.455530166625977, "learning_rate": 9.714285714285715e-06, "loss": 34.8216, "step": 11264 }, { "epoch": 268.21492537313435, "grad_norm": 18.30791664123535, "learning_rate": 9.71343537414966e-06, "loss": 35.6202, "step": 11265 }, { "epoch": 268.23880597014926, "grad_norm": 20.19818115234375, "learning_rate": 9.712585034013606e-06, "loss": 34.8912, "step": 11266 }, { "epoch": 268.26268656716417, "grad_norm": 19.09391975402832, "learning_rate": 9.71173469387755e-06, "loss": 35.8681, "step": 11267 }, { "epoch": 268.28656716417913, "grad_norm": 15.085587501525879, "learning_rate": 9.710884353741498e-06, "loss": 35.7501, "step": 11268 }, { "epoch": 268.31044776119404, "grad_norm": 24.19599151611328, "learning_rate": 9.710034013605443e-06, "loss": 33.5056, "step": 11269 }, { "epoch": 268.33432835820895, "grad_norm": 17.074016571044922, "learning_rate": 9.70918367346939e-06, "loss": 35.6705, "step": 11270 }, { "epoch": 268.35820895522386, "grad_norm": 20.87940216064453, "learning_rate": 9.708333333333333e-06, "loss": 36.1954, "step": 11271 }, { "epoch": 268.3820895522388, "grad_norm": 22.110916137695312, "learning_rate": 9.707482993197278e-06, "loss": 33.2204, "step": 11272 }, { "epoch": 268.40597014925373, "grad_norm": 18.115238189697266, "learning_rate": 9.706632653061225e-06, "loss": 37.2117, "step": 11273 }, { "epoch": 268.42985074626864, "grad_norm": 31.70871353149414, "learning_rate": 9.70578231292517e-06, "loss": 35.9672, "step": 11274 }, { "epoch": 268.4537313432836, "grad_norm": 24.8507022857666, "learning_rate": 9.704931972789117e-06, "loss": 35.1141, "step": 11275 }, { "epoch": 268.4776119402985, "grad_norm": 27.8154354095459, "learning_rate": 9.704081632653061e-06, "loss": 34.846, "step": 11276 }, { "epoch": 268.5014925373134, "grad_norm": 24.144790649414062, "learning_rate": 9.703231292517008e-06, "loss": 36.6943, "step": 11277 }, { "epoch": 268.52537313432833, "grad_norm": 25.23328399658203, "learning_rate": 9.702380952380953e-06, "loss": 34.1247, "step": 11278 }, { "epoch": 268.5492537313433, "grad_norm": 19.879201889038086, "learning_rate": 9.701530612244898e-06, "loss": 35.1627, "step": 11279 }, { "epoch": 268.5731343283582, "grad_norm": 22.306997299194336, "learning_rate": 9.700680272108845e-06, "loss": 33.6694, "step": 11280 }, { "epoch": 268.5970149253731, "grad_norm": 18.249324798583984, "learning_rate": 9.69982993197279e-06, "loss": 34.9906, "step": 11281 }, { "epoch": 268.6208955223881, "grad_norm": 21.096303939819336, "learning_rate": 9.698979591836736e-06, "loss": 35.7008, "step": 11282 }, { "epoch": 268.644776119403, "grad_norm": 17.10426902770996, "learning_rate": 9.69812925170068e-06, "loss": 35.0684, "step": 11283 }, { "epoch": 268.6686567164179, "grad_norm": 21.25813102722168, "learning_rate": 9.697278911564628e-06, "loss": 34.8057, "step": 11284 }, { "epoch": 268.6925373134328, "grad_norm": 16.934974670410156, "learning_rate": 9.696428571428573e-06, "loss": 34.9907, "step": 11285 }, { "epoch": 268.7164179104478, "grad_norm": 18.4528751373291, "learning_rate": 9.695578231292518e-06, "loss": 35.7507, "step": 11286 }, { "epoch": 268.7402985074627, "grad_norm": 17.586544036865234, "learning_rate": 9.694727891156463e-06, "loss": 34.9906, "step": 11287 }, { "epoch": 268.7641791044776, "grad_norm": 17.558521270751953, "learning_rate": 9.693877551020408e-06, "loss": 36.1545, "step": 11288 }, { "epoch": 268.78805970149256, "grad_norm": 14.969913482666016, "learning_rate": 9.693027210884355e-06, "loss": 35.1573, "step": 11289 }, { "epoch": 268.81194029850747, "grad_norm": 16.84869384765625, "learning_rate": 9.6921768707483e-06, "loss": 32.984, "step": 11290 }, { "epoch": 268.8358208955224, "grad_norm": 18.702037811279297, "learning_rate": 9.691326530612246e-06, "loss": 35.1227, "step": 11291 }, { "epoch": 268.85970149253734, "grad_norm": 16.950538635253906, "learning_rate": 9.690476190476191e-06, "loss": 34.2167, "step": 11292 }, { "epoch": 268.88358208955225, "grad_norm": 16.66674041748047, "learning_rate": 9.689625850340136e-06, "loss": 35.0838, "step": 11293 }, { "epoch": 268.90746268656716, "grad_norm": 22.63540267944336, "learning_rate": 9.688775510204083e-06, "loss": 35.0731, "step": 11294 }, { "epoch": 268.93134328358207, "grad_norm": 18.76118278503418, "learning_rate": 9.687925170068028e-06, "loss": 36.0074, "step": 11295 }, { "epoch": 268.95522388059703, "grad_norm": 19.704389572143555, "learning_rate": 9.687074829931973e-06, "loss": 35.7865, "step": 11296 }, { "epoch": 268.97910447761194, "grad_norm": 23.83836555480957, "learning_rate": 9.686224489795919e-06, "loss": 34.5027, "step": 11297 }, { "epoch": 269.0, "grad_norm": 15.99374771118164, "learning_rate": 9.685374149659866e-06, "loss": 30.7481, "step": 11298 }, { "epoch": 269.0238805970149, "grad_norm": 17.470727920532227, "learning_rate": 9.68452380952381e-06, "loss": 34.7472, "step": 11299 }, { "epoch": 269.0477611940299, "grad_norm": 24.246135711669922, "learning_rate": 9.683673469387756e-06, "loss": 35.3944, "step": 11300 }, { "epoch": 269.0716417910448, "grad_norm": 14.26496696472168, "learning_rate": 9.682823129251701e-06, "loss": 34.7151, "step": 11301 }, { "epoch": 269.0955223880597, "grad_norm": 25.587913513183594, "learning_rate": 9.681972789115646e-06, "loss": 34.9974, "step": 11302 }, { "epoch": 269.1194029850746, "grad_norm": 19.023223876953125, "learning_rate": 9.681122448979593e-06, "loss": 35.0883, "step": 11303 }, { "epoch": 269.14328358208957, "grad_norm": 20.049644470214844, "learning_rate": 9.680272108843538e-06, "loss": 34.819, "step": 11304 }, { "epoch": 269.1671641791045, "grad_norm": 19.549583435058594, "learning_rate": 9.679421768707484e-06, "loss": 34.5219, "step": 11305 }, { "epoch": 269.1910447761194, "grad_norm": 18.334081649780273, "learning_rate": 9.678571428571429e-06, "loss": 34.4315, "step": 11306 }, { "epoch": 269.21492537313435, "grad_norm": 22.47246742248535, "learning_rate": 9.677721088435374e-06, "loss": 34.5326, "step": 11307 }, { "epoch": 269.23880597014926, "grad_norm": 19.489429473876953, "learning_rate": 9.676870748299321e-06, "loss": 33.7929, "step": 11308 }, { "epoch": 269.26268656716417, "grad_norm": 18.83839988708496, "learning_rate": 9.676020408163266e-06, "loss": 34.0231, "step": 11309 }, { "epoch": 269.28656716417913, "grad_norm": 23.910572052001953, "learning_rate": 9.675170068027211e-06, "loss": 34.8476, "step": 11310 }, { "epoch": 269.31044776119404, "grad_norm": 17.442350387573242, "learning_rate": 9.674319727891157e-06, "loss": 35.3672, "step": 11311 }, { "epoch": 269.33432835820895, "grad_norm": 20.737442016601562, "learning_rate": 9.673469387755103e-06, "loss": 35.1205, "step": 11312 }, { "epoch": 269.35820895522386, "grad_norm": 21.674774169921875, "learning_rate": 9.672619047619049e-06, "loss": 34.7912, "step": 11313 }, { "epoch": 269.3820895522388, "grad_norm": 16.29989242553711, "learning_rate": 9.671768707482994e-06, "loss": 34.8348, "step": 11314 }, { "epoch": 269.40597014925373, "grad_norm": 20.943859100341797, "learning_rate": 9.670918367346939e-06, "loss": 34.3113, "step": 11315 }, { "epoch": 269.42985074626864, "grad_norm": 19.311450958251953, "learning_rate": 9.670068027210884e-06, "loss": 34.4545, "step": 11316 }, { "epoch": 269.4537313432836, "grad_norm": 18.16386604309082, "learning_rate": 9.669217687074831e-06, "loss": 35.0409, "step": 11317 }, { "epoch": 269.4776119402985, "grad_norm": 22.301490783691406, "learning_rate": 9.668367346938776e-06, "loss": 36.0439, "step": 11318 }, { "epoch": 269.5014925373134, "grad_norm": 18.263423919677734, "learning_rate": 9.667517006802723e-06, "loss": 34.1385, "step": 11319 }, { "epoch": 269.52537313432833, "grad_norm": 16.526214599609375, "learning_rate": 9.666666666666667e-06, "loss": 35.1601, "step": 11320 }, { "epoch": 269.5492537313433, "grad_norm": 26.40668296813965, "learning_rate": 9.665816326530612e-06, "loss": 34.8927, "step": 11321 }, { "epoch": 269.5731343283582, "grad_norm": 16.773963928222656, "learning_rate": 9.664965986394559e-06, "loss": 35.1632, "step": 11322 }, { "epoch": 269.5970149253731, "grad_norm": 23.497394561767578, "learning_rate": 9.664115646258504e-06, "loss": 35.0348, "step": 11323 }, { "epoch": 269.6208955223881, "grad_norm": 18.24677085876465, "learning_rate": 9.663265306122451e-06, "loss": 34.9783, "step": 11324 }, { "epoch": 269.644776119403, "grad_norm": 18.07054901123047, "learning_rate": 9.662414965986396e-06, "loss": 35.1278, "step": 11325 }, { "epoch": 269.6686567164179, "grad_norm": 22.57805061340332, "learning_rate": 9.66156462585034e-06, "loss": 35.4269, "step": 11326 }, { "epoch": 269.6925373134328, "grad_norm": 16.485912322998047, "learning_rate": 9.660714285714287e-06, "loss": 36.1043, "step": 11327 }, { "epoch": 269.7164179104478, "grad_norm": 19.971275329589844, "learning_rate": 9.659863945578232e-06, "loss": 35.3867, "step": 11328 }, { "epoch": 269.7402985074627, "grad_norm": 22.916345596313477, "learning_rate": 9.659013605442179e-06, "loss": 36.8476, "step": 11329 }, { "epoch": 269.7641791044776, "grad_norm": 15.270123481750488, "learning_rate": 9.658163265306124e-06, "loss": 35.4615, "step": 11330 }, { "epoch": 269.78805970149256, "grad_norm": 30.578062057495117, "learning_rate": 9.657312925170069e-06, "loss": 36.31, "step": 11331 }, { "epoch": 269.81194029850747, "grad_norm": 20.64407730102539, "learning_rate": 9.656462585034014e-06, "loss": 35.6959, "step": 11332 }, { "epoch": 269.8358208955224, "grad_norm": 28.821992874145508, "learning_rate": 9.65561224489796e-06, "loss": 36.4402, "step": 11333 }, { "epoch": 269.85970149253734, "grad_norm": 22.246870040893555, "learning_rate": 9.654761904761906e-06, "loss": 35.2498, "step": 11334 }, { "epoch": 269.88358208955225, "grad_norm": 24.748897552490234, "learning_rate": 9.653911564625852e-06, "loss": 35.5736, "step": 11335 }, { "epoch": 269.90746268656716, "grad_norm": 24.36867332458496, "learning_rate": 9.653061224489797e-06, "loss": 35.4285, "step": 11336 }, { "epoch": 269.93134328358207, "grad_norm": 18.55751609802246, "learning_rate": 9.652210884353742e-06, "loss": 35.6872, "step": 11337 }, { "epoch": 269.95522388059703, "grad_norm": 29.225358963012695, "learning_rate": 9.651360544217689e-06, "loss": 34.2868, "step": 11338 }, { "epoch": 269.97910447761194, "grad_norm": 21.866971969604492, "learning_rate": 9.650510204081634e-06, "loss": 34.3606, "step": 11339 }, { "epoch": 270.0, "grad_norm": 24.2423038482666, "learning_rate": 9.64965986394558e-06, "loss": 30.7976, "step": 11340 }, { "epoch": 270.0238805970149, "grad_norm": 23.909801483154297, "learning_rate": 9.648809523809524e-06, "loss": 34.9145, "step": 11341 }, { "epoch": 270.0477611940299, "grad_norm": 31.938671112060547, "learning_rate": 9.64795918367347e-06, "loss": 36.1431, "step": 11342 }, { "epoch": 270.0716417910448, "grad_norm": 20.3516845703125, "learning_rate": 9.647108843537416e-06, "loss": 35.3669, "step": 11343 }, { "epoch": 270.0955223880597, "grad_norm": 35.39886474609375, "learning_rate": 9.646258503401362e-06, "loss": 35.0848, "step": 11344 }, { "epoch": 270.1194029850746, "grad_norm": 28.242713928222656, "learning_rate": 9.645408163265307e-06, "loss": 34.0073, "step": 11345 }, { "epoch": 270.14328358208957, "grad_norm": 35.218013763427734, "learning_rate": 9.644557823129252e-06, "loss": 35.9957, "step": 11346 }, { "epoch": 270.1671641791045, "grad_norm": 30.707536697387695, "learning_rate": 9.643707482993197e-06, "loss": 34.4482, "step": 11347 }, { "epoch": 270.1910447761194, "grad_norm": 24.374370574951172, "learning_rate": 9.642857142857144e-06, "loss": 33.9393, "step": 11348 }, { "epoch": 270.21492537313435, "grad_norm": 25.397491455078125, "learning_rate": 9.64200680272109e-06, "loss": 35.2775, "step": 11349 }, { "epoch": 270.23880597014926, "grad_norm": 31.241239547729492, "learning_rate": 9.641156462585035e-06, "loss": 34.6514, "step": 11350 }, { "epoch": 270.26268656716417, "grad_norm": 23.18448257446289, "learning_rate": 9.64030612244898e-06, "loss": 35.7867, "step": 11351 }, { "epoch": 270.28656716417913, "grad_norm": 33.51908493041992, "learning_rate": 9.639455782312927e-06, "loss": 35.6989, "step": 11352 }, { "epoch": 270.31044776119404, "grad_norm": 25.233726501464844, "learning_rate": 9.638605442176872e-06, "loss": 34.5514, "step": 11353 }, { "epoch": 270.33432835820895, "grad_norm": 32.36314392089844, "learning_rate": 9.637755102040817e-06, "loss": 33.9941, "step": 11354 }, { "epoch": 270.35820895522386, "grad_norm": 27.87277603149414, "learning_rate": 9.636904761904762e-06, "loss": 34.882, "step": 11355 }, { "epoch": 270.3820895522388, "grad_norm": 29.331741333007812, "learning_rate": 9.636054421768707e-06, "loss": 34.7327, "step": 11356 }, { "epoch": 270.40597014925373, "grad_norm": 25.99250602722168, "learning_rate": 9.635204081632654e-06, "loss": 35.8672, "step": 11357 }, { "epoch": 270.42985074626864, "grad_norm": 27.950302124023438, "learning_rate": 9.6343537414966e-06, "loss": 34.026, "step": 11358 }, { "epoch": 270.4537313432836, "grad_norm": 25.09626579284668, "learning_rate": 9.633503401360545e-06, "loss": 35.441, "step": 11359 }, { "epoch": 270.4776119402985, "grad_norm": 33.31979751586914, "learning_rate": 9.63265306122449e-06, "loss": 35.6768, "step": 11360 }, { "epoch": 270.5014925373134, "grad_norm": 28.50912094116211, "learning_rate": 9.631802721088435e-06, "loss": 34.9504, "step": 11361 }, { "epoch": 270.52537313432833, "grad_norm": 30.334747314453125, "learning_rate": 9.630952380952382e-06, "loss": 34.6947, "step": 11362 }, { "epoch": 270.5492537313433, "grad_norm": 26.71424674987793, "learning_rate": 9.630102040816327e-06, "loss": 34.0974, "step": 11363 }, { "epoch": 270.5731343283582, "grad_norm": 28.093780517578125, "learning_rate": 9.629251700680272e-06, "loss": 34.1479, "step": 11364 }, { "epoch": 270.5970149253731, "grad_norm": 26.249269485473633, "learning_rate": 9.628401360544218e-06, "loss": 35.649, "step": 11365 }, { "epoch": 270.6208955223881, "grad_norm": 31.712121963500977, "learning_rate": 9.627551020408165e-06, "loss": 35.2305, "step": 11366 }, { "epoch": 270.644776119403, "grad_norm": 28.87372589111328, "learning_rate": 9.62670068027211e-06, "loss": 34.4322, "step": 11367 }, { "epoch": 270.6686567164179, "grad_norm": 31.236000061035156, "learning_rate": 9.625850340136055e-06, "loss": 35.8472, "step": 11368 }, { "epoch": 270.6925373134328, "grad_norm": 27.504440307617188, "learning_rate": 9.625e-06, "loss": 34.3415, "step": 11369 }, { "epoch": 270.7164179104478, "grad_norm": 28.205368041992188, "learning_rate": 9.624149659863945e-06, "loss": 35.1829, "step": 11370 }, { "epoch": 270.7402985074627, "grad_norm": 25.047800064086914, "learning_rate": 9.623299319727892e-06, "loss": 35.0551, "step": 11371 }, { "epoch": 270.7641791044776, "grad_norm": 31.453554153442383, "learning_rate": 9.622448979591837e-06, "loss": 35.1136, "step": 11372 }, { "epoch": 270.78805970149256, "grad_norm": 29.390029907226562, "learning_rate": 9.621598639455784e-06, "loss": 34.5881, "step": 11373 }, { "epoch": 270.81194029850747, "grad_norm": 30.30431365966797, "learning_rate": 9.62074829931973e-06, "loss": 35.6625, "step": 11374 }, { "epoch": 270.8358208955224, "grad_norm": 27.459156036376953, "learning_rate": 9.619897959183673e-06, "loss": 35.4493, "step": 11375 }, { "epoch": 270.85970149253734, "grad_norm": 29.01818084716797, "learning_rate": 9.61904761904762e-06, "loss": 35.478, "step": 11376 }, { "epoch": 270.88358208955225, "grad_norm": 25.12757682800293, "learning_rate": 9.618197278911565e-06, "loss": 34.8158, "step": 11377 }, { "epoch": 270.90746268656716, "grad_norm": 27.613876342773438, "learning_rate": 9.617346938775512e-06, "loss": 35.2491, "step": 11378 }, { "epoch": 270.93134328358207, "grad_norm": 26.43328857421875, "learning_rate": 9.616496598639457e-06, "loss": 34.1624, "step": 11379 }, { "epoch": 270.95522388059703, "grad_norm": 33.36186981201172, "learning_rate": 9.6156462585034e-06, "loss": 35.9641, "step": 11380 }, { "epoch": 270.97910447761194, "grad_norm": 26.321887969970703, "learning_rate": 9.614795918367348e-06, "loss": 35.3548, "step": 11381 }, { "epoch": 271.0, "grad_norm": 26.828222274780273, "learning_rate": 9.613945578231293e-06, "loss": 30.8285, "step": 11382 }, { "epoch": 271.0238805970149, "grad_norm": 30.941560745239258, "learning_rate": 9.61309523809524e-06, "loss": 35.4498, "step": 11383 }, { "epoch": 271.0477611940299, "grad_norm": 28.217666625976562, "learning_rate": 9.612244897959185e-06, "loss": 34.1935, "step": 11384 }, { "epoch": 271.0716417910448, "grad_norm": 28.074115753173828, "learning_rate": 9.61139455782313e-06, "loss": 33.9021, "step": 11385 }, { "epoch": 271.0955223880597, "grad_norm": 27.322423934936523, "learning_rate": 9.610544217687075e-06, "loss": 33.9694, "step": 11386 }, { "epoch": 271.1194029850746, "grad_norm": 21.89845085144043, "learning_rate": 9.60969387755102e-06, "loss": 35.0677, "step": 11387 }, { "epoch": 271.14328358208957, "grad_norm": 31.446561813354492, "learning_rate": 9.608843537414967e-06, "loss": 34.8357, "step": 11388 }, { "epoch": 271.1671641791045, "grad_norm": 26.480789184570312, "learning_rate": 9.607993197278913e-06, "loss": 34.9295, "step": 11389 }, { "epoch": 271.1910447761194, "grad_norm": 28.349945068359375, "learning_rate": 9.607142857142858e-06, "loss": 35.8994, "step": 11390 }, { "epoch": 271.21492537313435, "grad_norm": 28.527359008789062, "learning_rate": 9.606292517006803e-06, "loss": 35.036, "step": 11391 }, { "epoch": 271.23880597014926, "grad_norm": 26.5610408782959, "learning_rate": 9.60544217687075e-06, "loss": 35.1133, "step": 11392 }, { "epoch": 271.26268656716417, "grad_norm": 22.178897857666016, "learning_rate": 9.604591836734695e-06, "loss": 34.6387, "step": 11393 }, { "epoch": 271.28656716417913, "grad_norm": 29.09298324584961, "learning_rate": 9.60374149659864e-06, "loss": 35.7886, "step": 11394 }, { "epoch": 271.31044776119404, "grad_norm": 21.043861389160156, "learning_rate": 9.602891156462586e-06, "loss": 34.2892, "step": 11395 }, { "epoch": 271.33432835820895, "grad_norm": 37.17613220214844, "learning_rate": 9.60204081632653e-06, "loss": 36.2052, "step": 11396 }, { "epoch": 271.35820895522386, "grad_norm": 33.103919982910156, "learning_rate": 9.601190476190478e-06, "loss": 34.1138, "step": 11397 }, { "epoch": 271.3820895522388, "grad_norm": 28.71168327331543, "learning_rate": 9.600340136054423e-06, "loss": 33.737, "step": 11398 }, { "epoch": 271.40597014925373, "grad_norm": 27.4705810546875, "learning_rate": 9.599489795918368e-06, "loss": 35.4473, "step": 11399 }, { "epoch": 271.42985074626864, "grad_norm": 25.91693878173828, "learning_rate": 9.598639455782313e-06, "loss": 34.5243, "step": 11400 }, { "epoch": 271.4537313432836, "grad_norm": 20.86406707763672, "learning_rate": 9.597789115646258e-06, "loss": 35.2066, "step": 11401 }, { "epoch": 271.4776119402985, "grad_norm": 32.478431701660156, "learning_rate": 9.596938775510205e-06, "loss": 33.305, "step": 11402 }, { "epoch": 271.5014925373134, "grad_norm": 26.85079002380371, "learning_rate": 9.59608843537415e-06, "loss": 35.2552, "step": 11403 }, { "epoch": 271.52537313432833, "grad_norm": 29.79864501953125, "learning_rate": 9.595238095238096e-06, "loss": 35.222, "step": 11404 }, { "epoch": 271.5492537313433, "grad_norm": 26.909713745117188, "learning_rate": 9.594387755102041e-06, "loss": 34.6982, "step": 11405 }, { "epoch": 271.5731343283582, "grad_norm": 26.60768699645996, "learning_rate": 9.593537414965988e-06, "loss": 34.5211, "step": 11406 }, { "epoch": 271.5970149253731, "grad_norm": 23.74742889404297, "learning_rate": 9.592687074829933e-06, "loss": 36.3786, "step": 11407 }, { "epoch": 271.6208955223881, "grad_norm": 30.495195388793945, "learning_rate": 9.591836734693878e-06, "loss": 35.8866, "step": 11408 }, { "epoch": 271.644776119403, "grad_norm": 24.703819274902344, "learning_rate": 9.590986394557823e-06, "loss": 35.5444, "step": 11409 }, { "epoch": 271.6686567164179, "grad_norm": 32.43016052246094, "learning_rate": 9.590136054421769e-06, "loss": 34.7922, "step": 11410 }, { "epoch": 271.6925373134328, "grad_norm": 28.583765029907227, "learning_rate": 9.589285714285716e-06, "loss": 35.8122, "step": 11411 }, { "epoch": 271.7164179104478, "grad_norm": 23.935251235961914, "learning_rate": 9.58843537414966e-06, "loss": 35.6513, "step": 11412 }, { "epoch": 271.7402985074627, "grad_norm": 20.8134822845459, "learning_rate": 9.587585034013606e-06, "loss": 35.5131, "step": 11413 }, { "epoch": 271.7641791044776, "grad_norm": 29.37078857421875, "learning_rate": 9.586734693877551e-06, "loss": 34.3373, "step": 11414 }, { "epoch": 271.78805970149256, "grad_norm": 21.80526351928711, "learning_rate": 9.585884353741496e-06, "loss": 36.0305, "step": 11415 }, { "epoch": 271.81194029850747, "grad_norm": 33.87484359741211, "learning_rate": 9.585034013605443e-06, "loss": 34.1205, "step": 11416 }, { "epoch": 271.8358208955224, "grad_norm": 28.685937881469727, "learning_rate": 9.584183673469388e-06, "loss": 35.9275, "step": 11417 }, { "epoch": 271.85970149253734, "grad_norm": 23.839086532592773, "learning_rate": 9.583333333333335e-06, "loss": 33.1174, "step": 11418 }, { "epoch": 271.88358208955225, "grad_norm": 24.347623825073242, "learning_rate": 9.582482993197279e-06, "loss": 35.3959, "step": 11419 }, { "epoch": 271.90746268656716, "grad_norm": 24.888187408447266, "learning_rate": 9.581632653061226e-06, "loss": 34.5073, "step": 11420 }, { "epoch": 271.93134328358207, "grad_norm": 19.496639251708984, "learning_rate": 9.580782312925171e-06, "loss": 35.7947, "step": 11421 }, { "epoch": 271.95522388059703, "grad_norm": 27.817222595214844, "learning_rate": 9.579931972789116e-06, "loss": 35.4779, "step": 11422 }, { "epoch": 271.97910447761194, "grad_norm": 20.708219528198242, "learning_rate": 9.579081632653063e-06, "loss": 35.4572, "step": 11423 }, { "epoch": 272.0, "grad_norm": 29.297521591186523, "learning_rate": 9.578231292517007e-06, "loss": 30.6278, "step": 11424 }, { "epoch": 272.0238805970149, "grad_norm": 29.625965118408203, "learning_rate": 9.577380952380953e-06, "loss": 35.0764, "step": 11425 }, { "epoch": 272.0477611940299, "grad_norm": 25.00616455078125, "learning_rate": 9.576530612244899e-06, "loss": 34.7385, "step": 11426 }, { "epoch": 272.0716417910448, "grad_norm": 24.672815322875977, "learning_rate": 9.575680272108844e-06, "loss": 34.5977, "step": 11427 }, { "epoch": 272.0955223880597, "grad_norm": 26.59151268005371, "learning_rate": 9.57482993197279e-06, "loss": 34.7266, "step": 11428 }, { "epoch": 272.1194029850746, "grad_norm": 20.41710662841797, "learning_rate": 9.573979591836736e-06, "loss": 34.9463, "step": 11429 }, { "epoch": 272.14328358208957, "grad_norm": 29.853113174438477, "learning_rate": 9.573129251700681e-06, "loss": 35.3496, "step": 11430 }, { "epoch": 272.1671641791045, "grad_norm": 24.686464309692383, "learning_rate": 9.572278911564626e-06, "loss": 35.0309, "step": 11431 }, { "epoch": 272.1910447761194, "grad_norm": 27.58829689025879, "learning_rate": 9.571428571428573e-06, "loss": 34.8087, "step": 11432 }, { "epoch": 272.21492537313435, "grad_norm": 26.95330810546875, "learning_rate": 9.570578231292518e-06, "loss": 35.4672, "step": 11433 }, { "epoch": 272.23880597014926, "grad_norm": 20.95945167541504, "learning_rate": 9.569727891156464e-06, "loss": 34.7029, "step": 11434 }, { "epoch": 272.26268656716417, "grad_norm": 24.166494369506836, "learning_rate": 9.568877551020409e-06, "loss": 35.6845, "step": 11435 }, { "epoch": 272.28656716417913, "grad_norm": 23.78201675415039, "learning_rate": 9.568027210884354e-06, "loss": 35.3705, "step": 11436 }, { "epoch": 272.31044776119404, "grad_norm": 20.939838409423828, "learning_rate": 9.567176870748301e-06, "loss": 34.8826, "step": 11437 }, { "epoch": 272.33432835820895, "grad_norm": 24.9542236328125, "learning_rate": 9.566326530612246e-06, "loss": 34.1938, "step": 11438 }, { "epoch": 272.35820895522386, "grad_norm": 21.42974090576172, "learning_rate": 9.565476190476191e-06, "loss": 36.1493, "step": 11439 }, { "epoch": 272.3820895522388, "grad_norm": 27.042057037353516, "learning_rate": 9.564625850340137e-06, "loss": 34.8768, "step": 11440 }, { "epoch": 272.40597014925373, "grad_norm": 20.202251434326172, "learning_rate": 9.563775510204082e-06, "loss": 34.2451, "step": 11441 }, { "epoch": 272.42985074626864, "grad_norm": 23.43889045715332, "learning_rate": 9.562925170068029e-06, "loss": 35.0835, "step": 11442 }, { "epoch": 272.4537313432836, "grad_norm": 22.16297721862793, "learning_rate": 9.562074829931974e-06, "loss": 34.7922, "step": 11443 }, { "epoch": 272.4776119402985, "grad_norm": 25.513879776000977, "learning_rate": 9.561224489795919e-06, "loss": 35.4338, "step": 11444 }, { "epoch": 272.5014925373134, "grad_norm": 21.959760665893555, "learning_rate": 9.560374149659864e-06, "loss": 35.7945, "step": 11445 }, { "epoch": 272.52537313432833, "grad_norm": 25.527957916259766, "learning_rate": 9.559523809523811e-06, "loss": 34.7416, "step": 11446 }, { "epoch": 272.5492537313433, "grad_norm": 21.50974464416504, "learning_rate": 9.558673469387756e-06, "loss": 34.3238, "step": 11447 }, { "epoch": 272.5731343283582, "grad_norm": 22.119808197021484, "learning_rate": 9.557823129251701e-06, "loss": 35.0855, "step": 11448 }, { "epoch": 272.5970149253731, "grad_norm": 19.815471649169922, "learning_rate": 9.556972789115647e-06, "loss": 34.5338, "step": 11449 }, { "epoch": 272.6208955223881, "grad_norm": 23.734107971191406, "learning_rate": 9.556122448979592e-06, "loss": 35.5892, "step": 11450 }, { "epoch": 272.644776119403, "grad_norm": 18.64801788330078, "learning_rate": 9.555272108843539e-06, "loss": 35.9366, "step": 11451 }, { "epoch": 272.6686567164179, "grad_norm": 26.981422424316406, "learning_rate": 9.554421768707484e-06, "loss": 35.514, "step": 11452 }, { "epoch": 272.6925373134328, "grad_norm": 21.583669662475586, "learning_rate": 9.55357142857143e-06, "loss": 35.1847, "step": 11453 }, { "epoch": 272.7164179104478, "grad_norm": 20.58284568786621, "learning_rate": 9.552721088435374e-06, "loss": 35.5298, "step": 11454 }, { "epoch": 272.7402985074627, "grad_norm": 18.506633758544922, "learning_rate": 9.55187074829932e-06, "loss": 35.2539, "step": 11455 }, { "epoch": 272.7641791044776, "grad_norm": 20.344755172729492, "learning_rate": 9.551020408163266e-06, "loss": 34.4507, "step": 11456 }, { "epoch": 272.78805970149256, "grad_norm": 21.053865432739258, "learning_rate": 9.550170068027212e-06, "loss": 35.0848, "step": 11457 }, { "epoch": 272.81194029850747, "grad_norm": 19.17555809020996, "learning_rate": 9.549319727891157e-06, "loss": 34.5821, "step": 11458 }, { "epoch": 272.8358208955224, "grad_norm": 19.503799438476562, "learning_rate": 9.548469387755102e-06, "loss": 35.4881, "step": 11459 }, { "epoch": 272.85970149253734, "grad_norm": 22.57611846923828, "learning_rate": 9.547619047619049e-06, "loss": 34.5947, "step": 11460 }, { "epoch": 272.88358208955225, "grad_norm": 16.201906204223633, "learning_rate": 9.546768707482994e-06, "loss": 33.9073, "step": 11461 }, { "epoch": 272.90746268656716, "grad_norm": 29.876388549804688, "learning_rate": 9.54591836734694e-06, "loss": 34.9881, "step": 11462 }, { "epoch": 272.93134328358207, "grad_norm": 21.188796997070312, "learning_rate": 9.545068027210885e-06, "loss": 33.9589, "step": 11463 }, { "epoch": 272.95522388059703, "grad_norm": 24.822237014770508, "learning_rate": 9.54421768707483e-06, "loss": 34.5605, "step": 11464 }, { "epoch": 272.97910447761194, "grad_norm": 21.180675506591797, "learning_rate": 9.543367346938777e-06, "loss": 34.6804, "step": 11465 }, { "epoch": 273.0, "grad_norm": 22.525537490844727, "learning_rate": 9.542517006802722e-06, "loss": 31.2545, "step": 11466 }, { "epoch": 273.0238805970149, "grad_norm": 23.992300033569336, "learning_rate": 9.541666666666669e-06, "loss": 35.4219, "step": 11467 }, { "epoch": 273.0477611940299, "grad_norm": 16.92515754699707, "learning_rate": 9.540816326530612e-06, "loss": 33.5769, "step": 11468 }, { "epoch": 273.0716417910448, "grad_norm": 23.432153701782227, "learning_rate": 9.539965986394557e-06, "loss": 34.318, "step": 11469 }, { "epoch": 273.0955223880597, "grad_norm": 19.189620971679688, "learning_rate": 9.539115646258504e-06, "loss": 34.2028, "step": 11470 }, { "epoch": 273.1194029850746, "grad_norm": 19.115657806396484, "learning_rate": 9.53826530612245e-06, "loss": 34.2054, "step": 11471 }, { "epoch": 273.14328358208957, "grad_norm": 21.3024845123291, "learning_rate": 9.537414965986396e-06, "loss": 35.7482, "step": 11472 }, { "epoch": 273.1671641791045, "grad_norm": 16.934879302978516, "learning_rate": 9.536564625850342e-06, "loss": 35.7134, "step": 11473 }, { "epoch": 273.1910447761194, "grad_norm": 22.143213272094727, "learning_rate": 9.535714285714287e-06, "loss": 35.191, "step": 11474 }, { "epoch": 273.21492537313435, "grad_norm": 18.863143920898438, "learning_rate": 9.534863945578232e-06, "loss": 34.7234, "step": 11475 }, { "epoch": 273.23880597014926, "grad_norm": 16.55891990661621, "learning_rate": 9.534013605442177e-06, "loss": 36.2902, "step": 11476 }, { "epoch": 273.26268656716417, "grad_norm": 19.711896896362305, "learning_rate": 9.533163265306124e-06, "loss": 34.8988, "step": 11477 }, { "epoch": 273.28656716417913, "grad_norm": 13.028790473937988, "learning_rate": 9.53231292517007e-06, "loss": 35.5146, "step": 11478 }, { "epoch": 273.31044776119404, "grad_norm": 24.55320930480957, "learning_rate": 9.531462585034015e-06, "loss": 35.4407, "step": 11479 }, { "epoch": 273.33432835820895, "grad_norm": 18.43265151977539, "learning_rate": 9.53061224489796e-06, "loss": 33.798, "step": 11480 }, { "epoch": 273.35820895522386, "grad_norm": 22.450809478759766, "learning_rate": 9.529761904761905e-06, "loss": 35.1897, "step": 11481 }, { "epoch": 273.3820895522388, "grad_norm": 24.19524574279785, "learning_rate": 9.528911564625852e-06, "loss": 34.1183, "step": 11482 }, { "epoch": 273.40597014925373, "grad_norm": 17.7977352142334, "learning_rate": 9.528061224489797e-06, "loss": 35.96, "step": 11483 }, { "epoch": 273.42985074626864, "grad_norm": 28.38298225402832, "learning_rate": 9.527210884353742e-06, "loss": 34.8416, "step": 11484 }, { "epoch": 273.4537313432836, "grad_norm": 21.169815063476562, "learning_rate": 9.526360544217687e-06, "loss": 34.7033, "step": 11485 }, { "epoch": 273.4776119402985, "grad_norm": 29.524351119995117, "learning_rate": 9.525510204081634e-06, "loss": 35.1072, "step": 11486 }, { "epoch": 273.5014925373134, "grad_norm": 22.241783142089844, "learning_rate": 9.52465986394558e-06, "loss": 35.8894, "step": 11487 }, { "epoch": 273.52537313432833, "grad_norm": 29.821815490722656, "learning_rate": 9.523809523809525e-06, "loss": 35.7035, "step": 11488 }, { "epoch": 273.5492537313433, "grad_norm": 25.787349700927734, "learning_rate": 9.52295918367347e-06, "loss": 34.6796, "step": 11489 }, { "epoch": 273.5731343283582, "grad_norm": 32.30319595336914, "learning_rate": 9.522108843537415e-06, "loss": 34.7891, "step": 11490 }, { "epoch": 273.5970149253731, "grad_norm": 27.29175567626953, "learning_rate": 9.521258503401362e-06, "loss": 35.5402, "step": 11491 }, { "epoch": 273.6208955223881, "grad_norm": 30.5361328125, "learning_rate": 9.520408163265307e-06, "loss": 35.7587, "step": 11492 }, { "epoch": 273.644776119403, "grad_norm": 24.02892303466797, "learning_rate": 9.519557823129252e-06, "loss": 34.6691, "step": 11493 }, { "epoch": 273.6686567164179, "grad_norm": 29.685543060302734, "learning_rate": 9.518707482993198e-06, "loss": 34.6198, "step": 11494 }, { "epoch": 273.6925373134328, "grad_norm": 23.516645431518555, "learning_rate": 9.517857142857143e-06, "loss": 35.3945, "step": 11495 }, { "epoch": 273.7164179104478, "grad_norm": 30.824016571044922, "learning_rate": 9.51700680272109e-06, "loss": 34.7043, "step": 11496 }, { "epoch": 273.7402985074627, "grad_norm": 20.917694091796875, "learning_rate": 9.516156462585035e-06, "loss": 34.2073, "step": 11497 }, { "epoch": 273.7641791044776, "grad_norm": 41.69342041015625, "learning_rate": 9.51530612244898e-06, "loss": 35.1896, "step": 11498 }, { "epoch": 273.78805970149256, "grad_norm": 31.923019409179688, "learning_rate": 9.514455782312925e-06, "loss": 35.6918, "step": 11499 }, { "epoch": 273.81194029850747, "grad_norm": 31.552574157714844, "learning_rate": 9.513605442176872e-06, "loss": 35.462, "step": 11500 }, { "epoch": 273.8358208955224, "grad_norm": 31.041627883911133, "learning_rate": 9.512755102040817e-06, "loss": 35.9564, "step": 11501 }, { "epoch": 273.85970149253734, "grad_norm": 23.8511962890625, "learning_rate": 9.511904761904763e-06, "loss": 33.7351, "step": 11502 }, { "epoch": 273.88358208955225, "grad_norm": 22.056659698486328, "learning_rate": 9.511054421768708e-06, "loss": 34.0475, "step": 11503 }, { "epoch": 273.90746268656716, "grad_norm": 31.377748489379883, "learning_rate": 9.510204081632653e-06, "loss": 34.8224, "step": 11504 }, { "epoch": 273.93134328358207, "grad_norm": 25.686439514160156, "learning_rate": 9.5093537414966e-06, "loss": 34.9713, "step": 11505 }, { "epoch": 273.95522388059703, "grad_norm": 31.768295288085938, "learning_rate": 9.508503401360545e-06, "loss": 34.3472, "step": 11506 }, { "epoch": 273.97910447761194, "grad_norm": 30.48211669921875, "learning_rate": 9.50765306122449e-06, "loss": 33.5255, "step": 11507 }, { "epoch": 274.0, "grad_norm": 24.9478759765625, "learning_rate": 9.506802721088436e-06, "loss": 31.1891, "step": 11508 }, { "epoch": 274.0238805970149, "grad_norm": 26.062694549560547, "learning_rate": 9.50595238095238e-06, "loss": 35.456, "step": 11509 }, { "epoch": 274.0477611940299, "grad_norm": 28.693044662475586, "learning_rate": 9.505102040816328e-06, "loss": 35.1406, "step": 11510 }, { "epoch": 274.0716417910448, "grad_norm": 23.191530227661133, "learning_rate": 9.504251700680273e-06, "loss": 34.7138, "step": 11511 }, { "epoch": 274.0955223880597, "grad_norm": 32.414337158203125, "learning_rate": 9.503401360544218e-06, "loss": 35.28, "step": 11512 }, { "epoch": 274.1194029850746, "grad_norm": 29.824779510498047, "learning_rate": 9.502551020408163e-06, "loss": 35.4824, "step": 11513 }, { "epoch": 274.14328358208957, "grad_norm": 27.0230712890625, "learning_rate": 9.50170068027211e-06, "loss": 35.4533, "step": 11514 }, { "epoch": 274.1671641791045, "grad_norm": 23.042133331298828, "learning_rate": 9.500850340136055e-06, "loss": 34.8574, "step": 11515 }, { "epoch": 274.1910447761194, "grad_norm": 27.555049896240234, "learning_rate": 9.5e-06, "loss": 35.2714, "step": 11516 }, { "epoch": 274.21492537313435, "grad_norm": 24.51102066040039, "learning_rate": 9.499149659863946e-06, "loss": 34.5256, "step": 11517 }, { "epoch": 274.23880597014926, "grad_norm": 35.179222106933594, "learning_rate": 9.498299319727891e-06, "loss": 35.1316, "step": 11518 }, { "epoch": 274.26268656716417, "grad_norm": 30.471803665161133, "learning_rate": 9.497448979591838e-06, "loss": 35.4382, "step": 11519 }, { "epoch": 274.28656716417913, "grad_norm": 24.4095401763916, "learning_rate": 9.496598639455783e-06, "loss": 34.6631, "step": 11520 }, { "epoch": 274.31044776119404, "grad_norm": 22.290611267089844, "learning_rate": 9.49574829931973e-06, "loss": 34.9445, "step": 11521 }, { "epoch": 274.33432835820895, "grad_norm": 28.604211807250977, "learning_rate": 9.494897959183675e-06, "loss": 33.9613, "step": 11522 }, { "epoch": 274.35820895522386, "grad_norm": 23.471830368041992, "learning_rate": 9.494047619047619e-06, "loss": 34.6696, "step": 11523 }, { "epoch": 274.3820895522388, "grad_norm": 30.45530128479004, "learning_rate": 9.493197278911566e-06, "loss": 35.0699, "step": 11524 }, { "epoch": 274.40597014925373, "grad_norm": 27.771160125732422, "learning_rate": 9.49234693877551e-06, "loss": 34.8244, "step": 11525 }, { "epoch": 274.42985074626864, "grad_norm": 29.891550064086914, "learning_rate": 9.491496598639458e-06, "loss": 35.4989, "step": 11526 }, { "epoch": 274.4537313432836, "grad_norm": 28.894603729248047, "learning_rate": 9.490646258503403e-06, "loss": 35.0306, "step": 11527 }, { "epoch": 274.4776119402985, "grad_norm": 26.922245025634766, "learning_rate": 9.489795918367348e-06, "loss": 34.8424, "step": 11528 }, { "epoch": 274.5014925373134, "grad_norm": 25.6401424407959, "learning_rate": 9.488945578231293e-06, "loss": 35.5089, "step": 11529 }, { "epoch": 274.52537313432833, "grad_norm": 28.86932945251465, "learning_rate": 9.488095238095238e-06, "loss": 33.8797, "step": 11530 }, { "epoch": 274.5492537313433, "grad_norm": 25.750967025756836, "learning_rate": 9.487244897959185e-06, "loss": 34.5739, "step": 11531 }, { "epoch": 274.5731343283582, "grad_norm": 31.21046257019043, "learning_rate": 9.48639455782313e-06, "loss": 35.3033, "step": 11532 }, { "epoch": 274.5970149253731, "grad_norm": NaN, "learning_rate": 9.485544217687076e-06, "loss": 55.0899, "step": 11533 }, { "epoch": 274.6208955223881, "grad_norm": 26.225570678710938, "learning_rate": 9.485544217687076e-06, "loss": 35.0351, "step": 11534 }, { "epoch": 274.644776119403, "grad_norm": 29.53644371032715, "learning_rate": 9.484693877551021e-06, "loss": 34.2676, "step": 11535 }, { "epoch": 274.6686567164179, "grad_norm": 31.77597999572754, "learning_rate": 9.483843537414966e-06, "loss": 35.2219, "step": 11536 }, { "epoch": 274.6925373134328, "grad_norm": 27.853591918945312, "learning_rate": 9.482993197278913e-06, "loss": 35.0949, "step": 11537 }, { "epoch": 274.7164179104478, "grad_norm": 24.07490348815918, "learning_rate": 9.482142857142858e-06, "loss": 33.7657, "step": 11538 }, { "epoch": 274.7402985074627, "grad_norm": 32.067989349365234, "learning_rate": 9.481292517006803e-06, "loss": 35.1784, "step": 11539 }, { "epoch": 274.7641791044776, "grad_norm": 24.953723907470703, "learning_rate": 9.480442176870749e-06, "loss": 34.16, "step": 11540 }, { "epoch": 274.78805970149256, "grad_norm": 30.517349243164062, "learning_rate": 9.479591836734695e-06, "loss": 35.2679, "step": 11541 }, { "epoch": 274.81194029850747, "grad_norm": 30.275558471679688, "learning_rate": 9.47874149659864e-06, "loss": 35.5045, "step": 11542 }, { "epoch": 274.8358208955224, "grad_norm": 27.648090362548828, "learning_rate": 9.477891156462586e-06, "loss": 33.7273, "step": 11543 }, { "epoch": 274.85970149253734, "grad_norm": 22.869770050048828, "learning_rate": 9.477040816326531e-06, "loss": 34.6546, "step": 11544 }, { "epoch": 274.88358208955225, "grad_norm": 28.749204635620117, "learning_rate": 9.476190476190476e-06, "loss": 33.961, "step": 11545 }, { "epoch": 274.90746268656716, "grad_norm": 23.774635314941406, "learning_rate": 9.475340136054423e-06, "loss": 35.5513, "step": 11546 }, { "epoch": 274.93134328358207, "grad_norm": 29.827241897583008, "learning_rate": 9.474489795918368e-06, "loss": 34.9178, "step": 11547 }, { "epoch": 274.95522388059703, "grad_norm": 27.671878814697266, "learning_rate": 9.473639455782314e-06, "loss": 35.3901, "step": 11548 }, { "epoch": 274.97910447761194, "grad_norm": NaN, "learning_rate": 9.472789115646259e-06, "loss": 52.21, "step": 11549 }, { "epoch": 275.0, "grad_norm": 24.934322357177734, "learning_rate": 9.472789115646259e-06, "loss": 32.1842, "step": 11550 }, { "epoch": 275.0238805970149, "grad_norm": 26.665868759155273, "learning_rate": 9.471938775510204e-06, "loss": 34.1989, "step": 11551 }, { "epoch": 275.0477611940299, "grad_norm": 25.216344833374023, "learning_rate": 9.471088435374151e-06, "loss": 33.7022, "step": 11552 }, { "epoch": 275.0716417910448, "grad_norm": 23.939922332763672, "learning_rate": 9.470238095238096e-06, "loss": 34.7447, "step": 11553 }, { "epoch": 275.0955223880597, "grad_norm": 30.293737411499023, "learning_rate": 9.469387755102041e-06, "loss": 33.8805, "step": 11554 }, { "epoch": 275.1194029850746, "grad_norm": 26.180225372314453, "learning_rate": 9.468537414965986e-06, "loss": 34.3898, "step": 11555 }, { "epoch": 275.14328358208957, "grad_norm": 29.93408203125, "learning_rate": 9.467687074829933e-06, "loss": 35.4292, "step": 11556 }, { "epoch": 275.1671641791045, "grad_norm": 26.23850440979004, "learning_rate": 9.466836734693879e-06, "loss": 35.715, "step": 11557 }, { "epoch": 275.1910447761194, "grad_norm": 27.489974975585938, "learning_rate": 9.465986394557824e-06, "loss": 35.464, "step": 11558 }, { "epoch": 275.21492537313435, "grad_norm": 22.571842193603516, "learning_rate": 9.465136054421769e-06, "loss": 34.3222, "step": 11559 }, { "epoch": 275.23880597014926, "grad_norm": 29.974586486816406, "learning_rate": 9.464285714285714e-06, "loss": 34.1954, "step": 11560 }, { "epoch": 275.26268656716417, "grad_norm": 25.143552780151367, "learning_rate": 9.463435374149661e-06, "loss": 35.0162, "step": 11561 }, { "epoch": 275.28656716417913, "grad_norm": 31.299367904663086, "learning_rate": 9.462585034013606e-06, "loss": 34.9588, "step": 11562 }, { "epoch": 275.31044776119404, "grad_norm": 28.38913345336914, "learning_rate": 9.461734693877551e-06, "loss": 33.5666, "step": 11563 }, { "epoch": 275.33432835820895, "grad_norm": 26.747886657714844, "learning_rate": 9.460884353741497e-06, "loss": 35.7584, "step": 11564 }, { "epoch": 275.35820895522386, "grad_norm": 24.418148040771484, "learning_rate": 9.460034013605442e-06, "loss": 35.9323, "step": 11565 }, { "epoch": 275.3820895522388, "grad_norm": 27.281307220458984, "learning_rate": 9.459183673469389e-06, "loss": 33.2848, "step": 11566 }, { "epoch": 275.40597014925373, "grad_norm": 21.456995010375977, "learning_rate": 9.458333333333334e-06, "loss": 34.8987, "step": 11567 }, { "epoch": 275.42985074626864, "grad_norm": 27.553138732910156, "learning_rate": 9.457482993197281e-06, "loss": 33.2433, "step": 11568 }, { "epoch": 275.4537313432836, "grad_norm": 23.411741256713867, "learning_rate": 9.456632653061224e-06, "loss": 34.738, "step": 11569 }, { "epoch": 275.4776119402985, "grad_norm": 27.484281539916992, "learning_rate": 9.455782312925171e-06, "loss": 33.6812, "step": 11570 }, { "epoch": 275.5014925373134, "grad_norm": 27.311189651489258, "learning_rate": 9.454931972789116e-06, "loss": 34.7603, "step": 11571 }, { "epoch": 275.52537313432833, "grad_norm": 25.987213134765625, "learning_rate": 9.454081632653062e-06, "loss": 35.9721, "step": 11572 }, { "epoch": 275.5492537313433, "grad_norm": 24.543262481689453, "learning_rate": 9.453231292517009e-06, "loss": 35.1705, "step": 11573 }, { "epoch": 275.5731343283582, "grad_norm": NaN, "learning_rate": 9.452380952380952e-06, "loss": 42.9772, "step": 11574 }, { "epoch": 275.5970149253731, "grad_norm": 30.56894874572754, "learning_rate": 9.452380952380952e-06, "loss": 34.7541, "step": 11575 }, { "epoch": 275.6208955223881, "grad_norm": 24.31935691833496, "learning_rate": 9.451530612244899e-06, "loss": 35.306, "step": 11576 }, { "epoch": 275.644776119403, "grad_norm": 26.2707462310791, "learning_rate": 9.450680272108844e-06, "loss": 35.2843, "step": 11577 }, { "epoch": 275.6686567164179, "grad_norm": 26.78156089782715, "learning_rate": 9.449829931972791e-06, "loss": 33.7731, "step": 11578 }, { "epoch": 275.6925373134328, "grad_norm": 25.12327003479004, "learning_rate": 9.448979591836736e-06, "loss": 35.9485, "step": 11579 }, { "epoch": 275.7164179104478, "grad_norm": 22.33073616027832, "learning_rate": 9.448129251700681e-06, "loss": 34.9512, "step": 11580 }, { "epoch": 275.7402985074627, "grad_norm": 28.61358642578125, "learning_rate": 9.447278911564627e-06, "loss": 35.087, "step": 11581 }, { "epoch": 275.7641791044776, "grad_norm": 24.46397590637207, "learning_rate": 9.446428571428572e-06, "loss": 35.7263, "step": 11582 }, { "epoch": 275.78805970149256, "grad_norm": 30.075510025024414, "learning_rate": 9.445578231292519e-06, "loss": 35.1137, "step": 11583 }, { "epoch": 275.81194029850747, "grad_norm": 24.947879791259766, "learning_rate": 9.444727891156464e-06, "loss": 34.9995, "step": 11584 }, { "epoch": 275.8358208955224, "grad_norm": 24.197057723999023, "learning_rate": 9.44387755102041e-06, "loss": 35.6346, "step": 11585 }, { "epoch": 275.85970149253734, "grad_norm": 22.781902313232422, "learning_rate": 9.443027210884354e-06, "loss": 36.1519, "step": 11586 }, { "epoch": 275.88358208955225, "grad_norm": 25.115249633789062, "learning_rate": 9.4421768707483e-06, "loss": 36.1326, "step": 11587 }, { "epoch": 275.90746268656716, "grad_norm": 21.32830238342285, "learning_rate": 9.441326530612246e-06, "loss": 34.9455, "step": 11588 }, { "epoch": 275.93134328358207, "grad_norm": 23.063034057617188, "learning_rate": 9.440476190476192e-06, "loss": 35.7374, "step": 11589 }, { "epoch": 275.95522388059703, "grad_norm": 16.17242431640625, "learning_rate": 9.439625850340137e-06, "loss": 34.7554, "step": 11590 }, { "epoch": 275.97910447761194, "grad_norm": 28.204742431640625, "learning_rate": 9.438775510204082e-06, "loss": 33.9266, "step": 11591 }, { "epoch": 276.0, "grad_norm": 21.808975219726562, "learning_rate": 9.437925170068027e-06, "loss": 31.0028, "step": 11592 }, { "epoch": 276.0238805970149, "grad_norm": 25.250585556030273, "learning_rate": 9.437074829931974e-06, "loss": 34.3719, "step": 11593 }, { "epoch": 276.0477611940299, "grad_norm": 22.951576232910156, "learning_rate": 9.43622448979592e-06, "loss": 33.6795, "step": 11594 }, { "epoch": 276.0716417910448, "grad_norm": 19.92416000366211, "learning_rate": 9.435374149659865e-06, "loss": 35.4251, "step": 11595 }, { "epoch": 276.0955223880597, "grad_norm": 22.649166107177734, "learning_rate": 9.43452380952381e-06, "loss": 33.3365, "step": 11596 }, { "epoch": 276.1194029850746, "grad_norm": 20.024242401123047, "learning_rate": 9.433673469387757e-06, "loss": 35.0393, "step": 11597 }, { "epoch": 276.14328358208957, "grad_norm": 16.586639404296875, "learning_rate": 9.432823129251702e-06, "loss": 34.7588, "step": 11598 }, { "epoch": 276.1671641791045, "grad_norm": 17.571060180664062, "learning_rate": 9.431972789115647e-06, "loss": 34.5063, "step": 11599 }, { "epoch": 276.1910447761194, "grad_norm": 15.963066101074219, "learning_rate": 9.431122448979592e-06, "loss": 35.7218, "step": 11600 }, { "epoch": 276.21492537313435, "grad_norm": 17.378015518188477, "learning_rate": 9.430272108843537e-06, "loss": 35.3853, "step": 11601 }, { "epoch": 276.23880597014926, "grad_norm": 14.895105361938477, "learning_rate": 9.429421768707484e-06, "loss": 35.9053, "step": 11602 }, { "epoch": 276.26268656716417, "grad_norm": 17.460681915283203, "learning_rate": 9.42857142857143e-06, "loss": 34.0455, "step": 11603 }, { "epoch": 276.28656716417913, "grad_norm": 15.1104154586792, "learning_rate": 9.427721088435375e-06, "loss": 34.0612, "step": 11604 }, { "epoch": 276.31044776119404, "grad_norm": 15.858449935913086, "learning_rate": 9.42687074829932e-06, "loss": 34.4224, "step": 11605 }, { "epoch": 276.33432835820895, "grad_norm": 14.354479789733887, "learning_rate": 9.426020408163265e-06, "loss": 35.9808, "step": 11606 }, { "epoch": 276.35820895522386, "grad_norm": 15.939604759216309, "learning_rate": 9.425170068027212e-06, "loss": 34.1325, "step": 11607 }, { "epoch": 276.3820895522388, "grad_norm": 17.995344161987305, "learning_rate": 9.424319727891157e-06, "loss": 36.2056, "step": 11608 }, { "epoch": 276.40597014925373, "grad_norm": 14.723787307739258, "learning_rate": 9.423469387755102e-06, "loss": 34.8778, "step": 11609 }, { "epoch": 276.42985074626864, "grad_norm": 17.433948516845703, "learning_rate": 9.422619047619048e-06, "loss": 34.4876, "step": 11610 }, { "epoch": 276.4537313432836, "grad_norm": 15.472698211669922, "learning_rate": 9.421768707482995e-06, "loss": 35.319, "step": 11611 }, { "epoch": 276.4776119402985, "grad_norm": 15.433340072631836, "learning_rate": 9.42091836734694e-06, "loss": 33.6886, "step": 11612 }, { "epoch": 276.5014925373134, "grad_norm": 17.547300338745117, "learning_rate": 9.420068027210885e-06, "loss": 35.1445, "step": 11613 }, { "epoch": 276.52537313432833, "grad_norm": 16.011526107788086, "learning_rate": 9.41921768707483e-06, "loss": 34.8026, "step": 11614 }, { "epoch": 276.5492537313433, "grad_norm": 15.494392395019531, "learning_rate": 9.418367346938775e-06, "loss": 35.8802, "step": 11615 }, { "epoch": 276.5731343283582, "grad_norm": 14.229571342468262, "learning_rate": 9.417517006802722e-06, "loss": 35.7169, "step": 11616 }, { "epoch": 276.5970149253731, "grad_norm": 16.28061294555664, "learning_rate": 9.416666666666667e-06, "loss": 35.9106, "step": 11617 }, { "epoch": 276.6208955223881, "grad_norm": 13.186849594116211, "learning_rate": 9.415816326530614e-06, "loss": 33.2318, "step": 11618 }, { "epoch": 276.644776119403, "grad_norm": 17.973485946655273, "learning_rate": 9.414965986394558e-06, "loss": 35.7487, "step": 11619 }, { "epoch": 276.6686567164179, "grad_norm": 16.334392547607422, "learning_rate": 9.414115646258503e-06, "loss": 34.507, "step": 11620 }, { "epoch": 276.6925373134328, "grad_norm": 15.271941184997559, "learning_rate": 9.41326530612245e-06, "loss": 33.8744, "step": 11621 }, { "epoch": 276.7164179104478, "grad_norm": 19.46748161315918, "learning_rate": 9.412414965986395e-06, "loss": 36.0643, "step": 11622 }, { "epoch": 276.7402985074627, "grad_norm": 15.582892417907715, "learning_rate": 9.411564625850342e-06, "loss": 35.6241, "step": 11623 }, { "epoch": 276.7641791044776, "grad_norm": 23.94053077697754, "learning_rate": 9.410714285714286e-06, "loss": 33.2915, "step": 11624 }, { "epoch": 276.78805970149256, "grad_norm": 20.39919090270996, "learning_rate": 9.409863945578232e-06, "loss": 34.942, "step": 11625 }, { "epoch": 276.81194029850747, "grad_norm": 15.005172729492188, "learning_rate": 9.409013605442178e-06, "loss": 34.5648, "step": 11626 }, { "epoch": 276.8358208955224, "grad_norm": 20.01251983642578, "learning_rate": 9.408163265306123e-06, "loss": 35.9609, "step": 11627 }, { "epoch": 276.85970149253734, "grad_norm": 19.874052047729492, "learning_rate": 9.40731292517007e-06, "loss": 34.1502, "step": 11628 }, { "epoch": 276.88358208955225, "grad_norm": 13.759737968444824, "learning_rate": 9.406462585034015e-06, "loss": 35.1191, "step": 11629 }, { "epoch": 276.90746268656716, "grad_norm": 16.14326286315918, "learning_rate": 9.40561224489796e-06, "loss": 35.1842, "step": 11630 }, { "epoch": 276.93134328358207, "grad_norm": 13.732664108276367, "learning_rate": 9.404761904761905e-06, "loss": 35.4432, "step": 11631 }, { "epoch": 276.95522388059703, "grad_norm": 15.850354194641113, "learning_rate": 9.403911564625852e-06, "loss": 33.7583, "step": 11632 }, { "epoch": 276.97910447761194, "grad_norm": 18.14269256591797, "learning_rate": 9.403061224489797e-06, "loss": 35.4636, "step": 11633 }, { "epoch": 277.0, "grad_norm": 14.818315505981445, "learning_rate": 9.402210884353743e-06, "loss": 30.0158, "step": 11634 }, { "epoch": 277.0238805970149, "grad_norm": 14.796257019042969, "learning_rate": 9.401360544217688e-06, "loss": 35.0704, "step": 11635 }, { "epoch": 277.0477611940299, "grad_norm": 12.80022144317627, "learning_rate": 9.400510204081633e-06, "loss": 35.1998, "step": 11636 }, { "epoch": 277.0716417910448, "grad_norm": 19.435375213623047, "learning_rate": 9.39965986394558e-06, "loss": 34.3463, "step": 11637 }, { "epoch": 277.0955223880597, "grad_norm": 13.989315032958984, "learning_rate": 9.398809523809525e-06, "loss": 33.7221, "step": 11638 }, { "epoch": 277.1194029850746, "grad_norm": 17.131755828857422, "learning_rate": 9.39795918367347e-06, "loss": 34.3613, "step": 11639 }, { "epoch": 277.14328358208957, "grad_norm": 16.77277946472168, "learning_rate": 9.397108843537416e-06, "loss": 34.9621, "step": 11640 }, { "epoch": 277.1671641791045, "grad_norm": 16.701374053955078, "learning_rate": 9.39625850340136e-06, "loss": 35.6076, "step": 11641 }, { "epoch": 277.1910447761194, "grad_norm": 16.80859375, "learning_rate": 9.395408163265308e-06, "loss": 33.6321, "step": 11642 }, { "epoch": 277.21492537313435, "grad_norm": 16.263702392578125, "learning_rate": 9.394557823129253e-06, "loss": 34.1421, "step": 11643 }, { "epoch": 277.23880597014926, "grad_norm": 19.730012893676758, "learning_rate": 9.393707482993198e-06, "loss": 34.7567, "step": 11644 }, { "epoch": 277.26268656716417, "grad_norm": 16.331052780151367, "learning_rate": 9.392857142857143e-06, "loss": 35.5914, "step": 11645 }, { "epoch": 277.28656716417913, "grad_norm": NaN, "learning_rate": 9.392006802721088e-06, "loss": 38.8077, "step": 11646 }, { "epoch": 277.31044776119404, "grad_norm": 16.834171295166016, "learning_rate": 9.392006802721088e-06, "loss": 34.0246, "step": 11647 }, { "epoch": 277.33432835820895, "grad_norm": 15.806410789489746, "learning_rate": 9.391156462585035e-06, "loss": 33.9759, "step": 11648 }, { "epoch": 277.35820895522386, "grad_norm": 16.6479549407959, "learning_rate": 9.39030612244898e-06, "loss": 35.1363, "step": 11649 }, { "epoch": 277.3820895522388, "grad_norm": 20.180774688720703, "learning_rate": 9.389455782312926e-06, "loss": 35.4411, "step": 11650 }, { "epoch": 277.40597014925373, "grad_norm": 18.839466094970703, "learning_rate": 9.388605442176871e-06, "loss": 34.8252, "step": 11651 }, { "epoch": 277.42985074626864, "grad_norm": 15.500970840454102, "learning_rate": 9.387755102040818e-06, "loss": 35.407, "step": 11652 }, { "epoch": 277.4537313432836, "grad_norm": 20.913833618164062, "learning_rate": 9.386904761904763e-06, "loss": 34.4692, "step": 11653 }, { "epoch": 277.4776119402985, "grad_norm": 14.663201332092285, "learning_rate": 9.386054421768708e-06, "loss": 34.8329, "step": 11654 }, { "epoch": 277.5014925373134, "grad_norm": 19.904327392578125, "learning_rate": 9.385204081632653e-06, "loss": 34.5005, "step": 11655 }, { "epoch": 277.52537313432833, "grad_norm": 16.9540958404541, "learning_rate": 9.384353741496599e-06, "loss": 34.4333, "step": 11656 }, { "epoch": 277.5492537313433, "grad_norm": 17.932445526123047, "learning_rate": 9.383503401360545e-06, "loss": 35.8671, "step": 11657 }, { "epoch": 277.5731343283582, "grad_norm": 17.281888961791992, "learning_rate": 9.38265306122449e-06, "loss": 34.37, "step": 11658 }, { "epoch": 277.5970149253731, "grad_norm": 16.86711883544922, "learning_rate": 9.381802721088436e-06, "loss": 33.9623, "step": 11659 }, { "epoch": 277.6208955223881, "grad_norm": 19.224029541015625, "learning_rate": 9.380952380952381e-06, "loss": 35.3118, "step": 11660 }, { "epoch": 277.644776119403, "grad_norm": 14.79770278930664, "learning_rate": 9.380102040816326e-06, "loss": 34.5133, "step": 11661 }, { "epoch": 277.6686567164179, "grad_norm": 19.909706115722656, "learning_rate": 9.379251700680273e-06, "loss": 34.6881, "step": 11662 }, { "epoch": 277.6925373134328, "grad_norm": 16.60921287536621, "learning_rate": 9.378401360544218e-06, "loss": 33.7788, "step": 11663 }, { "epoch": 277.7164179104478, "grad_norm": 15.984688758850098, "learning_rate": 9.377551020408164e-06, "loss": 35.8545, "step": 11664 }, { "epoch": 277.7402985074627, "grad_norm": 15.258697509765625, "learning_rate": 9.376700680272109e-06, "loss": 34.3763, "step": 11665 }, { "epoch": 277.7641791044776, "grad_norm": 18.777162551879883, "learning_rate": 9.375850340136056e-06, "loss": 34.5487, "step": 11666 }, { "epoch": 277.78805970149256, "grad_norm": 17.217514038085938, "learning_rate": 9.375000000000001e-06, "loss": 34.7693, "step": 11667 }, { "epoch": 277.81194029850747, "grad_norm": 16.63068389892578, "learning_rate": 9.374149659863946e-06, "loss": 35.4646, "step": 11668 }, { "epoch": 277.8358208955224, "grad_norm": 16.24852752685547, "learning_rate": 9.373299319727891e-06, "loss": 36.0635, "step": 11669 }, { "epoch": 277.85970149253734, "grad_norm": 22.07871437072754, "learning_rate": 9.372448979591836e-06, "loss": 35.4148, "step": 11670 }, { "epoch": 277.88358208955225, "grad_norm": 16.83747673034668, "learning_rate": 9.371598639455783e-06, "loss": 34.9176, "step": 11671 }, { "epoch": 277.90746268656716, "grad_norm": 19.011245727539062, "learning_rate": 9.370748299319729e-06, "loss": 35.0717, "step": 11672 }, { "epoch": 277.93134328358207, "grad_norm": 18.960601806640625, "learning_rate": 9.369897959183675e-06, "loss": 35.5138, "step": 11673 }, { "epoch": 277.95522388059703, "grad_norm": 16.586849212646484, "learning_rate": 9.36904761904762e-06, "loss": 35.1342, "step": 11674 }, { "epoch": 277.97910447761194, "grad_norm": 26.47035789489746, "learning_rate": 9.368197278911564e-06, "loss": 35.3914, "step": 11675 }, { "epoch": 278.0, "grad_norm": 15.833699226379395, "learning_rate": 9.367346938775511e-06, "loss": 29.716, "step": 11676 }, { "epoch": 278.0238805970149, "grad_norm": 21.678312301635742, "learning_rate": 9.366496598639456e-06, "loss": 35.6104, "step": 11677 }, { "epoch": 278.0477611940299, "grad_norm": 18.99468421936035, "learning_rate": 9.365646258503403e-06, "loss": 35.0982, "step": 11678 }, { "epoch": 278.0716417910448, "grad_norm": 17.7320556640625, "learning_rate": 9.364795918367348e-06, "loss": 35.9922, "step": 11679 }, { "epoch": 278.0955223880597, "grad_norm": 21.59501838684082, "learning_rate": 9.363945578231294e-06, "loss": 35.4113, "step": 11680 }, { "epoch": 278.1194029850746, "grad_norm": 17.88007164001465, "learning_rate": 9.363095238095239e-06, "loss": 36.0974, "step": 11681 }, { "epoch": 278.14328358208957, "grad_norm": 17.10157585144043, "learning_rate": 9.362244897959184e-06, "loss": 32.7194, "step": 11682 }, { "epoch": 278.1671641791045, "grad_norm": 27.03644561767578, "learning_rate": 9.361394557823131e-06, "loss": 34.95, "step": 11683 }, { "epoch": 278.1910447761194, "grad_norm": 15.274543762207031, "learning_rate": 9.360544217687076e-06, "loss": 34.1545, "step": 11684 }, { "epoch": 278.21492537313435, "grad_norm": 24.165719985961914, "learning_rate": 9.359693877551021e-06, "loss": 35.3858, "step": 11685 }, { "epoch": 278.23880597014926, "grad_norm": 17.6593017578125, "learning_rate": 9.358843537414966e-06, "loss": 34.9821, "step": 11686 }, { "epoch": 278.26268656716417, "grad_norm": 19.354694366455078, "learning_rate": 9.357993197278913e-06, "loss": 34.3421, "step": 11687 }, { "epoch": 278.28656716417913, "grad_norm": 16.19529151916504, "learning_rate": 9.357142857142859e-06, "loss": 34.8048, "step": 11688 }, { "epoch": 278.31044776119404, "grad_norm": 19.56124496459961, "learning_rate": 9.356292517006804e-06, "loss": 34.841, "step": 11689 }, { "epoch": 278.33432835820895, "grad_norm": 16.454235076904297, "learning_rate": 9.355442176870749e-06, "loss": 35.5673, "step": 11690 }, { "epoch": 278.35820895522386, "grad_norm": 18.578588485717773, "learning_rate": 9.354591836734694e-06, "loss": 35.3196, "step": 11691 }, { "epoch": 278.3820895522388, "grad_norm": 20.52625274658203, "learning_rate": 9.353741496598641e-06, "loss": 35.4304, "step": 11692 }, { "epoch": 278.40597014925373, "grad_norm": 15.972278594970703, "learning_rate": 9.352891156462586e-06, "loss": 35.0974, "step": 11693 }, { "epoch": 278.42985074626864, "grad_norm": 15.163324356079102, "learning_rate": 9.352040816326531e-06, "loss": 33.9505, "step": 11694 }, { "epoch": 278.4537313432836, "grad_norm": 16.178781509399414, "learning_rate": 9.351190476190477e-06, "loss": 33.7124, "step": 11695 }, { "epoch": 278.4776119402985, "grad_norm": 22.499116897583008, "learning_rate": 9.350340136054422e-06, "loss": 35.1534, "step": 11696 }, { "epoch": 278.5014925373134, "grad_norm": 16.468137741088867, "learning_rate": 9.349489795918369e-06, "loss": 35.167, "step": 11697 }, { "epoch": 278.52537313432833, "grad_norm": 17.90869140625, "learning_rate": 9.348639455782314e-06, "loss": 34.582, "step": 11698 }, { "epoch": 278.5492537313433, "grad_norm": 17.6704044342041, "learning_rate": 9.347789115646259e-06, "loss": 33.1312, "step": 11699 }, { "epoch": 278.5731343283582, "grad_norm": 22.600032806396484, "learning_rate": 9.346938775510204e-06, "loss": 35.5801, "step": 11700 }, { "epoch": 278.5970149253731, "grad_norm": 14.013952255249023, "learning_rate": 9.34608843537415e-06, "loss": 34.7605, "step": 11701 }, { "epoch": 278.6208955223881, "grad_norm": 31.57529067993164, "learning_rate": 9.345238095238096e-06, "loss": 34.4726, "step": 11702 }, { "epoch": 278.644776119403, "grad_norm": 22.25636100769043, "learning_rate": 9.344387755102042e-06, "loss": 35.6146, "step": 11703 }, { "epoch": 278.6686567164179, "grad_norm": 24.68794059753418, "learning_rate": 9.343537414965987e-06, "loss": 35.6026, "step": 11704 }, { "epoch": 278.6925373134328, "grad_norm": 21.93093490600586, "learning_rate": 9.342687074829932e-06, "loss": 34.4607, "step": 11705 }, { "epoch": 278.7164179104478, "grad_norm": 22.90749740600586, "learning_rate": 9.341836734693879e-06, "loss": 35.0604, "step": 11706 }, { "epoch": 278.7402985074627, "grad_norm": 23.237947463989258, "learning_rate": 9.340986394557824e-06, "loss": 32.8543, "step": 11707 }, { "epoch": 278.7641791044776, "grad_norm": 17.744321823120117, "learning_rate": 9.34013605442177e-06, "loss": 34.201, "step": 11708 }, { "epoch": 278.78805970149256, "grad_norm": 34.52104568481445, "learning_rate": 9.339285714285715e-06, "loss": 35.2452, "step": 11709 }, { "epoch": 278.81194029850747, "grad_norm": 25.336421966552734, "learning_rate": 9.33843537414966e-06, "loss": 34.8533, "step": 11710 }, { "epoch": 278.8358208955224, "grad_norm": 26.732851028442383, "learning_rate": 9.337585034013607e-06, "loss": 34.9734, "step": 11711 }, { "epoch": 278.85970149253734, "grad_norm": 22.55652618408203, "learning_rate": 9.336734693877552e-06, "loss": 34.6144, "step": 11712 }, { "epoch": 278.88358208955225, "grad_norm": 27.771093368530273, "learning_rate": 9.335884353741497e-06, "loss": 34.7722, "step": 11713 }, { "epoch": 278.90746268656716, "grad_norm": 17.773391723632812, "learning_rate": 9.335034013605442e-06, "loss": 33.6954, "step": 11714 }, { "epoch": 278.93134328358207, "grad_norm": 21.976579666137695, "learning_rate": 9.334183673469387e-06, "loss": 34.4679, "step": 11715 }, { "epoch": 278.95522388059703, "grad_norm": 20.813447952270508, "learning_rate": 9.333333333333334e-06, "loss": 34.7549, "step": 11716 }, { "epoch": 278.97910447761194, "grad_norm": 16.850330352783203, "learning_rate": 9.33248299319728e-06, "loss": 34.9686, "step": 11717 }, { "epoch": 279.0, "grad_norm": 19.154523849487305, "learning_rate": 9.331632653061225e-06, "loss": 30.0758, "step": 11718 }, { "epoch": 279.0238805970149, "grad_norm": 20.489709854125977, "learning_rate": 9.33078231292517e-06, "loss": 34.4739, "step": 11719 }, { "epoch": 279.0477611940299, "grad_norm": 16.187599182128906, "learning_rate": 9.329931972789117e-06, "loss": 34.3763, "step": 11720 }, { "epoch": 279.0716417910448, "grad_norm": 29.30412483215332, "learning_rate": 9.329081632653062e-06, "loss": 35.046, "step": 11721 }, { "epoch": 279.0955223880597, "grad_norm": 17.451623916625977, "learning_rate": 9.328231292517007e-06, "loss": 34.2565, "step": 11722 }, { "epoch": 279.1194029850746, "grad_norm": 25.524045944213867, "learning_rate": 9.327380952380954e-06, "loss": 34.9649, "step": 11723 }, { "epoch": 279.14328358208957, "grad_norm": 20.078102111816406, "learning_rate": 9.326530612244898e-06, "loss": 33.2472, "step": 11724 }, { "epoch": 279.1671641791045, "grad_norm": 24.161420822143555, "learning_rate": 9.325680272108845e-06, "loss": 33.3308, "step": 11725 }, { "epoch": 279.1910447761194, "grad_norm": 22.15292739868164, "learning_rate": 9.32482993197279e-06, "loss": 35.3597, "step": 11726 }, { "epoch": 279.21492537313435, "grad_norm": 20.223554611206055, "learning_rate": 9.323979591836737e-06, "loss": 34.2928, "step": 11727 }, { "epoch": 279.23880597014926, "grad_norm": 23.808414459228516, "learning_rate": 9.323129251700682e-06, "loss": 34.2218, "step": 11728 }, { "epoch": 279.26268656716417, "grad_norm": 15.967622756958008, "learning_rate": 9.322278911564627e-06, "loss": 36.2048, "step": 11729 }, { "epoch": 279.28656716417913, "grad_norm": 25.359920501708984, "learning_rate": 9.321428571428572e-06, "loss": 34.3644, "step": 11730 }, { "epoch": 279.31044776119404, "grad_norm": 19.8580265045166, "learning_rate": 9.320578231292517e-06, "loss": 35.0638, "step": 11731 }, { "epoch": 279.33432835820895, "grad_norm": 17.405927658081055, "learning_rate": 9.319727891156464e-06, "loss": 33.4174, "step": 11732 }, { "epoch": 279.35820895522386, "grad_norm": 26.111482620239258, "learning_rate": 9.31887755102041e-06, "loss": 32.7717, "step": 11733 }, { "epoch": 279.3820895522388, "grad_norm": 17.716827392578125, "learning_rate": 9.318027210884355e-06, "loss": 34.6962, "step": 11734 }, { "epoch": 279.40597014925373, "grad_norm": 26.403427124023438, "learning_rate": 9.3171768707483e-06, "loss": 34.4502, "step": 11735 }, { "epoch": 279.42985074626864, "grad_norm": 20.0697021484375, "learning_rate": 9.316326530612245e-06, "loss": 35.182, "step": 11736 }, { "epoch": 279.4537313432836, "grad_norm": 25.31626319885254, "learning_rate": 9.315476190476192e-06, "loss": 33.6727, "step": 11737 }, { "epoch": 279.4776119402985, "grad_norm": 17.396921157836914, "learning_rate": 9.314625850340137e-06, "loss": 34.9511, "step": 11738 }, { "epoch": 279.5014925373134, "grad_norm": 18.441740036010742, "learning_rate": 9.313775510204082e-06, "loss": 34.894, "step": 11739 }, { "epoch": 279.52537313432833, "grad_norm": 26.152395248413086, "learning_rate": 9.312925170068028e-06, "loss": 34.4218, "step": 11740 }, { "epoch": 279.5492537313433, "grad_norm": 17.21263313293457, "learning_rate": 9.312074829931974e-06, "loss": 34.8912, "step": 11741 }, { "epoch": 279.5731343283582, "grad_norm": 29.952415466308594, "learning_rate": 9.31122448979592e-06, "loss": 35.3389, "step": 11742 }, { "epoch": 279.5970149253731, "grad_norm": 19.48375129699707, "learning_rate": 9.310374149659865e-06, "loss": 34.5897, "step": 11743 }, { "epoch": 279.6208955223881, "grad_norm": 30.186059951782227, "learning_rate": 9.30952380952381e-06, "loss": 34.9371, "step": 11744 }, { "epoch": 279.644776119403, "grad_norm": 21.69789695739746, "learning_rate": 9.308673469387755e-06, "loss": 35.4772, "step": 11745 }, { "epoch": 279.6686567164179, "grad_norm": 26.690025329589844, "learning_rate": 9.307823129251702e-06, "loss": 35.0228, "step": 11746 }, { "epoch": 279.6925373134328, "grad_norm": 20.931751251220703, "learning_rate": 9.306972789115647e-06, "loss": 35.1266, "step": 11747 }, { "epoch": 279.7164179104478, "grad_norm": 20.397497177124023, "learning_rate": 9.306122448979593e-06, "loss": 34.6833, "step": 11748 }, { "epoch": 279.7402985074627, "grad_norm": 19.892305374145508, "learning_rate": 9.305272108843538e-06, "loss": 35.8163, "step": 11749 }, { "epoch": 279.7641791044776, "grad_norm": 17.510250091552734, "learning_rate": 9.304421768707483e-06, "loss": 35.9888, "step": 11750 }, { "epoch": 279.78805970149256, "grad_norm": 14.105299949645996, "learning_rate": 9.30357142857143e-06, "loss": 35.3041, "step": 11751 }, { "epoch": 279.81194029850747, "grad_norm": 19.23472785949707, "learning_rate": 9.302721088435375e-06, "loss": 34.6608, "step": 11752 }, { "epoch": 279.8358208955224, "grad_norm": 15.51583194732666, "learning_rate": 9.30187074829932e-06, "loss": 35.311, "step": 11753 }, { "epoch": 279.85970149253734, "grad_norm": 19.868854522705078, "learning_rate": 9.301020408163265e-06, "loss": 34.7399, "step": 11754 }, { "epoch": 279.88358208955225, "grad_norm": 16.499805450439453, "learning_rate": 9.30017006802721e-06, "loss": 34.7259, "step": 11755 }, { "epoch": 279.90746268656716, "grad_norm": 20.722089767456055, "learning_rate": 9.299319727891158e-06, "loss": 34.8558, "step": 11756 }, { "epoch": 279.93134328358207, "grad_norm": 15.864660263061523, "learning_rate": 9.298469387755103e-06, "loss": 34.9152, "step": 11757 }, { "epoch": 279.95522388059703, "grad_norm": 19.87424087524414, "learning_rate": 9.297619047619048e-06, "loss": 36.4404, "step": 11758 }, { "epoch": 279.97910447761194, "grad_norm": 16.940832138061523, "learning_rate": 9.296768707482993e-06, "loss": 34.991, "step": 11759 }, { "epoch": 280.0, "grad_norm": 18.82170295715332, "learning_rate": 9.29591836734694e-06, "loss": 30.1988, "step": 11760 }, { "epoch": 280.0, "step": 11760, "total_flos": 5.781277428825138e+17, "train_loss": 2.5059400242202137, "train_runtime": 25611.5463, "train_samples_per_second": 58.511, "train_steps_per_second": 0.459 }, { "epoch": 280.0238805970149, "grad_norm": 17.001306533813477, "learning_rate": 1e-05, "loss": 35.8188, "step": 11761 }, { "epoch": 280.0477611940299, "grad_norm": 219.44827270507812, "learning_rate": 9.99920634920635e-06, "loss": 39.925, "step": 11762 }, { "epoch": 280.0716417910448, "grad_norm": 118.753662109375, "learning_rate": 9.998412698412699e-06, "loss": 38.0303, "step": 11763 }, { "epoch": 280.0955223880597, "grad_norm": 56.105350494384766, "learning_rate": 9.997619047619048e-06, "loss": 37.3487, "step": 11764 }, { "epoch": 280.1194029850746, "grad_norm": 42.488067626953125, "learning_rate": 9.996825396825399e-06, "loss": 35.26, "step": 11765 }, { "epoch": 280.14328358208957, "grad_norm": 57.162506103515625, "learning_rate": 9.996031746031746e-06, "loss": 35.7255, "step": 11766 }, { "epoch": 280.1671641791045, "grad_norm": 52.685462951660156, "learning_rate": 9.995238095238095e-06, "loss": 35.8206, "step": 11767 }, { "epoch": 280.1910447761194, "grad_norm": 37.78727340698242, "learning_rate": 9.994444444444446e-06, "loss": 35.517, "step": 11768 }, { "epoch": 280.21492537313435, "grad_norm": 39.62852478027344, "learning_rate": 9.993650793650793e-06, "loss": 34.4672, "step": 11769 }, { "epoch": 280.23880597014926, "grad_norm": 32.611328125, "learning_rate": 9.992857142857144e-06, "loss": 35.009, "step": 11770 }, { "epoch": 280.26268656716417, "grad_norm": 25.22555923461914, "learning_rate": 9.992063492063493e-06, "loss": 34.7649, "step": 11771 }, { "epoch": 280.28656716417913, "grad_norm": 25.276588439941406, "learning_rate": 9.991269841269842e-06, "loss": 35.933, "step": 11772 }, { "epoch": 280.31044776119404, "grad_norm": 27.605308532714844, "learning_rate": 9.990476190476191e-06, "loss": 35.0031, "step": 11773 }, { "epoch": 280.33432835820895, "grad_norm": 24.355487823486328, "learning_rate": 9.98968253968254e-06, "loss": 34.7315, "step": 11774 }, { "epoch": 280.35820895522386, "grad_norm": 20.254823684692383, "learning_rate": 9.98888888888889e-06, "loss": 34.4345, "step": 11775 }, { "epoch": 280.3820895522388, "grad_norm": 17.66265869140625, "learning_rate": 9.988095238095239e-06, "loss": 34.0289, "step": 11776 }, { "epoch": 280.40597014925373, "grad_norm": 17.804201126098633, "learning_rate": 9.987301587301588e-06, "loss": 34.3589, "step": 11777 }, { "epoch": 280.42985074626864, "grad_norm": 16.00823974609375, "learning_rate": 9.986507936507937e-06, "loss": 35.3401, "step": 11778 }, { "epoch": 280.4537313432836, "grad_norm": 19.54131507873535, "learning_rate": 9.985714285714286e-06, "loss": 35.1887, "step": 11779 }, { "epoch": 280.4776119402985, "grad_norm": 14.041351318359375, "learning_rate": 9.984920634920637e-06, "loss": 35.2667, "step": 11780 }, { "epoch": 280.5014925373134, "grad_norm": 20.99547004699707, "learning_rate": 9.984126984126986e-06, "loss": 33.7162, "step": 11781 }, { "epoch": 280.52537313432833, "grad_norm": 18.124479293823242, "learning_rate": 9.983333333333333e-06, "loss": 34.3665, "step": 11782 }, { "epoch": 280.5492537313433, "grad_norm": 19.564178466796875, "learning_rate": 9.982539682539684e-06, "loss": 34.558, "step": 11783 }, { "epoch": 280.5731343283582, "grad_norm": 24.882999420166016, "learning_rate": 9.981746031746033e-06, "loss": 35.1123, "step": 11784 }, { "epoch": 280.5970149253731, "grad_norm": 15.504097938537598, "learning_rate": 9.980952380952382e-06, "loss": 34.8684, "step": 11785 }, { "epoch": 280.6208955223881, "grad_norm": 22.50943374633789, "learning_rate": 9.980158730158731e-06, "loss": 33.4909, "step": 11786 }, { "epoch": 280.644776119403, "grad_norm": 21.798898696899414, "learning_rate": 9.97936507936508e-06, "loss": 35.8089, "step": 11787 }, { "epoch": 280.6686567164179, "grad_norm": 19.085386276245117, "learning_rate": 9.97857142857143e-06, "loss": 35.2489, "step": 11788 }, { "epoch": 280.6925373134328, "grad_norm": 17.447267532348633, "learning_rate": 9.977777777777778e-06, "loss": 36.1361, "step": 11789 }, { "epoch": 280.7164179104478, "grad_norm": 19.983989715576172, "learning_rate": 9.976984126984128e-06, "loss": 34.7194, "step": 11790 }, { "epoch": 280.7402985074627, "grad_norm": 20.92411994934082, "learning_rate": 9.976190476190477e-06, "loss": 33.9958, "step": 11791 }, { "epoch": 280.7641791044776, "grad_norm": 14.108833312988281, "learning_rate": 9.975396825396826e-06, "loss": 34.6663, "step": 11792 }, { "epoch": 280.78805970149256, "grad_norm": 16.30893325805664, "learning_rate": 9.974603174603176e-06, "loss": 35.4368, "step": 11793 }, { "epoch": 280.81194029850747, "grad_norm": 16.229223251342773, "learning_rate": 9.973809523809524e-06, "loss": 34.9174, "step": 11794 }, { "epoch": 280.8358208955224, "grad_norm": 15.053704261779785, "learning_rate": 9.973015873015875e-06, "loss": 34.6009, "step": 11795 }, { "epoch": 280.85970149253734, "grad_norm": 19.644737243652344, "learning_rate": 9.972222222222224e-06, "loss": 34.9756, "step": 11796 }, { "epoch": 280.88358208955225, "grad_norm": NaN, "learning_rate": 9.971428571428571e-06, "loss": 59.2668, "step": 11797 }, { "epoch": 280.90746268656716, "grad_norm": 15.495433807373047, "learning_rate": 9.971428571428571e-06, "loss": 34.7684, "step": 11798 }, { "epoch": 280.93134328358207, "grad_norm": 14.9066162109375, "learning_rate": 9.970634920634922e-06, "loss": 35.7027, "step": 11799 }, { "epoch": 280.95522388059703, "grad_norm": 15.656798362731934, "learning_rate": 9.969841269841271e-06, "loss": 34.5438, "step": 11800 }, { "epoch": 280.97910447761194, "grad_norm": 26.039445877075195, "learning_rate": 9.96904761904762e-06, "loss": 34.1163, "step": 11801 }, { "epoch": 281.0, "grad_norm": 13.834368705749512, "learning_rate": 9.968253968253969e-06, "loss": 29.3649, "step": 11802 }, { "epoch": 281.0238805970149, "grad_norm": 24.26058578491211, "learning_rate": 9.967460317460318e-06, "loss": 35.4255, "step": 11803 }, { "epoch": 281.0477611940299, "grad_norm": 21.886337280273438, "learning_rate": 9.966666666666667e-06, "loss": 35.5066, "step": 11804 }, { "epoch": 281.0716417910448, "grad_norm": 17.188631057739258, "learning_rate": 9.965873015873016e-06, "loss": 35.172, "step": 11805 }, { "epoch": 281.0955223880597, "grad_norm": 26.414350509643555, "learning_rate": 9.965079365079365e-06, "loss": 33.3201, "step": 11806 }, { "epoch": 281.1194029850746, "grad_norm": 18.171688079833984, "learning_rate": 9.964285714285714e-06, "loss": 34.0477, "step": 11807 }, { "epoch": 281.14328358208957, "grad_norm": 31.080293655395508, "learning_rate": 9.963492063492064e-06, "loss": 34.5627, "step": 11808 }, { "epoch": 281.1671641791045, "grad_norm": 21.397998809814453, "learning_rate": 9.962698412698414e-06, "loss": 35.6122, "step": 11809 }, { "epoch": 281.1910447761194, "grad_norm": 23.92205047607422, "learning_rate": 9.961904761904763e-06, "loss": 34.1777, "step": 11810 }, { "epoch": 281.21492537313435, "grad_norm": 22.434926986694336, "learning_rate": 9.96111111111111e-06, "loss": 36.6884, "step": 11811 }, { "epoch": 281.23880597014926, "grad_norm": 18.58843231201172, "learning_rate": 9.960317460317462e-06, "loss": 34.3689, "step": 11812 }, { "epoch": 281.26268656716417, "grad_norm": 22.569429397583008, "learning_rate": 9.95952380952381e-06, "loss": 34.1493, "step": 11813 }, { "epoch": 281.28656716417913, "grad_norm": 18.652563095092773, "learning_rate": 9.95873015873016e-06, "loss": 35.4785, "step": 11814 }, { "epoch": 281.31044776119404, "grad_norm": 18.981735229492188, "learning_rate": 9.957936507936509e-06, "loss": 35.4806, "step": 11815 }, { "epoch": 281.33432835820895, "grad_norm": 22.05530548095703, "learning_rate": 9.957142857142858e-06, "loss": 33.4935, "step": 11816 }, { "epoch": 281.35820895522386, "grad_norm": 15.490934371948242, "learning_rate": 9.956349206349207e-06, "loss": 36.5091, "step": 11817 }, { "epoch": 281.3820895522388, "grad_norm": 29.51089096069336, "learning_rate": 9.955555555555556e-06, "loss": 35.3114, "step": 11818 }, { "epoch": 281.40597014925373, "grad_norm": 21.18665885925293, "learning_rate": 9.954761904761905e-06, "loss": 34.1899, "step": 11819 }, { "epoch": 281.42985074626864, "grad_norm": 26.58310317993164, "learning_rate": 9.953968253968254e-06, "loss": 34.4679, "step": 11820 }, { "epoch": 281.4537313432836, "grad_norm": 18.942975997924805, "learning_rate": 9.953174603174603e-06, "loss": 33.8813, "step": 11821 }, { "epoch": 281.4776119402985, "grad_norm": 20.89089012145996, "learning_rate": 9.952380952380954e-06, "loss": 35.0164, "step": 11822 }, { "epoch": 281.5014925373134, "grad_norm": 27.045583724975586, "learning_rate": 9.951587301587301e-06, "loss": 34.6906, "step": 11823 }, { "epoch": 281.52537313432833, "grad_norm": 17.110809326171875, "learning_rate": 9.950793650793652e-06, "loss": 35.6123, "step": 11824 }, { "epoch": 281.5492537313433, "grad_norm": 22.350217819213867, "learning_rate": 9.950000000000001e-06, "loss": 34.3026, "step": 11825 }, { "epoch": 281.5731343283582, "grad_norm": 19.359451293945312, "learning_rate": 9.94920634920635e-06, "loss": 34.8374, "step": 11826 }, { "epoch": 281.5970149253731, "grad_norm": 17.76999855041504, "learning_rate": 9.9484126984127e-06, "loss": 35.6175, "step": 11827 }, { "epoch": 281.6208955223881, "grad_norm": 29.642032623291016, "learning_rate": 9.947619047619049e-06, "loss": 33.6645, "step": 11828 }, { "epoch": 281.644776119403, "grad_norm": 19.500289916992188, "learning_rate": 9.946825396825398e-06, "loss": 34.8721, "step": 11829 }, { "epoch": 281.6686567164179, "grad_norm": 25.8712215423584, "learning_rate": 9.946031746031747e-06, "loss": 33.729, "step": 11830 }, { "epoch": 281.6925373134328, "grad_norm": 19.624027252197266, "learning_rate": 9.945238095238096e-06, "loss": 33.6072, "step": 11831 }, { "epoch": 281.7164179104478, "grad_norm": 30.332162857055664, "learning_rate": 9.944444444444445e-06, "loss": 35.7628, "step": 11832 }, { "epoch": 281.7402985074627, "grad_norm": 19.511499404907227, "learning_rate": 9.943650793650794e-06, "loss": 35.0269, "step": 11833 }, { "epoch": 281.7641791044776, "grad_norm": 27.628700256347656, "learning_rate": 9.942857142857145e-06, "loss": 34.9568, "step": 11834 }, { "epoch": 281.78805970149256, "grad_norm": 24.200483322143555, "learning_rate": 9.942063492063492e-06, "loss": 34.3099, "step": 11835 }, { "epoch": 281.81194029850747, "grad_norm": 19.15821647644043, "learning_rate": 9.941269841269841e-06, "loss": 35.4079, "step": 11836 }, { "epoch": 281.8358208955224, "grad_norm": 27.890596389770508, "learning_rate": 9.940476190476192e-06, "loss": 34.0082, "step": 11837 }, { "epoch": 281.85970149253734, "grad_norm": 20.02274513244629, "learning_rate": 9.939682539682541e-06, "loss": 35.1607, "step": 11838 }, { "epoch": 281.88358208955225, "grad_norm": 30.761608123779297, "learning_rate": 9.93888888888889e-06, "loss": 34.1874, "step": 11839 }, { "epoch": 281.90746268656716, "grad_norm": 20.60077667236328, "learning_rate": 9.93809523809524e-06, "loss": 34.3755, "step": 11840 }, { "epoch": 281.93134328358207, "grad_norm": 27.628490447998047, "learning_rate": 9.937301587301588e-06, "loss": 33.6654, "step": 11841 }, { "epoch": 281.95522388059703, "grad_norm": 18.82782745361328, "learning_rate": 9.936507936507937e-06, "loss": 33.9478, "step": 11842 }, { "epoch": 281.97910447761194, "grad_norm": 30.738174438476562, "learning_rate": 9.935714285714286e-06, "loss": 34.7518, "step": 11843 }, { "epoch": 282.0, "grad_norm": 17.398067474365234, "learning_rate": 9.934920634920636e-06, "loss": 29.9627, "step": 11844 }, { "epoch": 282.0238805970149, "grad_norm": 31.6385498046875, "learning_rate": 9.934126984126985e-06, "loss": 34.7136, "step": 11845 }, { "epoch": 282.0477611940299, "grad_norm": 18.935577392578125, "learning_rate": 9.933333333333334e-06, "loss": 34.1334, "step": 11846 }, { "epoch": 282.0716417910448, "grad_norm": 26.17972183227539, "learning_rate": 9.932539682539684e-06, "loss": 34.052, "step": 11847 }, { "epoch": 282.0955223880597, "grad_norm": 20.300334930419922, "learning_rate": 9.931746031746032e-06, "loss": 33.9338, "step": 11848 }, { "epoch": 282.1194029850746, "grad_norm": 22.307647705078125, "learning_rate": 9.930952380952383e-06, "loss": 34.0204, "step": 11849 }, { "epoch": 282.14328358208957, "grad_norm": 23.589879989624023, "learning_rate": 9.930158730158732e-06, "loss": 34.595, "step": 11850 }, { "epoch": 282.1671641791045, "grad_norm": 19.313642501831055, "learning_rate": 9.929365079365079e-06, "loss": 34.9566, "step": 11851 }, { "epoch": 282.1910447761194, "grad_norm": 33.995906829833984, "learning_rate": 9.92857142857143e-06, "loss": 35.4147, "step": 11852 }, { "epoch": 282.21492537313435, "grad_norm": 21.858556747436523, "learning_rate": 9.927777777777779e-06, "loss": 35.2846, "step": 11853 }, { "epoch": 282.23880597014926, "grad_norm": 32.75117874145508, "learning_rate": 9.926984126984128e-06, "loss": 34.3541, "step": 11854 }, { "epoch": 282.26268656716417, "grad_norm": 22.43109893798828, "learning_rate": 9.926190476190477e-06, "loss": 34.3842, "step": 11855 }, { "epoch": 282.28656716417913, "grad_norm": 40.62053298950195, "learning_rate": 9.925396825396826e-06, "loss": 34.3303, "step": 11856 }, { "epoch": 282.31044776119404, "grad_norm": 33.663394927978516, "learning_rate": 9.924603174603175e-06, "loss": 35.9298, "step": 11857 }, { "epoch": 282.33432835820895, "grad_norm": 30.64558982849121, "learning_rate": 9.923809523809524e-06, "loss": 34.546, "step": 11858 }, { "epoch": 282.35820895522386, "grad_norm": 29.543611526489258, "learning_rate": 9.923015873015875e-06, "loss": 34.5038, "step": 11859 }, { "epoch": 282.3820895522388, "grad_norm": 29.16376495361328, "learning_rate": 9.922222222222222e-06, "loss": 35.8454, "step": 11860 }, { "epoch": 282.40597014925373, "grad_norm": 22.400897979736328, "learning_rate": 9.921428571428572e-06, "loss": 33.9821, "step": 11861 }, { "epoch": 282.42985074626864, "grad_norm": 33.786170959472656, "learning_rate": 9.920634920634922e-06, "loss": 35.0368, "step": 11862 }, { "epoch": 282.4537313432836, "grad_norm": 29.242387771606445, "learning_rate": 9.91984126984127e-06, "loss": 34.7123, "step": 11863 }, { "epoch": 282.4776119402985, "grad_norm": 33.90926742553711, "learning_rate": 9.91904761904762e-06, "loss": 35.1735, "step": 11864 }, { "epoch": 282.5014925373134, "grad_norm": 29.547443389892578, "learning_rate": 9.91825396825397e-06, "loss": 33.1211, "step": 11865 }, { "epoch": 282.52537313432833, "grad_norm": 28.972431182861328, "learning_rate": 9.917460317460319e-06, "loss": 34.5466, "step": 11866 }, { "epoch": 282.5492537313433, "grad_norm": 24.426488876342773, "learning_rate": 9.916666666666668e-06, "loss": 34.1576, "step": 11867 }, { "epoch": 282.5731343283582, "grad_norm": 30.063594818115234, "learning_rate": 9.915873015873017e-06, "loss": 33.7799, "step": 11868 }, { "epoch": 282.5970149253731, "grad_norm": 27.062620162963867, "learning_rate": 9.915079365079366e-06, "loss": 34.6464, "step": 11869 }, { "epoch": 282.6208955223881, "grad_norm": 35.88241195678711, "learning_rate": 9.914285714285715e-06, "loss": 34.5536, "step": 11870 }, { "epoch": 282.644776119403, "grad_norm": 30.721355438232422, "learning_rate": 9.913492063492064e-06, "loss": 35.2244, "step": 11871 }, { "epoch": 282.6686567164179, "grad_norm": 26.038280487060547, "learning_rate": 9.912698412698413e-06, "loss": 34.6394, "step": 11872 }, { "epoch": 282.6925373134328, "grad_norm": 23.411800384521484, "learning_rate": 9.911904761904762e-06, "loss": 34.1463, "step": 11873 }, { "epoch": 282.7164179104478, "grad_norm": 34.48252868652344, "learning_rate": 9.911111111111113e-06, "loss": 35.2108, "step": 11874 }, { "epoch": 282.7402985074627, "grad_norm": 26.923267364501953, "learning_rate": 9.910317460317462e-06, "loss": 35.5689, "step": 11875 }, { "epoch": 282.7641791044776, "grad_norm": 32.60765075683594, "learning_rate": 9.90952380952381e-06, "loss": 35.3503, "step": 11876 }, { "epoch": 282.78805970149256, "grad_norm": 29.128307342529297, "learning_rate": 9.90873015873016e-06, "loss": 35.5577, "step": 11877 }, { "epoch": 282.81194029850747, "grad_norm": 26.722400665283203, "learning_rate": 9.90793650793651e-06, "loss": 33.9117, "step": 11878 }, { "epoch": 282.8358208955224, "grad_norm": 24.377363204956055, "learning_rate": 9.907142857142858e-06, "loss": 34.1131, "step": 11879 }, { "epoch": 282.85970149253734, "grad_norm": 31.27817153930664, "learning_rate": 9.906349206349207e-06, "loss": 34.8682, "step": 11880 }, { "epoch": 282.88358208955225, "grad_norm": 25.59556770324707, "learning_rate": 9.905555555555557e-06, "loss": 34.4288, "step": 11881 }, { "epoch": 282.90746268656716, "grad_norm": 31.731334686279297, "learning_rate": 9.904761904761906e-06, "loss": 36.0353, "step": 11882 }, { "epoch": 282.93134328358207, "grad_norm": 27.592634201049805, "learning_rate": 9.903968253968255e-06, "loss": 33.5116, "step": 11883 }, { "epoch": 282.95522388059703, "grad_norm": 30.299158096313477, "learning_rate": 9.903174603174604e-06, "loss": 34.5389, "step": 11884 }, { "epoch": 282.97910447761194, "grad_norm": 25.82707405090332, "learning_rate": 9.902380952380953e-06, "loss": 35.3144, "step": 11885 }, { "epoch": 283.0, "grad_norm": 26.001951217651367, "learning_rate": 9.901587301587302e-06, "loss": 29.8663, "step": 11886 }, { "epoch": 283.0238805970149, "grad_norm": 26.985275268554688, "learning_rate": 9.900793650793653e-06, "loss": 34.5218, "step": 11887 }, { "epoch": 283.0477611940299, "grad_norm": 27.25253677368164, "learning_rate": 9.9e-06, "loss": 34.0899, "step": 11888 }, { "epoch": 283.0716417910448, "grad_norm": 27.99782943725586, "learning_rate": 9.89920634920635e-06, "loss": 34.6255, "step": 11889 }, { "epoch": 283.0955223880597, "grad_norm": 31.501266479492188, "learning_rate": 9.8984126984127e-06, "loss": 34.5021, "step": 11890 }, { "epoch": 283.1194029850746, "grad_norm": 25.399730682373047, "learning_rate": 9.897619047619047e-06, "loss": 35.1958, "step": 11891 }, { "epoch": 283.14328358208957, "grad_norm": 33.786006927490234, "learning_rate": 9.896825396825398e-06, "loss": 35.4105, "step": 11892 }, { "epoch": 283.1671641791045, "grad_norm": 27.10110092163086, "learning_rate": 9.896031746031747e-06, "loss": 34.4679, "step": 11893 }, { "epoch": 283.1910447761194, "grad_norm": 30.048660278320312, "learning_rate": 9.895238095238096e-06, "loss": 34.3342, "step": 11894 }, { "epoch": 283.21492537313435, "grad_norm": 28.051061630249023, "learning_rate": 9.894444444444445e-06, "loss": 33.9177, "step": 11895 }, { "epoch": 283.23880597014926, "grad_norm": 24.222515106201172, "learning_rate": 9.893650793650794e-06, "loss": 34.614, "step": 11896 }, { "epoch": 283.26268656716417, "grad_norm": 22.9919490814209, "learning_rate": 9.892857142857143e-06, "loss": 35.0559, "step": 11897 }, { "epoch": 283.28656716417913, "grad_norm": 24.69420051574707, "learning_rate": 9.892063492063493e-06, "loss": 34.9936, "step": 11898 }, { "epoch": 283.31044776119404, "grad_norm": 21.76444435119629, "learning_rate": 9.891269841269842e-06, "loss": 34.3498, "step": 11899 }, { "epoch": 283.33432835820895, "grad_norm": 29.178211212158203, "learning_rate": 9.89047619047619e-06, "loss": 35.2396, "step": 11900 }, { "epoch": 283.35820895522386, "grad_norm": 24.143978118896484, "learning_rate": 9.88968253968254e-06, "loss": 34.9166, "step": 11901 }, { "epoch": 283.3820895522388, "grad_norm": 30.864849090576172, "learning_rate": 9.88888888888889e-06, "loss": 33.5803, "step": 11902 }, { "epoch": 283.40597014925373, "grad_norm": 23.996366500854492, "learning_rate": 9.88809523809524e-06, "loss": 35.4132, "step": 11903 }, { "epoch": 283.42985074626864, "grad_norm": 28.13095474243164, "learning_rate": 9.887301587301587e-06, "loss": 33.4004, "step": 11904 }, { "epoch": 283.4537313432836, "grad_norm": 24.495094299316406, "learning_rate": 9.886507936507938e-06, "loss": 35.3223, "step": 11905 }, { "epoch": 283.4776119402985, "grad_norm": 25.5897216796875, "learning_rate": 9.885714285714287e-06, "loss": 34.8353, "step": 11906 }, { "epoch": 283.5014925373134, "grad_norm": 19.90247344970703, "learning_rate": 9.884920634920636e-06, "loss": 35.1199, "step": 11907 }, { "epoch": 283.52537313432833, "grad_norm": 27.842309951782227, "learning_rate": 9.884126984126985e-06, "loss": 35.7175, "step": 11908 }, { "epoch": 283.5492537313433, "grad_norm": 22.010128021240234, "learning_rate": 9.883333333333334e-06, "loss": 34.0129, "step": 11909 }, { "epoch": 283.5731343283582, "grad_norm": 29.78243637084961, "learning_rate": 9.882539682539683e-06, "loss": 35.3333, "step": 11910 }, { "epoch": 283.5970149253731, "grad_norm": 25.89198112487793, "learning_rate": 9.881746031746032e-06, "loss": 33.2787, "step": 11911 }, { "epoch": 283.6208955223881, "grad_norm": 25.66374969482422, "learning_rate": 9.880952380952381e-06, "loss": 35.565, "step": 11912 }, { "epoch": 283.644776119403, "grad_norm": 23.362863540649414, "learning_rate": 9.88015873015873e-06, "loss": 32.2217, "step": 11913 }, { "epoch": 283.6686567164179, "grad_norm": 23.376296997070312, "learning_rate": 9.87936507936508e-06, "loss": 35.0079, "step": 11914 }, { "epoch": 283.6925373134328, "grad_norm": 21.015687942504883, "learning_rate": 9.87857142857143e-06, "loss": 34.5278, "step": 11915 }, { "epoch": 283.7164179104478, "grad_norm": 26.780033111572266, "learning_rate": 9.877777777777778e-06, "loss": 35.5065, "step": 11916 }, { "epoch": 283.7402985074627, "grad_norm": 21.850831985473633, "learning_rate": 9.876984126984128e-06, "loss": 34.1275, "step": 11917 }, { "epoch": 283.7641791044776, "grad_norm": 24.24028778076172, "learning_rate": 9.876190476190478e-06, "loss": 33.9131, "step": 11918 }, { "epoch": 283.78805970149256, "grad_norm": 20.287803649902344, "learning_rate": 9.875396825396825e-06, "loss": 34.0144, "step": 11919 }, { "epoch": 283.81194029850747, "grad_norm": 25.329547882080078, "learning_rate": 9.874603174603176e-06, "loss": 34.1357, "step": 11920 }, { "epoch": 283.8358208955224, "grad_norm": 23.08452033996582, "learning_rate": 9.873809523809525e-06, "loss": 35.355, "step": 11921 }, { "epoch": 283.85970149253734, "grad_norm": 23.316389083862305, "learning_rate": 9.873015873015874e-06, "loss": 35.2747, "step": 11922 }, { "epoch": 283.88358208955225, "grad_norm": 20.081113815307617, "learning_rate": 9.872222222222223e-06, "loss": 35.2304, "step": 11923 }, { "epoch": 283.90746268656716, "grad_norm": 23.772554397583008, "learning_rate": 9.871428571428572e-06, "loss": 34.5178, "step": 11924 }, { "epoch": 283.93134328358207, "grad_norm": 21.754993438720703, "learning_rate": 9.870634920634921e-06, "loss": 35.8339, "step": 11925 }, { "epoch": 283.95522388059703, "grad_norm": 21.76508331298828, "learning_rate": 9.86984126984127e-06, "loss": 33.1081, "step": 11926 }, { "epoch": 283.97910447761194, "grad_norm": 19.88758659362793, "learning_rate": 9.869047619047621e-06, "loss": 34.8188, "step": 11927 }, { "epoch": 284.0, "grad_norm": 21.30535125732422, "learning_rate": 9.868253968253968e-06, "loss": 30.312, "step": 11928 }, { "epoch": 284.0238805970149, "grad_norm": 22.3480224609375, "learning_rate": 9.867460317460317e-06, "loss": 34.8848, "step": 11929 }, { "epoch": 284.0477611940299, "grad_norm": 20.421735763549805, "learning_rate": 9.866666666666668e-06, "loss": 34.2704, "step": 11930 }, { "epoch": 284.0716417910448, "grad_norm": 20.93566131591797, "learning_rate": 9.865873015873017e-06, "loss": 35.1359, "step": 11931 }, { "epoch": 284.0955223880597, "grad_norm": 22.134910583496094, "learning_rate": 9.865079365079366e-06, "loss": 34.983, "step": 11932 }, { "epoch": 284.1194029850746, "grad_norm": 19.56775665283203, "learning_rate": 9.864285714285715e-06, "loss": 35.5462, "step": 11933 }, { "epoch": 284.14328358208957, "grad_norm": 20.169052124023438, "learning_rate": 9.863492063492065e-06, "loss": 33.3168, "step": 11934 }, { "epoch": 284.1671641791045, "grad_norm": 17.79511070251465, "learning_rate": 9.862698412698414e-06, "loss": 34.0852, "step": 11935 }, { "epoch": 284.1910447761194, "grad_norm": 22.726470947265625, "learning_rate": 9.861904761904763e-06, "loss": 34.2328, "step": 11936 }, { "epoch": 284.21492537313435, "grad_norm": 20.804378509521484, "learning_rate": 9.861111111111112e-06, "loss": 34.3468, "step": 11937 }, { "epoch": 284.23880597014926, "grad_norm": 19.055212020874023, "learning_rate": 9.86031746031746e-06, "loss": 34.7014, "step": 11938 }, { "epoch": 284.26268656716417, "grad_norm": 20.315317153930664, "learning_rate": 9.85952380952381e-06, "loss": 34.5101, "step": 11939 }, { "epoch": 284.28656716417913, "grad_norm": 16.841644287109375, "learning_rate": 9.858730158730159e-06, "loss": 33.8631, "step": 11940 }, { "epoch": 284.31044776119404, "grad_norm": 20.81892967224121, "learning_rate": 9.857936507936508e-06, "loss": 33.777, "step": 11941 }, { "epoch": 284.33432835820895, "grad_norm": 20.917980194091797, "learning_rate": 9.857142857142859e-06, "loss": 35.4712, "step": 11942 }, { "epoch": 284.35820895522386, "grad_norm": 18.239110946655273, "learning_rate": 9.856349206349208e-06, "loss": 34.7417, "step": 11943 }, { "epoch": 284.3820895522388, "grad_norm": 18.71514320373535, "learning_rate": 9.855555555555555e-06, "loss": 35.0344, "step": 11944 }, { "epoch": 284.40597014925373, "grad_norm": 16.92817497253418, "learning_rate": 9.854761904761906e-06, "loss": 35.4628, "step": 11945 }, { "epoch": 284.42985074626864, "grad_norm": 17.445419311523438, "learning_rate": 9.853968253968255e-06, "loss": 34.7541, "step": 11946 }, { "epoch": 284.4537313432836, "grad_norm": 12.863101959228516, "learning_rate": 9.853174603174604e-06, "loss": 34.9531, "step": 11947 }, { "epoch": 284.4776119402985, "grad_norm": 18.064464569091797, "learning_rate": 9.852380952380953e-06, "loss": 34.7739, "step": 11948 }, { "epoch": 284.5014925373134, "grad_norm": 18.38424301147461, "learning_rate": 9.851587301587302e-06, "loss": 34.4741, "step": 11949 }, { "epoch": 284.52537313432833, "grad_norm": 18.44971466064453, "learning_rate": 9.850793650793651e-06, "loss": 34.9774, "step": 11950 }, { "epoch": 284.5492537313433, "grad_norm": 15.091837882995605, "learning_rate": 9.85e-06, "loss": 34.9954, "step": 11951 }, { "epoch": 284.5731343283582, "grad_norm": 19.42574119567871, "learning_rate": 9.849206349206351e-06, "loss": 34.1234, "step": 11952 }, { "epoch": 284.5970149253731, "grad_norm": 19.454084396362305, "learning_rate": 9.848412698412699e-06, "loss": 34.8341, "step": 11953 }, { "epoch": 284.6208955223881, "grad_norm": 17.355350494384766, "learning_rate": 9.847619047619048e-06, "loss": 34.5434, "step": 11954 }, { "epoch": 284.644776119403, "grad_norm": 16.38776969909668, "learning_rate": 9.846825396825399e-06, "loss": 33.9971, "step": 11955 }, { "epoch": 284.6686567164179, "grad_norm": 17.23564338684082, "learning_rate": 9.846031746031746e-06, "loss": 35.3131, "step": 11956 }, { "epoch": 284.6925373134328, "grad_norm": 15.807302474975586, "learning_rate": 9.845238095238097e-06, "loss": 33.4787, "step": 11957 }, { "epoch": 284.7164179104478, "grad_norm": 19.17428207397461, "learning_rate": 9.844444444444446e-06, "loss": 34.3309, "step": 11958 }, { "epoch": 284.7402985074627, "grad_norm": 18.40250015258789, "learning_rate": 9.843650793650795e-06, "loss": 34.6634, "step": 11959 }, { "epoch": 284.7641791044776, "grad_norm": 18.37040138244629, "learning_rate": 9.842857142857144e-06, "loss": 34.5341, "step": 11960 }, { "epoch": 284.78805970149256, "grad_norm": 15.528295516967773, "learning_rate": 9.842063492063493e-06, "loss": 35.147, "step": 11961 }, { "epoch": 284.81194029850747, "grad_norm": 19.036741256713867, "learning_rate": 9.841269841269842e-06, "loss": 33.4691, "step": 11962 }, { "epoch": 284.8358208955224, "grad_norm": 17.90913200378418, "learning_rate": 9.840476190476191e-06, "loss": 34.1951, "step": 11963 }, { "epoch": 284.85970149253734, "grad_norm": 21.4915828704834, "learning_rate": 9.83968253968254e-06, "loss": 35.1062, "step": 11964 }, { "epoch": 284.88358208955225, "grad_norm": 16.04237174987793, "learning_rate": 9.83888888888889e-06, "loss": 33.4108, "step": 11965 }, { "epoch": 284.90746268656716, "grad_norm": 19.474821090698242, "learning_rate": 9.838095238095238e-06, "loss": 34.4099, "step": 11966 }, { "epoch": 284.93134328358207, "grad_norm": 20.65281105041504, "learning_rate": 9.837301587301588e-06, "loss": 34.8545, "step": 11967 }, { "epoch": 284.95522388059703, "grad_norm": 16.95366096496582, "learning_rate": 9.836507936507937e-06, "loss": 34.9288, "step": 11968 }, { "epoch": 284.97910447761194, "grad_norm": 17.942453384399414, "learning_rate": 9.835714285714286e-06, "loss": 36.3266, "step": 11969 }, { "epoch": 285.0, "grad_norm": 18.859390258789062, "learning_rate": 9.834920634920636e-06, "loss": 30.4459, "step": 11970 }, { "epoch": 285.0238805970149, "grad_norm": 16.060823440551758, "learning_rate": 9.834126984126986e-06, "loss": 34.7412, "step": 11971 }, { "epoch": 285.0477611940299, "grad_norm": 22.028331756591797, "learning_rate": 9.833333333333333e-06, "loss": 33.4896, "step": 11972 }, { "epoch": 285.0716417910448, "grad_norm": 24.3582820892334, "learning_rate": 9.832539682539684e-06, "loss": 34.7248, "step": 11973 }, { "epoch": 285.0955223880597, "grad_norm": 17.361093521118164, "learning_rate": 9.831746031746033e-06, "loss": 35.0884, "step": 11974 }, { "epoch": 285.1194029850746, "grad_norm": 21.988372802734375, "learning_rate": 9.830952380952382e-06, "loss": 35.6549, "step": 11975 }, { "epoch": 285.14328358208957, "grad_norm": 19.694623947143555, "learning_rate": 9.830158730158731e-06, "loss": 34.1964, "step": 11976 }, { "epoch": 285.1671641791045, "grad_norm": 16.763029098510742, "learning_rate": 9.82936507936508e-06, "loss": 33.226, "step": 11977 }, { "epoch": 285.1910447761194, "grad_norm": 18.90792465209961, "learning_rate": 9.828571428571429e-06, "loss": 35.8647, "step": 11978 }, { "epoch": 285.21492537313435, "grad_norm": 21.621692657470703, "learning_rate": 9.827777777777778e-06, "loss": 35.1498, "step": 11979 }, { "epoch": 285.23880597014926, "grad_norm": 15.991765975952148, "learning_rate": 9.826984126984129e-06, "loss": 34.9035, "step": 11980 }, { "epoch": 285.26268656716417, "grad_norm": 18.409725189208984, "learning_rate": 9.826190476190476e-06, "loss": 34.2189, "step": 11981 }, { "epoch": 285.28656716417913, "grad_norm": 23.432588577270508, "learning_rate": 9.825396825396825e-06, "loss": 35.0388, "step": 11982 }, { "epoch": 285.31044776119404, "grad_norm": 16.48472785949707, "learning_rate": 9.824603174603176e-06, "loss": 34.3214, "step": 11983 }, { "epoch": 285.33432835820895, "grad_norm": 25.051328659057617, "learning_rate": 9.823809523809524e-06, "loss": 34.3483, "step": 11984 }, { "epoch": 285.35820895522386, "grad_norm": 18.279516220092773, "learning_rate": 9.823015873015874e-06, "loss": 34.3283, "step": 11985 }, { "epoch": 285.3820895522388, "grad_norm": 18.019378662109375, "learning_rate": 9.822222222222223e-06, "loss": 34.4974, "step": 11986 }, { "epoch": 285.40597014925373, "grad_norm": NaN, "learning_rate": 9.821428571428573e-06, "loss": 48.2549, "step": 11987 }, { "epoch": 285.42985074626864, "grad_norm": 22.311250686645508, "learning_rate": 9.821428571428573e-06, "loss": 34.9845, "step": 11988 }, { "epoch": 285.4537313432836, "grad_norm": 19.11435890197754, "learning_rate": 9.820634920634922e-06, "loss": 34.9478, "step": 11989 }, { "epoch": 285.4776119402985, "grad_norm": 14.488481521606445, "learning_rate": 9.81984126984127e-06, "loss": 34.9031, "step": 11990 }, { "epoch": 285.5014925373134, "grad_norm": 21.968503952026367, "learning_rate": 9.81904761904762e-06, "loss": 34.9046, "step": 11991 }, { "epoch": 285.52537313432833, "grad_norm": 20.652868270874023, "learning_rate": 9.818253968253969e-06, "loss": 34.0136, "step": 11992 }, { "epoch": 285.5492537313433, "grad_norm": 15.696794509887695, "learning_rate": 9.817460317460318e-06, "loss": 33.358, "step": 11993 }, { "epoch": 285.5731343283582, "grad_norm": 25.74132537841797, "learning_rate": 9.816666666666667e-06, "loss": 36.0652, "step": 11994 }, { "epoch": 285.5970149253731, "grad_norm": 15.65168285369873, "learning_rate": 9.815873015873016e-06, "loss": 33.8315, "step": 11995 }, { "epoch": 285.6208955223881, "grad_norm": 20.603830337524414, "learning_rate": 9.815079365079367e-06, "loss": 35.1069, "step": 11996 }, { "epoch": 285.644776119403, "grad_norm": 21.205408096313477, "learning_rate": 9.814285714285716e-06, "loss": 34.5993, "step": 11997 }, { "epoch": 285.6686567164179, "grad_norm": 14.209935188293457, "learning_rate": 9.813492063492063e-06, "loss": 34.3532, "step": 11998 }, { "epoch": 285.6925373134328, "grad_norm": 19.43562126159668, "learning_rate": 9.812698412698414e-06, "loss": 34.0042, "step": 11999 }, { "epoch": 285.7164179104478, "grad_norm": 24.769792556762695, "learning_rate": 9.811904761904763e-06, "loss": 35.1111, "step": 12000 }, { "epoch": 285.7402985074627, "grad_norm": 14.365962028503418, "learning_rate": 9.811111111111112e-06, "loss": 34.8156, "step": 12001 }, { "epoch": 285.7641791044776, "grad_norm": 21.0401554107666, "learning_rate": 9.810317460317461e-06, "loss": 34.4963, "step": 12002 }, { "epoch": 285.78805970149256, "grad_norm": 18.862346649169922, "learning_rate": 9.80952380952381e-06, "loss": 34.5222, "step": 12003 }, { "epoch": 285.81194029850747, "grad_norm": 15.13908576965332, "learning_rate": 9.80873015873016e-06, "loss": 33.9945, "step": 12004 }, { "epoch": 285.8358208955224, "grad_norm": 18.187814712524414, "learning_rate": 9.807936507936509e-06, "loss": 33.7722, "step": 12005 }, { "epoch": 285.85970149253734, "grad_norm": 21.272560119628906, "learning_rate": 9.807142857142858e-06, "loss": 35.3827, "step": 12006 }, { "epoch": 285.88358208955225, "grad_norm": 15.041167259216309, "learning_rate": 9.806349206349207e-06, "loss": 34.2868, "step": 12007 }, { "epoch": 285.90746268656716, "grad_norm": 16.316268920898438, "learning_rate": 9.805555555555556e-06, "loss": 34.7403, "step": 12008 }, { "epoch": 285.93134328358207, "grad_norm": 17.840290069580078, "learning_rate": 9.804761904761907e-06, "loss": 34.6549, "step": 12009 }, { "epoch": 285.95522388059703, "grad_norm": 17.38083839416504, "learning_rate": 9.803968253968254e-06, "loss": 34.7757, "step": 12010 }, { "epoch": 285.97910447761194, "grad_norm": 16.983545303344727, "learning_rate": 9.803174603174605e-06, "loss": 33.8752, "step": 12011 }, { "epoch": 286.0, "grad_norm": 13.664655685424805, "learning_rate": 9.802380952380954e-06, "loss": 29.3307, "step": 12012 }, { "epoch": 286.0238805970149, "grad_norm": 21.884868621826172, "learning_rate": 9.801587301587301e-06, "loss": 35.0974, "step": 12013 }, { "epoch": 286.0477611940299, "grad_norm": 15.397043228149414, "learning_rate": 9.800793650793652e-06, "loss": 35.2422, "step": 12014 }, { "epoch": 286.0716417910448, "grad_norm": 20.124658584594727, "learning_rate": 9.800000000000001e-06, "loss": 33.7555, "step": 12015 }, { "epoch": 286.0955223880597, "grad_norm": NaN, "learning_rate": 9.79920634920635e-06, "loss": 32.9235, "step": 12016 }, { "epoch": 286.1194029850746, "grad_norm": 19.885358810424805, "learning_rate": 9.79920634920635e-06, "loss": 34.9683, "step": 12017 }, { "epoch": 286.14328358208957, "grad_norm": 15.506340980529785, "learning_rate": 9.7984126984127e-06, "loss": 34.3529, "step": 12018 }, { "epoch": 286.1671641791045, "grad_norm": 19.64795684814453, "learning_rate": 9.797619047619048e-06, "loss": 34.3434, "step": 12019 }, { "epoch": 286.1910447761194, "grad_norm": 14.998760223388672, "learning_rate": 9.796825396825397e-06, "loss": 35.0393, "step": 12020 }, { "epoch": 286.21492537313435, "grad_norm": 19.91847801208496, "learning_rate": 9.796031746031746e-06, "loss": 35.0371, "step": 12021 }, { "epoch": 286.23880597014926, "grad_norm": 16.710060119628906, "learning_rate": 9.795238095238097e-06, "loss": 33.9395, "step": 12022 }, { "epoch": 286.26268656716417, "grad_norm": 17.559694290161133, "learning_rate": 9.794444444444445e-06, "loss": 32.8833, "step": 12023 }, { "epoch": 286.28656716417913, "grad_norm": 23.84259796142578, "learning_rate": 9.793650793650794e-06, "loss": 36.1526, "step": 12024 }, { "epoch": 286.31044776119404, "grad_norm": 17.985380172729492, "learning_rate": 9.792857142857144e-06, "loss": 34.1914, "step": 12025 }, { "epoch": 286.33432835820895, "grad_norm": 16.1311092376709, "learning_rate": 9.792063492063494e-06, "loss": 34.7507, "step": 12026 }, { "epoch": 286.35820895522386, "grad_norm": 14.830122947692871, "learning_rate": 9.791269841269843e-06, "loss": 35.132, "step": 12027 }, { "epoch": 286.3820895522388, "grad_norm": 19.325284957885742, "learning_rate": 9.790476190476192e-06, "loss": 35.1478, "step": 12028 }, { "epoch": 286.40597014925373, "grad_norm": 19.017011642456055, "learning_rate": 9.78968253968254e-06, "loss": 34.5581, "step": 12029 }, { "epoch": 286.42985074626864, "grad_norm": 16.702796936035156, "learning_rate": 9.78888888888889e-06, "loss": 34.5856, "step": 12030 }, { "epoch": 286.4537313432836, "grad_norm": 16.359254837036133, "learning_rate": 9.788095238095239e-06, "loss": 35.202, "step": 12031 }, { "epoch": 286.4776119402985, "grad_norm": 17.346094131469727, "learning_rate": 9.787301587301588e-06, "loss": 34.7018, "step": 12032 }, { "epoch": 286.5014925373134, "grad_norm": 15.318071365356445, "learning_rate": 9.786507936507937e-06, "loss": 33.0346, "step": 12033 }, { "epoch": 286.52537313432833, "grad_norm": 14.456928253173828, "learning_rate": 9.785714285714286e-06, "loss": 35.2802, "step": 12034 }, { "epoch": 286.5492537313433, "grad_norm": 14.843634605407715, "learning_rate": 9.784920634920635e-06, "loss": 35.4394, "step": 12035 }, { "epoch": 286.5731343283582, "grad_norm": 15.709205627441406, "learning_rate": 9.784126984126984e-06, "loss": 33.557, "step": 12036 }, { "epoch": 286.5970149253731, "grad_norm": 17.61166000366211, "learning_rate": 9.783333333333335e-06, "loss": 35.2218, "step": 12037 }, { "epoch": 286.6208955223881, "grad_norm": 16.26697540283203, "learning_rate": 9.782539682539684e-06, "loss": 34.1617, "step": 12038 }, { "epoch": 286.644776119403, "grad_norm": 19.526288986206055, "learning_rate": 9.781746031746032e-06, "loss": 34.5198, "step": 12039 }, { "epoch": 286.6686567164179, "grad_norm": 18.525033950805664, "learning_rate": 9.780952380952382e-06, "loss": 34.8916, "step": 12040 }, { "epoch": 286.6925373134328, "grad_norm": 14.802464485168457, "learning_rate": 9.780158730158731e-06, "loss": 34.2814, "step": 12041 }, { "epoch": 286.7164179104478, "grad_norm": 13.708857536315918, "learning_rate": 9.779365079365079e-06, "loss": 34.0722, "step": 12042 }, { "epoch": 286.7402985074627, "grad_norm": 19.151531219482422, "learning_rate": 9.77857142857143e-06, "loss": 34.6693, "step": 12043 }, { "epoch": 286.7641791044776, "grad_norm": 19.884967803955078, "learning_rate": 9.777777777777779e-06, "loss": 36.2617, "step": 12044 }, { "epoch": 286.78805970149256, "grad_norm": 17.904062271118164, "learning_rate": 9.776984126984128e-06, "loss": 34.9496, "step": 12045 }, { "epoch": 286.81194029850747, "grad_norm": 15.217409133911133, "learning_rate": 9.776190476190477e-06, "loss": 32.5331, "step": 12046 }, { "epoch": 286.8358208955224, "grad_norm": 17.902698516845703, "learning_rate": 9.775396825396826e-06, "loss": 34.9822, "step": 12047 }, { "epoch": 286.85970149253734, "grad_norm": 16.96517562866211, "learning_rate": 9.774603174603175e-06, "loss": 34.4241, "step": 12048 }, { "epoch": 286.88358208955225, "grad_norm": 19.24843406677246, "learning_rate": 9.773809523809524e-06, "loss": 34.2901, "step": 12049 }, { "epoch": 286.90746268656716, "grad_norm": 17.964000701904297, "learning_rate": 9.773015873015875e-06, "loss": 34.1566, "step": 12050 }, { "epoch": 286.93134328358207, "grad_norm": 13.353113174438477, "learning_rate": 9.772222222222222e-06, "loss": 33.1719, "step": 12051 }, { "epoch": 286.95522388059703, "grad_norm": 14.79580307006836, "learning_rate": 9.771428571428571e-06, "loss": 34.417, "step": 12052 }, { "epoch": 286.97910447761194, "grad_norm": 19.75585174560547, "learning_rate": 9.770634920634922e-06, "loss": 34.1401, "step": 12053 }, { "epoch": 287.0, "grad_norm": 19.62725257873535, "learning_rate": 9.769841269841271e-06, "loss": 30.4209, "step": 12054 }, { "epoch": 287.0238805970149, "grad_norm": 16.045976638793945, "learning_rate": 9.76904761904762e-06, "loss": 34.3797, "step": 12055 }, { "epoch": 287.0477611940299, "grad_norm": 16.418935775756836, "learning_rate": 9.76825396825397e-06, "loss": 34.0024, "step": 12056 }, { "epoch": 287.0716417910448, "grad_norm": 23.848142623901367, "learning_rate": 9.767460317460318e-06, "loss": 34.7997, "step": 12057 }, { "epoch": 287.0955223880597, "grad_norm": 20.17426300048828, "learning_rate": 9.766666666666667e-06, "loss": 33.0639, "step": 12058 }, { "epoch": 287.1194029850746, "grad_norm": 14.178504943847656, "learning_rate": 9.765873015873017e-06, "loss": 34.2935, "step": 12059 }, { "epoch": 287.14328358208957, "grad_norm": 18.32088851928711, "learning_rate": 9.765079365079366e-06, "loss": 34.6321, "step": 12060 }, { "epoch": 287.1671641791045, "grad_norm": 17.045101165771484, "learning_rate": 9.764285714285715e-06, "loss": 35.5111, "step": 12061 }, { "epoch": 287.1910447761194, "grad_norm": 19.639179229736328, "learning_rate": 9.763492063492064e-06, "loss": 34.5451, "step": 12062 }, { "epoch": 287.21492537313435, "grad_norm": 18.215702056884766, "learning_rate": 9.762698412698413e-06, "loss": 34.8495, "step": 12063 }, { "epoch": 287.23880597014926, "grad_norm": 16.4918270111084, "learning_rate": 9.761904761904762e-06, "loss": 33.7022, "step": 12064 }, { "epoch": 287.26268656716417, "grad_norm": 18.707651138305664, "learning_rate": 9.761111111111113e-06, "loss": 34.4524, "step": 12065 }, { "epoch": 287.28656716417913, "grad_norm": 14.767991065979004, "learning_rate": 9.760317460317462e-06, "loss": 34.8725, "step": 12066 }, { "epoch": 287.31044776119404, "grad_norm": 23.754541397094727, "learning_rate": 9.75952380952381e-06, "loss": 34.5065, "step": 12067 }, { "epoch": 287.33432835820895, "grad_norm": 16.943313598632812, "learning_rate": 9.75873015873016e-06, "loss": 33.6943, "step": 12068 }, { "epoch": 287.35820895522386, "grad_norm": 20.705615997314453, "learning_rate": 9.757936507936509e-06, "loss": 34.9901, "step": 12069 }, { "epoch": 287.3820895522388, "grad_norm": 16.885560989379883, "learning_rate": 9.757142857142858e-06, "loss": 33.9538, "step": 12070 }, { "epoch": 287.40597014925373, "grad_norm": 16.602771759033203, "learning_rate": 9.756349206349207e-06, "loss": 34.9266, "step": 12071 }, { "epoch": 287.42985074626864, "grad_norm": 19.53326416015625, "learning_rate": 9.755555555555556e-06, "loss": 34.7959, "step": 12072 }, { "epoch": 287.4537313432836, "grad_norm": 15.950719833374023, "learning_rate": 9.754761904761905e-06, "loss": 33.6526, "step": 12073 }, { "epoch": 287.4776119402985, "grad_norm": 16.19793701171875, "learning_rate": 9.753968253968254e-06, "loss": 34.6108, "step": 12074 }, { "epoch": 287.5014925373134, "grad_norm": 16.855018615722656, "learning_rate": 9.753174603174605e-06, "loss": 33.6259, "step": 12075 }, { "epoch": 287.52537313432833, "grad_norm": 14.502487182617188, "learning_rate": 9.752380952380953e-06, "loss": 35.1415, "step": 12076 }, { "epoch": 287.5492537313433, "grad_norm": 20.946552276611328, "learning_rate": 9.751587301587302e-06, "loss": 35.2675, "step": 12077 }, { "epoch": 287.5731343283582, "grad_norm": 19.062923431396484, "learning_rate": 9.750793650793652e-06, "loss": 34.7373, "step": 12078 }, { "epoch": 287.5970149253731, "grad_norm": 15.838395118713379, "learning_rate": 9.75e-06, "loss": 34.807, "step": 12079 }, { "epoch": 287.6208955223881, "grad_norm": 16.52606773376465, "learning_rate": 9.74920634920635e-06, "loss": 35.0307, "step": 12080 }, { "epoch": 287.644776119403, "grad_norm": 17.749820709228516, "learning_rate": 9.7484126984127e-06, "loss": 35.5646, "step": 12081 }, { "epoch": 287.6686567164179, "grad_norm": 16.2675838470459, "learning_rate": 9.747619047619049e-06, "loss": 34.7169, "step": 12082 }, { "epoch": 287.6925373134328, "grad_norm": 19.575477600097656, "learning_rate": 9.746825396825398e-06, "loss": 34.7319, "step": 12083 }, { "epoch": 287.7164179104478, "grad_norm": 15.083663940429688, "learning_rate": 9.746031746031747e-06, "loss": 33.7711, "step": 12084 }, { "epoch": 287.7402985074627, "grad_norm": 14.370621681213379, "learning_rate": 9.745238095238096e-06, "loss": 33.7369, "step": 12085 }, { "epoch": 287.7641791044776, "grad_norm": 17.50619888305664, "learning_rate": 9.744444444444445e-06, "loss": 35.0661, "step": 12086 }, { "epoch": 287.78805970149256, "grad_norm": 15.515654563903809, "learning_rate": 9.743650793650794e-06, "loss": 34.3127, "step": 12087 }, { "epoch": 287.81194029850747, "grad_norm": 17.39630699157715, "learning_rate": 9.742857142857143e-06, "loss": 33.9359, "step": 12088 }, { "epoch": 287.8358208955224, "grad_norm": 16.543195724487305, "learning_rate": 9.742063492063492e-06, "loss": 34.1895, "step": 12089 }, { "epoch": 287.85970149253734, "grad_norm": 21.681650161743164, "learning_rate": 9.741269841269843e-06, "loss": 34.9595, "step": 12090 }, { "epoch": 287.88358208955225, "grad_norm": 14.407553672790527, "learning_rate": 9.74047619047619e-06, "loss": 33.3288, "step": 12091 }, { "epoch": 287.90746268656716, "grad_norm": 17.220455169677734, "learning_rate": 9.73968253968254e-06, "loss": 33.2606, "step": 12092 }, { "epoch": 287.93134328358207, "grad_norm": 15.273118019104004, "learning_rate": 9.73888888888889e-06, "loss": 35.1208, "step": 12093 }, { "epoch": 287.95522388059703, "grad_norm": 23.344297409057617, "learning_rate": 9.73809523809524e-06, "loss": 35.4974, "step": 12094 }, { "epoch": 287.97910447761194, "grad_norm": 15.585911750793457, "learning_rate": 9.737301587301588e-06, "loss": 34.1066, "step": 12095 }, { "epoch": 288.0, "grad_norm": 15.023148536682129, "learning_rate": 9.736507936507938e-06, "loss": 31.1307, "step": 12096 }, { "epoch": 288.0238805970149, "grad_norm": 23.582096099853516, "learning_rate": 9.735714285714287e-06, "loss": 34.7779, "step": 12097 }, { "epoch": 288.0477611940299, "grad_norm": 15.913206100463867, "learning_rate": 9.734920634920636e-06, "loss": 34.7806, "step": 12098 }, { "epoch": 288.0716417910448, "grad_norm": 21.027273178100586, "learning_rate": 9.734126984126985e-06, "loss": 35.0032, "step": 12099 }, { "epoch": 288.0955223880597, "grad_norm": 17.777542114257812, "learning_rate": 9.733333333333334e-06, "loss": 33.5533, "step": 12100 }, { "epoch": 288.1194029850746, "grad_norm": 26.905447006225586, "learning_rate": 9.732539682539683e-06, "loss": 34.4528, "step": 12101 }, { "epoch": 288.14328358208957, "grad_norm": 20.0372257232666, "learning_rate": 9.731746031746032e-06, "loss": 34.6477, "step": 12102 }, { "epoch": 288.1671641791045, "grad_norm": 26.21122932434082, "learning_rate": 9.730952380952383e-06, "loss": 35.3729, "step": 12103 }, { "epoch": 288.1910447761194, "grad_norm": 24.10542869567871, "learning_rate": 9.73015873015873e-06, "loss": 33.4395, "step": 12104 }, { "epoch": 288.21492537313435, "grad_norm": 19.59185218811035, "learning_rate": 9.729365079365081e-06, "loss": 34.752, "step": 12105 }, { "epoch": 288.23880597014926, "grad_norm": 21.703527450561523, "learning_rate": 9.72857142857143e-06, "loss": 35.4408, "step": 12106 }, { "epoch": 288.26268656716417, "grad_norm": 19.279748916625977, "learning_rate": 9.727777777777777e-06, "loss": 33.5879, "step": 12107 }, { "epoch": 288.28656716417913, "grad_norm": 18.189329147338867, "learning_rate": 9.726984126984128e-06, "loss": 34.9721, "step": 12108 }, { "epoch": 288.31044776119404, "grad_norm": 20.553638458251953, "learning_rate": 9.726190476190477e-06, "loss": 34.6883, "step": 12109 }, { "epoch": 288.33432835820895, "grad_norm": 20.090965270996094, "learning_rate": 9.725396825396826e-06, "loss": 34.5064, "step": 12110 }, { "epoch": 288.35820895522386, "grad_norm": 14.965476989746094, "learning_rate": 9.724603174603175e-06, "loss": 34.7103, "step": 12111 }, { "epoch": 288.3820895522388, "grad_norm": 19.639541625976562, "learning_rate": 9.723809523809525e-06, "loss": 34.633, "step": 12112 }, { "epoch": 288.40597014925373, "grad_norm": 18.138107299804688, "learning_rate": 9.723015873015874e-06, "loss": 35.3275, "step": 12113 }, { "epoch": 288.42985074626864, "grad_norm": 15.13943862915039, "learning_rate": 9.722222222222223e-06, "loss": 33.9663, "step": 12114 }, { "epoch": 288.4537313432836, "grad_norm": 15.557840347290039, "learning_rate": 9.721428571428573e-06, "loss": 34.8689, "step": 12115 }, { "epoch": 288.4776119402985, "grad_norm": 13.73161792755127, "learning_rate": 9.720634920634921e-06, "loss": 34.6142, "step": 12116 }, { "epoch": 288.5014925373134, "grad_norm": 15.27243423461914, "learning_rate": 9.71984126984127e-06, "loss": 35.3487, "step": 12117 }, { "epoch": 288.52537313432833, "grad_norm": 15.516766548156738, "learning_rate": 9.71904761904762e-06, "loss": 33.854, "step": 12118 }, { "epoch": 288.5492537313433, "grad_norm": 16.92219352722168, "learning_rate": 9.71825396825397e-06, "loss": 33.0819, "step": 12119 }, { "epoch": 288.5731343283582, "grad_norm": 16.66267204284668, "learning_rate": 9.717460317460317e-06, "loss": 34.5783, "step": 12120 }, { "epoch": 288.5970149253731, "grad_norm": 21.243785858154297, "learning_rate": 9.716666666666668e-06, "loss": 33.4727, "step": 12121 }, { "epoch": 288.6208955223881, "grad_norm": 14.848652839660645, "learning_rate": 9.715873015873017e-06, "loss": 34.1186, "step": 12122 }, { "epoch": 288.644776119403, "grad_norm": 17.60338592529297, "learning_rate": 9.715079365079366e-06, "loss": 34.6413, "step": 12123 }, { "epoch": 288.6686567164179, "grad_norm": 16.577882766723633, "learning_rate": 9.714285714285715e-06, "loss": 34.5929, "step": 12124 }, { "epoch": 288.6925373134328, "grad_norm": 19.29567527770996, "learning_rate": 9.713492063492064e-06, "loss": 35.2559, "step": 12125 }, { "epoch": 288.7164179104478, "grad_norm": 18.365034103393555, "learning_rate": 9.712698412698413e-06, "loss": 34.1905, "step": 12126 }, { "epoch": 288.7402985074627, "grad_norm": 19.5806941986084, "learning_rate": 9.711904761904762e-06, "loss": 34.1149, "step": 12127 }, { "epoch": 288.7641791044776, "grad_norm": 12.890687942504883, "learning_rate": 9.711111111111111e-06, "loss": 33.3241, "step": 12128 }, { "epoch": 288.78805970149256, "grad_norm": 18.987075805664062, "learning_rate": 9.71031746031746e-06, "loss": 34.7101, "step": 12129 }, { "epoch": 288.81194029850747, "grad_norm": 15.151649475097656, "learning_rate": 9.70952380952381e-06, "loss": 34.9443, "step": 12130 }, { "epoch": 288.8358208955224, "grad_norm": 18.86102294921875, "learning_rate": 9.70873015873016e-06, "loss": 33.5959, "step": 12131 }, { "epoch": 288.85970149253734, "grad_norm": 16.394893646240234, "learning_rate": 9.707936507936508e-06, "loss": 35.2346, "step": 12132 }, { "epoch": 288.88358208955225, "grad_norm": 20.39829444885254, "learning_rate": 9.707142857142859e-06, "loss": 34.7344, "step": 12133 }, { "epoch": 288.90746268656716, "grad_norm": 18.291614532470703, "learning_rate": 9.706349206349208e-06, "loss": 34.863, "step": 12134 }, { "epoch": 288.93134328358207, "grad_norm": 16.3395938873291, "learning_rate": 9.705555555555555e-06, "loss": 33.9108, "step": 12135 }, { "epoch": 288.95522388059703, "grad_norm": 18.286191940307617, "learning_rate": 9.704761904761906e-06, "loss": 34.3912, "step": 12136 }, { "epoch": 288.97910447761194, "grad_norm": 19.71375274658203, "learning_rate": 9.703968253968255e-06, "loss": 33.7324, "step": 12137 }, { "epoch": 289.0, "grad_norm": 14.758651733398438, "learning_rate": 9.703174603174604e-06, "loss": 29.6443, "step": 12138 }, { "epoch": 289.0238805970149, "grad_norm": 16.483644485473633, "learning_rate": 9.702380952380953e-06, "loss": 34.7059, "step": 12139 }, { "epoch": 289.0477611940299, "grad_norm": 16.072952270507812, "learning_rate": 9.701587301587302e-06, "loss": 33.5644, "step": 12140 }, { "epoch": 289.0716417910448, "grad_norm": 18.685707092285156, "learning_rate": 9.700793650793651e-06, "loss": 33.909, "step": 12141 }, { "epoch": 289.0955223880597, "grad_norm": 15.370194435119629, "learning_rate": 9.7e-06, "loss": 34.8823, "step": 12142 }, { "epoch": 289.1194029850746, "grad_norm": 17.053407669067383, "learning_rate": 9.699206349206351e-06, "loss": 34.1308, "step": 12143 }, { "epoch": 289.14328358208957, "grad_norm": 13.865734100341797, "learning_rate": 9.698412698412698e-06, "loss": 34.4891, "step": 12144 }, { "epoch": 289.1671641791045, "grad_norm": 18.639347076416016, "learning_rate": 9.697619047619048e-06, "loss": 33.9614, "step": 12145 }, { "epoch": 289.1910447761194, "grad_norm": 13.701078414916992, "learning_rate": 9.696825396825398e-06, "loss": 33.8884, "step": 12146 }, { "epoch": 289.21492537313435, "grad_norm": 20.172367095947266, "learning_rate": 9.696031746031747e-06, "loss": 35.0894, "step": 12147 }, { "epoch": 289.23880597014926, "grad_norm": 22.14353370666504, "learning_rate": 9.695238095238096e-06, "loss": 33.8598, "step": 12148 }, { "epoch": 289.26268656716417, "grad_norm": 17.53331184387207, "learning_rate": 9.694444444444446e-06, "loss": 35.4552, "step": 12149 }, { "epoch": 289.28656716417913, "grad_norm": 17.245830535888672, "learning_rate": 9.693650793650795e-06, "loss": 34.8001, "step": 12150 }, { "epoch": 289.31044776119404, "grad_norm": 15.168274879455566, "learning_rate": 9.692857142857144e-06, "loss": 34.1529, "step": 12151 }, { "epoch": 289.33432835820895, "grad_norm": 18.141550064086914, "learning_rate": 9.692063492063493e-06, "loss": 33.5119, "step": 12152 }, { "epoch": 289.35820895522386, "grad_norm": 14.61592960357666, "learning_rate": 9.691269841269842e-06, "loss": 34.165, "step": 12153 }, { "epoch": 289.3820895522388, "grad_norm": 15.107131958007812, "learning_rate": 9.690476190476191e-06, "loss": 33.3661, "step": 12154 }, { "epoch": 289.40597014925373, "grad_norm": 13.791497230529785, "learning_rate": 9.68968253968254e-06, "loss": 33.5373, "step": 12155 }, { "epoch": 289.42985074626864, "grad_norm": 15.632122993469238, "learning_rate": 9.688888888888889e-06, "loss": 34.685, "step": 12156 }, { "epoch": 289.4537313432836, "grad_norm": 14.425263404846191, "learning_rate": 9.688095238095238e-06, "loss": 33.7206, "step": 12157 }, { "epoch": 289.4776119402985, "grad_norm": 14.650760650634766, "learning_rate": 9.687301587301589e-06, "loss": 33.1826, "step": 12158 }, { "epoch": 289.5014925373134, "grad_norm": NaN, "learning_rate": 9.686507936507938e-06, "loss": 51.007, "step": 12159 }, { "epoch": 289.52537313432833, "grad_norm": 15.101150512695312, "learning_rate": 9.686507936507938e-06, "loss": 35.4393, "step": 12160 }, { "epoch": 289.5492537313433, "grad_norm": 15.44886302947998, "learning_rate": 9.685714285714285e-06, "loss": 35.3721, "step": 12161 }, { "epoch": 289.5731343283582, "grad_norm": 15.483396530151367, "learning_rate": 9.684920634920636e-06, "loss": 33.7206, "step": 12162 }, { "epoch": 289.5970149253731, "grad_norm": 18.036157608032227, "learning_rate": 9.684126984126985e-06, "loss": 34.7579, "step": 12163 }, { "epoch": 289.6208955223881, "grad_norm": 19.82551383972168, "learning_rate": 9.683333333333334e-06, "loss": 34.0443, "step": 12164 }, { "epoch": 289.644776119403, "grad_norm": 14.942346572875977, "learning_rate": 9.682539682539683e-06, "loss": 35.119, "step": 12165 }, { "epoch": 289.6686567164179, "grad_norm": 14.71619701385498, "learning_rate": 9.681746031746033e-06, "loss": 33.8474, "step": 12166 }, { "epoch": 289.6925373134328, "grad_norm": 18.883920669555664, "learning_rate": 9.680952380952382e-06, "loss": 34.5756, "step": 12167 }, { "epoch": 289.7164179104478, "grad_norm": 22.619094848632812, "learning_rate": 9.68015873015873e-06, "loss": 35.173, "step": 12168 }, { "epoch": 289.7402985074627, "grad_norm": 14.180581092834473, "learning_rate": 9.679365079365081e-06, "loss": 34.4955, "step": 12169 }, { "epoch": 289.7641791044776, "grad_norm": 22.267663955688477, "learning_rate": 9.678571428571429e-06, "loss": 35.0336, "step": 12170 }, { "epoch": 289.78805970149256, "grad_norm": 25.306835174560547, "learning_rate": 9.677777777777778e-06, "loss": 34.7632, "step": 12171 }, { "epoch": 289.81194029850747, "grad_norm": 14.044944763183594, "learning_rate": 9.676984126984129e-06, "loss": 35.3385, "step": 12172 }, { "epoch": 289.8358208955224, "grad_norm": 20.621187210083008, "learning_rate": 9.676190476190476e-06, "loss": 33.8966, "step": 12173 }, { "epoch": 289.85970149253734, "grad_norm": NaN, "learning_rate": 9.675396825396827e-06, "loss": 54.0567, "step": 12174 }, { "epoch": 289.88358208955225, "grad_norm": 22.201995849609375, "learning_rate": 9.675396825396827e-06, "loss": 35.6075, "step": 12175 }, { "epoch": 289.90746268656716, "grad_norm": 13.381609916687012, "learning_rate": 9.674603174603176e-06, "loss": 34.063, "step": 12176 }, { "epoch": 289.93134328358207, "grad_norm": 30.19305992126465, "learning_rate": 9.673809523809525e-06, "loss": 34.3243, "step": 12177 }, { "epoch": 289.95522388059703, "grad_norm": 18.34126853942871, "learning_rate": 9.673015873015874e-06, "loss": 34.54, "step": 12178 }, { "epoch": 289.97910447761194, "grad_norm": 23.226390838623047, "learning_rate": 9.672222222222223e-06, "loss": 34.5679, "step": 12179 }, { "epoch": 290.0, "grad_norm": 19.478759765625, "learning_rate": 9.671428571428572e-06, "loss": 31.2824, "step": 12180 }, { "epoch": 290.0238805970149, "grad_norm": 14.978033065795898, "learning_rate": 9.670634920634921e-06, "loss": 34.1951, "step": 12181 }, { "epoch": 290.0477611940299, "grad_norm": 31.53683853149414, "learning_rate": 9.66984126984127e-06, "loss": 34.7907, "step": 12182 }, { "epoch": 290.0716417910448, "grad_norm": 19.020130157470703, "learning_rate": 9.66904761904762e-06, "loss": 33.9402, "step": 12183 }, { "epoch": 290.0955223880597, "grad_norm": 31.166902542114258, "learning_rate": 9.668253968253969e-06, "loss": 35.2873, "step": 12184 }, { "epoch": 290.1194029850746, "grad_norm": 21.55522346496582, "learning_rate": 9.66746031746032e-06, "loss": 34.9449, "step": 12185 }, { "epoch": 290.14328358208957, "grad_norm": 34.57597351074219, "learning_rate": 9.666666666666667e-06, "loss": 33.3533, "step": 12186 }, { "epoch": 290.1671641791045, "grad_norm": 21.43866729736328, "learning_rate": 9.665873015873016e-06, "loss": 34.6336, "step": 12187 }, { "epoch": 290.1910447761194, "grad_norm": 33.21577453613281, "learning_rate": 9.665079365079367e-06, "loss": 33.9339, "step": 12188 }, { "epoch": 290.21492537313435, "grad_norm": 22.744247436523438, "learning_rate": 9.664285714285716e-06, "loss": 34.7873, "step": 12189 }, { "epoch": 290.23880597014926, "grad_norm": 33.429779052734375, "learning_rate": 9.663492063492065e-06, "loss": 33.0795, "step": 12190 }, { "epoch": 290.26268656716417, "grad_norm": 27.033706665039062, "learning_rate": 9.662698412698414e-06, "loss": 32.9232, "step": 12191 }, { "epoch": 290.28656716417913, "grad_norm": 31.327665328979492, "learning_rate": 9.661904761904763e-06, "loss": 34.7549, "step": 12192 }, { "epoch": 290.31044776119404, "grad_norm": 23.397293090820312, "learning_rate": 9.661111111111112e-06, "loss": 35.1283, "step": 12193 }, { "epoch": 290.33432835820895, "grad_norm": 27.564517974853516, "learning_rate": 9.660317460317461e-06, "loss": 34.3094, "step": 12194 }, { "epoch": 290.35820895522386, "grad_norm": NaN, "learning_rate": 9.65952380952381e-06, "loss": 58.411, "step": 12195 }, { "epoch": 290.3820895522388, "grad_norm": 22.267906188964844, "learning_rate": 9.65952380952381e-06, "loss": 34.6854, "step": 12196 }, { "epoch": 290.40597014925373, "grad_norm": 26.782926559448242, "learning_rate": 9.65873015873016e-06, "loss": 34.7152, "step": 12197 }, { "epoch": 290.42985074626864, "grad_norm": 26.812774658203125, "learning_rate": 9.657936507936508e-06, "loss": 34.3506, "step": 12198 }, { "epoch": 290.4537313432836, "grad_norm": 21.261577606201172, "learning_rate": 9.657142857142859e-06, "loss": 34.4862, "step": 12199 }, { "epoch": 290.4776119402985, "grad_norm": 33.28779602050781, "learning_rate": 9.656349206349206e-06, "loss": 33.4131, "step": 12200 }, { "epoch": 290.5014925373134, "grad_norm": 23.193044662475586, "learning_rate": 9.655555555555556e-06, "loss": 33.48, "step": 12201 }, { "epoch": 290.52537313432833, "grad_norm": 39.30086135864258, "learning_rate": 9.654761904761906e-06, "loss": 33.9847, "step": 12202 }, { "epoch": 290.5492537313433, "grad_norm": 33.107276916503906, "learning_rate": 9.653968253968254e-06, "loss": 33.361, "step": 12203 }, { "epoch": 290.5731343283582, "grad_norm": 27.033092498779297, "learning_rate": 9.653174603174604e-06, "loss": 34.1614, "step": 12204 }, { "epoch": 290.5970149253731, "grad_norm": 27.202957153320312, "learning_rate": 9.652380952380954e-06, "loss": 35.5516, "step": 12205 }, { "epoch": 290.6208955223881, "grad_norm": 28.261327743530273, "learning_rate": 9.651587301587303e-06, "loss": 34.913, "step": 12206 }, { "epoch": 290.644776119403, "grad_norm": 22.540699005126953, "learning_rate": 9.650793650793652e-06, "loss": 33.3709, "step": 12207 }, { "epoch": 290.6686567164179, "grad_norm": 28.34935760498047, "learning_rate": 9.65e-06, "loss": 34.8894, "step": 12208 }, { "epoch": 290.6925373134328, "grad_norm": 19.734691619873047, "learning_rate": 9.64920634920635e-06, "loss": 35.1564, "step": 12209 }, { "epoch": 290.7164179104478, "grad_norm": 32.46640396118164, "learning_rate": 9.648412698412699e-06, "loss": 34.5664, "step": 12210 }, { "epoch": 290.7402985074627, "grad_norm": 22.91280174255371, "learning_rate": 9.647619047619048e-06, "loss": 32.5768, "step": 12211 }, { "epoch": 290.7641791044776, "grad_norm": 28.09925079345703, "learning_rate": 9.646825396825397e-06, "loss": 35.2974, "step": 12212 }, { "epoch": 290.78805970149256, "grad_norm": 24.54458999633789, "learning_rate": 9.646031746031746e-06, "loss": 35.0112, "step": 12213 }, { "epoch": 290.81194029850747, "grad_norm": 23.932899475097656, "learning_rate": 9.645238095238097e-06, "loss": 34.8741, "step": 12214 }, { "epoch": 290.8358208955224, "grad_norm": 27.692768096923828, "learning_rate": 9.644444444444444e-06, "loss": 34.8985, "step": 12215 }, { "epoch": 290.85970149253734, "grad_norm": 18.806684494018555, "learning_rate": 9.643650793650793e-06, "loss": 34.6133, "step": 12216 }, { "epoch": 290.88358208955225, "grad_norm": 34.68870162963867, "learning_rate": 9.642857142857144e-06, "loss": 34.9589, "step": 12217 }, { "epoch": 290.90746268656716, "grad_norm": 27.245176315307617, "learning_rate": 9.642063492063493e-06, "loss": 35.1987, "step": 12218 }, { "epoch": 290.93134328358207, "grad_norm": 29.974613189697266, "learning_rate": 9.641269841269842e-06, "loss": 33.3384, "step": 12219 }, { "epoch": 290.95522388059703, "grad_norm": 25.222753524780273, "learning_rate": 9.640476190476191e-06, "loss": 34.8828, "step": 12220 }, { "epoch": 290.97910447761194, "grad_norm": 28.725669860839844, "learning_rate": 9.63968253968254e-06, "loss": 34.5293, "step": 12221 }, { "epoch": 291.0, "grad_norm": 17.669416427612305, "learning_rate": 9.63888888888889e-06, "loss": 30.053, "step": 12222 }, { "epoch": 291.0238805970149, "grad_norm": 25.961732864379883, "learning_rate": 9.638095238095239e-06, "loss": 34.0411, "step": 12223 }, { "epoch": 291.0477611940299, "grad_norm": 23.097198486328125, "learning_rate": 9.637301587301588e-06, "loss": 34.749, "step": 12224 }, { "epoch": 291.0716417910448, "grad_norm": 27.990400314331055, "learning_rate": 9.636507936507937e-06, "loss": 34.6194, "step": 12225 }, { "epoch": 291.0955223880597, "grad_norm": 23.676801681518555, "learning_rate": 9.635714285714286e-06, "loss": 33.9769, "step": 12226 }, { "epoch": 291.1194029850746, "grad_norm": 17.73317527770996, "learning_rate": 9.634920634920637e-06, "loss": 34.8169, "step": 12227 }, { "epoch": 291.14328358208957, "grad_norm": 32.58198928833008, "learning_rate": 9.634126984126984e-06, "loss": 33.9618, "step": 12228 }, { "epoch": 291.1671641791045, "grad_norm": 22.8065242767334, "learning_rate": 9.633333333333335e-06, "loss": 34.3213, "step": 12229 }, { "epoch": 291.1910447761194, "grad_norm": 38.063899993896484, "learning_rate": 9.632539682539684e-06, "loss": 33.8887, "step": 12230 }, { "epoch": 291.21492537313435, "grad_norm": 31.94734001159668, "learning_rate": 9.631746031746031e-06, "loss": 33.8354, "step": 12231 }, { "epoch": 291.23880597014926, "grad_norm": 29.75364875793457, "learning_rate": 9.630952380952382e-06, "loss": 34.4246, "step": 12232 }, { "epoch": 291.26268656716417, "grad_norm": 29.69106674194336, "learning_rate": 9.630158730158731e-06, "loss": 34.5028, "step": 12233 }, { "epoch": 291.28656716417913, "grad_norm": 27.216402053833008, "learning_rate": 9.62936507936508e-06, "loss": 33.2136, "step": 12234 }, { "epoch": 291.31044776119404, "grad_norm": NaN, "learning_rate": 9.62857142857143e-06, "loss": 62.1075, "step": 12235 }, { "epoch": 291.33432835820895, "grad_norm": 24.074199676513672, "learning_rate": 9.62857142857143e-06, "loss": 34.4417, "step": 12236 }, { "epoch": 291.35820895522386, "grad_norm": 34.20615768432617, "learning_rate": 9.627777777777778e-06, "loss": 34.924, "step": 12237 }, { "epoch": 291.3820895522388, "grad_norm": 27.6361026763916, "learning_rate": 9.626984126984127e-06, "loss": 34.4716, "step": 12238 }, { "epoch": 291.40597014925373, "grad_norm": 33.79500961303711, "learning_rate": 9.626190476190477e-06, "loss": 33.6966, "step": 12239 }, { "epoch": 291.42985074626864, "grad_norm": 31.77932357788086, "learning_rate": 9.625396825396827e-06, "loss": 34.575, "step": 12240 }, { "epoch": 291.4537313432836, "grad_norm": 30.573434829711914, "learning_rate": 9.624603174603175e-06, "loss": 32.7903, "step": 12241 }, { "epoch": 291.4776119402985, "grad_norm": 28.312847137451172, "learning_rate": 9.623809523809524e-06, "loss": 35.0707, "step": 12242 }, { "epoch": 291.5014925373134, "grad_norm": 32.0899543762207, "learning_rate": 9.623015873015875e-06, "loss": 34.2454, "step": 12243 }, { "epoch": 291.52537313432833, "grad_norm": 26.4555606842041, "learning_rate": 9.622222222222222e-06, "loss": 34.3891, "step": 12244 }, { "epoch": 291.5492537313433, "grad_norm": 33.5330924987793, "learning_rate": 9.621428571428573e-06, "loss": 33.8859, "step": 12245 }, { "epoch": 291.5731343283582, "grad_norm": 31.670372009277344, "learning_rate": 9.620634920634922e-06, "loss": 33.8622, "step": 12246 }, { "epoch": 291.5970149253731, "grad_norm": 31.936368942260742, "learning_rate": 9.619841269841271e-06, "loss": 33.6762, "step": 12247 }, { "epoch": 291.6208955223881, "grad_norm": 30.13316535949707, "learning_rate": 9.61904761904762e-06, "loss": 34.2466, "step": 12248 }, { "epoch": 291.644776119403, "grad_norm": 32.32080841064453, "learning_rate": 9.618253968253969e-06, "loss": 34.4787, "step": 12249 }, { "epoch": 291.6686567164179, "grad_norm": 26.717697143554688, "learning_rate": 9.617460317460318e-06, "loss": 34.4717, "step": 12250 }, { "epoch": 291.6925373134328, "grad_norm": 33.60049819946289, "learning_rate": 9.616666666666667e-06, "loss": 34.4618, "step": 12251 }, { "epoch": 291.7164179104478, "grad_norm": 26.2215518951416, "learning_rate": 9.615873015873016e-06, "loss": 34.5382, "step": 12252 }, { "epoch": 291.7402985074627, "grad_norm": 29.8955135345459, "learning_rate": 9.615079365079365e-06, "loss": 35.053, "step": 12253 }, { "epoch": 291.7641791044776, "grad_norm": 26.757553100585938, "learning_rate": 9.614285714285714e-06, "loss": 34.5648, "step": 12254 }, { "epoch": 291.78805970149256, "grad_norm": 32.92394256591797, "learning_rate": 9.613492063492065e-06, "loss": 34.0267, "step": 12255 }, { "epoch": 291.81194029850747, "grad_norm": 27.11213493347168, "learning_rate": 9.612698412698414e-06, "loss": 33.973, "step": 12256 }, { "epoch": 291.8358208955224, "grad_norm": 30.325374603271484, "learning_rate": 9.611904761904762e-06, "loss": 34.2087, "step": 12257 }, { "epoch": 291.85970149253734, "grad_norm": 27.406278610229492, "learning_rate": 9.611111111111112e-06, "loss": 35.4236, "step": 12258 }, { "epoch": 291.88358208955225, "grad_norm": 31.16053009033203, "learning_rate": 9.610317460317462e-06, "loss": 35.4351, "step": 12259 }, { "epoch": 291.90746268656716, "grad_norm": 26.751670837402344, "learning_rate": 9.60952380952381e-06, "loss": 34.6347, "step": 12260 }, { "epoch": 291.93134328358207, "grad_norm": 33.27516174316406, "learning_rate": 9.60873015873016e-06, "loss": 34.5843, "step": 12261 }, { "epoch": 291.95522388059703, "grad_norm": 27.722028732299805, "learning_rate": 9.607936507936509e-06, "loss": 33.5751, "step": 12262 }, { "epoch": 291.97910447761194, "grad_norm": 28.656055450439453, "learning_rate": 9.607142857142858e-06, "loss": 33.5382, "step": 12263 }, { "epoch": 292.0, "grad_norm": 24.935575485229492, "learning_rate": 9.606349206349207e-06, "loss": 30.3113, "step": 12264 }, { "epoch": 292.0238805970149, "grad_norm": 31.052854537963867, "learning_rate": 9.605555555555556e-06, "loss": 35.0746, "step": 12265 }, { "epoch": 292.0477611940299, "grad_norm": 28.53564453125, "learning_rate": 9.604761904761905e-06, "loss": 34.9898, "step": 12266 }, { "epoch": 292.0716417910448, "grad_norm": 30.77297019958496, "learning_rate": 9.603968253968254e-06, "loss": 33.4999, "step": 12267 }, { "epoch": 292.0955223880597, "grad_norm": 26.038986206054688, "learning_rate": 9.603174603174605e-06, "loss": 34.0279, "step": 12268 }, { "epoch": 292.1194029850746, "grad_norm": 33.35354232788086, "learning_rate": 9.602380952380952e-06, "loss": 34.7686, "step": 12269 }, { "epoch": 292.14328358208957, "grad_norm": 29.855701446533203, "learning_rate": 9.601587301587303e-06, "loss": 35.0543, "step": 12270 }, { "epoch": 292.1671641791045, "grad_norm": 28.970848083496094, "learning_rate": 9.600793650793652e-06, "loss": 34.4599, "step": 12271 }, { "epoch": 292.1910447761194, "grad_norm": 28.292043685913086, "learning_rate": 9.600000000000001e-06, "loss": 34.6065, "step": 12272 }, { "epoch": 292.21492537313435, "grad_norm": 28.919475555419922, "learning_rate": 9.59920634920635e-06, "loss": 34.005, "step": 12273 }, { "epoch": 292.23880597014926, "grad_norm": 24.51378631591797, "learning_rate": 9.5984126984127e-06, "loss": 34.3403, "step": 12274 }, { "epoch": 292.26268656716417, "grad_norm": 34.44512939453125, "learning_rate": 9.597619047619048e-06, "loss": 34.8771, "step": 12275 }, { "epoch": 292.28656716417913, "grad_norm": 25.570201873779297, "learning_rate": 9.596825396825398e-06, "loss": 33.9209, "step": 12276 }, { "epoch": 292.31044776119404, "grad_norm": 32.16792678833008, "learning_rate": 9.596031746031747e-06, "loss": 35.0343, "step": 12277 }, { "epoch": 292.33432835820895, "grad_norm": 27.99781036376953, "learning_rate": 9.595238095238096e-06, "loss": 33.8863, "step": 12278 }, { "epoch": 292.35820895522386, "grad_norm": 31.632658004760742, "learning_rate": 9.594444444444445e-06, "loss": 34.2391, "step": 12279 }, { "epoch": 292.3820895522388, "grad_norm": 27.672597885131836, "learning_rate": 9.593650793650794e-06, "loss": 33.8981, "step": 12280 }, { "epoch": 292.40597014925373, "grad_norm": 27.102998733520508, "learning_rate": 9.592857142857143e-06, "loss": 33.9211, "step": 12281 }, { "epoch": 292.42985074626864, "grad_norm": 28.44908905029297, "learning_rate": 9.592063492063492e-06, "loss": 34.7022, "step": 12282 }, { "epoch": 292.4537313432836, "grad_norm": 28.945695877075195, "learning_rate": 9.591269841269843e-06, "loss": 34.3976, "step": 12283 }, { "epoch": 292.4776119402985, "grad_norm": 24.871110916137695, "learning_rate": 9.590476190476192e-06, "loss": 34.3921, "step": 12284 }, { "epoch": 292.5014925373134, "grad_norm": 29.340715408325195, "learning_rate": 9.58968253968254e-06, "loss": 34.5926, "step": 12285 }, { "epoch": 292.52537313432833, "grad_norm": 26.397930145263672, "learning_rate": 9.58888888888889e-06, "loss": 34.4733, "step": 12286 }, { "epoch": 292.5492537313433, "grad_norm": 33.7936897277832, "learning_rate": 9.588095238095239e-06, "loss": 33.8797, "step": 12287 }, { "epoch": 292.5731343283582, "grad_norm": 30.71653175354004, "learning_rate": 9.587301587301588e-06, "loss": 34.3851, "step": 12288 }, { "epoch": 292.5970149253731, "grad_norm": 27.971527099609375, "learning_rate": 9.586507936507937e-06, "loss": 33.9437, "step": 12289 }, { "epoch": 292.6208955223881, "grad_norm": 23.75400161743164, "learning_rate": 9.585714285714286e-06, "loss": 34.3425, "step": 12290 }, { "epoch": 292.644776119403, "grad_norm": 28.923099517822266, "learning_rate": 9.584920634920635e-06, "loss": 34.8029, "step": 12291 }, { "epoch": 292.6686567164179, "grad_norm": 24.210174560546875, "learning_rate": 9.584126984126985e-06, "loss": 32.6122, "step": 12292 }, { "epoch": 292.6925373134328, "grad_norm": 28.171342849731445, "learning_rate": 9.583333333333335e-06, "loss": 33.9238, "step": 12293 }, { "epoch": 292.7164179104478, "grad_norm": 24.929515838623047, "learning_rate": 9.582539682539683e-06, "loss": 34.3477, "step": 12294 }, { "epoch": 292.7402985074627, "grad_norm": 31.719621658325195, "learning_rate": 9.581746031746032e-06, "loss": 34.5204, "step": 12295 }, { "epoch": 292.7641791044776, "grad_norm": 30.568191528320312, "learning_rate": 9.580952380952383e-06, "loss": 34.7862, "step": 12296 }, { "epoch": 292.78805970149256, "grad_norm": 28.750465393066406, "learning_rate": 9.58015873015873e-06, "loss": 35.5672, "step": 12297 }, { "epoch": 292.81194029850747, "grad_norm": 26.463260650634766, "learning_rate": 9.57936507936508e-06, "loss": 33.3431, "step": 12298 }, { "epoch": 292.8358208955224, "grad_norm": 27.773862838745117, "learning_rate": 9.57857142857143e-06, "loss": 36.3078, "step": 12299 }, { "epoch": 292.85970149253734, "grad_norm": 22.0357608795166, "learning_rate": 9.577777777777779e-06, "loss": 34.3722, "step": 12300 }, { "epoch": 292.88358208955225, "grad_norm": 30.44091796875, "learning_rate": 9.576984126984128e-06, "loss": 34.0244, "step": 12301 }, { "epoch": 292.90746268656716, "grad_norm": 24.222484588623047, "learning_rate": 9.576190476190477e-06, "loss": 33.6204, "step": 12302 }, { "epoch": 292.93134328358207, "grad_norm": 31.65188980102539, "learning_rate": 9.575396825396826e-06, "loss": 33.8672, "step": 12303 }, { "epoch": 292.95522388059703, "grad_norm": 27.379779815673828, "learning_rate": 9.574603174603175e-06, "loss": 33.9861, "step": 12304 }, { "epoch": 292.97910447761194, "grad_norm": 31.15701675415039, "learning_rate": 9.573809523809524e-06, "loss": 33.571, "step": 12305 }, { "epoch": 293.0, "grad_norm": 21.773134231567383, "learning_rate": 9.573015873015873e-06, "loss": 28.9808, "step": 12306 }, { "epoch": 293.0238805970149, "grad_norm": 30.449079513549805, "learning_rate": 9.572222222222222e-06, "loss": 33.7464, "step": 12307 }, { "epoch": 293.0477611940299, "grad_norm": 28.632801055908203, "learning_rate": 9.571428571428573e-06, "loss": 34.9108, "step": 12308 }, { "epoch": 293.0716417910448, "grad_norm": 30.977502822875977, "learning_rate": 9.57063492063492e-06, "loss": 35.5955, "step": 12309 }, { "epoch": 293.0955223880597, "grad_norm": 26.199066162109375, "learning_rate": 9.56984126984127e-06, "loss": 34.2995, "step": 12310 }, { "epoch": 293.1194029850746, "grad_norm": 29.398496627807617, "learning_rate": 9.56904761904762e-06, "loss": 34.0355, "step": 12311 }, { "epoch": 293.14328358208957, "grad_norm": 26.90253257751465, "learning_rate": 9.56825396825397e-06, "loss": 35.3956, "step": 12312 }, { "epoch": 293.1671641791045, "grad_norm": 28.612627029418945, "learning_rate": 9.567460317460319e-06, "loss": 34.6483, "step": 12313 }, { "epoch": 293.1910447761194, "grad_norm": 23.256914138793945, "learning_rate": 9.566666666666668e-06, "loss": 34.2701, "step": 12314 }, { "epoch": 293.21492537313435, "grad_norm": 29.50980567932129, "learning_rate": 9.565873015873017e-06, "loss": 33.7863, "step": 12315 }, { "epoch": 293.23880597014926, "grad_norm": 24.027650833129883, "learning_rate": 9.565079365079366e-06, "loss": 34.143, "step": 12316 }, { "epoch": 293.26268656716417, "grad_norm": 31.43763542175293, "learning_rate": 9.564285714285715e-06, "loss": 33.8023, "step": 12317 }, { "epoch": 293.28656716417913, "grad_norm": 26.828107833862305, "learning_rate": 9.563492063492064e-06, "loss": 34.472, "step": 12318 }, { "epoch": 293.31044776119404, "grad_norm": NaN, "learning_rate": 9.562698412698413e-06, "loss": 52.0314, "step": 12319 }, { "epoch": 293.33432835820895, "grad_norm": 29.993623733520508, "learning_rate": 9.562698412698413e-06, "loss": 33.4094, "step": 12320 }, { "epoch": 293.35820895522386, "grad_norm": 27.77976417541504, "learning_rate": 9.561904761904762e-06, "loss": 33.8334, "step": 12321 }, { "epoch": 293.3820895522388, "grad_norm": 28.90367317199707, "learning_rate": 9.561111111111113e-06, "loss": 35.6582, "step": 12322 }, { "epoch": 293.40597014925373, "grad_norm": 25.03451919555664, "learning_rate": 9.56031746031746e-06, "loss": 34.8488, "step": 12323 }, { "epoch": 293.42985074626864, "grad_norm": 34.04804992675781, "learning_rate": 9.559523809523811e-06, "loss": 34.9502, "step": 12324 }, { "epoch": 293.4537313432836, "grad_norm": 29.848726272583008, "learning_rate": 9.55873015873016e-06, "loss": 33.6212, "step": 12325 }, { "epoch": 293.4776119402985, "grad_norm": 25.464885711669922, "learning_rate": 9.557936507936508e-06, "loss": 33.1467, "step": 12326 }, { "epoch": 293.5014925373134, "grad_norm": 25.84783935546875, "learning_rate": 9.557142857142858e-06, "loss": 34.1532, "step": 12327 }, { "epoch": 293.52537313432833, "grad_norm": 28.271520614624023, "learning_rate": 9.556349206349207e-06, "loss": 34.3759, "step": 12328 }, { "epoch": 293.5492537313433, "grad_norm": 23.839200973510742, "learning_rate": 9.555555555555556e-06, "loss": 34.8524, "step": 12329 }, { "epoch": 293.5731343283582, "grad_norm": 32.99650573730469, "learning_rate": 9.554761904761906e-06, "loss": 34.4787, "step": 12330 }, { "epoch": 293.5970149253731, "grad_norm": 29.437646865844727, "learning_rate": 9.553968253968255e-06, "loss": 34.0187, "step": 12331 }, { "epoch": 293.6208955223881, "grad_norm": 29.929519653320312, "learning_rate": 9.553174603174604e-06, "loss": 33.5362, "step": 12332 }, { "epoch": 293.644776119403, "grad_norm": 28.433238983154297, "learning_rate": 9.552380952380953e-06, "loss": 35.5923, "step": 12333 }, { "epoch": 293.6686567164179, "grad_norm": 29.59156036376953, "learning_rate": 9.551587301587304e-06, "loss": 34.5687, "step": 12334 }, { "epoch": 293.6925373134328, "grad_norm": 25.87936782836914, "learning_rate": 9.550793650793651e-06, "loss": 35.4809, "step": 12335 }, { "epoch": 293.7164179104478, "grad_norm": 26.75420570373535, "learning_rate": 9.55e-06, "loss": 34.4088, "step": 12336 }, { "epoch": 293.7402985074627, "grad_norm": 24.215700149536133, "learning_rate": 9.54920634920635e-06, "loss": 34.198, "step": 12337 }, { "epoch": 293.7641791044776, "grad_norm": 32.12968826293945, "learning_rate": 9.548412698412698e-06, "loss": 33.7925, "step": 12338 }, { "epoch": 293.78805970149256, "grad_norm": 30.090240478515625, "learning_rate": 9.547619047619049e-06, "loss": 33.5707, "step": 12339 }, { "epoch": 293.81194029850747, "grad_norm": 27.812780380249023, "learning_rate": 9.546825396825398e-06, "loss": 34.3747, "step": 12340 }, { "epoch": 293.8358208955224, "grad_norm": 30.12023162841797, "learning_rate": 9.546031746031747e-06, "loss": 34.0938, "step": 12341 }, { "epoch": 293.85970149253734, "grad_norm": 26.99824333190918, "learning_rate": 9.545238095238096e-06, "loss": 34.3025, "step": 12342 }, { "epoch": 293.88358208955225, "grad_norm": 25.77198028564453, "learning_rate": 9.544444444444445e-06, "loss": 34.9137, "step": 12343 }, { "epoch": 293.90746268656716, "grad_norm": 27.67975616455078, "learning_rate": 9.543650793650794e-06, "loss": 34.4613, "step": 12344 }, { "epoch": 293.93134328358207, "grad_norm": 25.866796493530273, "learning_rate": 9.542857142857143e-06, "loss": 33.6191, "step": 12345 }, { "epoch": 293.95522388059703, "grad_norm": 28.284847259521484, "learning_rate": 9.542063492063493e-06, "loss": 34.1001, "step": 12346 }, { "epoch": 293.97910447761194, "grad_norm": 25.0577392578125, "learning_rate": 9.541269841269842e-06, "loss": 34.3683, "step": 12347 }, { "epoch": 294.0, "grad_norm": 25.284692764282227, "learning_rate": 9.54047619047619e-06, "loss": 29.5702, "step": 12348 }, { "epoch": 294.0238805970149, "grad_norm": 25.21659278869629, "learning_rate": 9.539682539682541e-06, "loss": 33.7699, "step": 12349 }, { "epoch": 294.0477611940299, "grad_norm": 26.633054733276367, "learning_rate": 9.53888888888889e-06, "loss": 34.5498, "step": 12350 }, { "epoch": 294.0716417910448, "grad_norm": 21.414623260498047, "learning_rate": 9.538095238095238e-06, "loss": 34.3403, "step": 12351 }, { "epoch": 294.0955223880597, "grad_norm": 30.119386672973633, "learning_rate": 9.537301587301589e-06, "loss": 34.4893, "step": 12352 }, { "epoch": 294.1194029850746, "grad_norm": 22.80232048034668, "learning_rate": 9.536507936507938e-06, "loss": 34.6263, "step": 12353 }, { "epoch": 294.14328358208957, "grad_norm": 25.87506675720215, "learning_rate": 9.535714285714287e-06, "loss": 33.5805, "step": 12354 }, { "epoch": 294.1671641791045, "grad_norm": NaN, "learning_rate": 9.534920634920636e-06, "loss": 33.8619, "step": 12355 }, { "epoch": 294.1910447761194, "grad_norm": 23.632522583007812, "learning_rate": 9.534920634920636e-06, "loss": 35.0454, "step": 12356 }, { "epoch": 294.21492537313435, "grad_norm": 24.637853622436523, "learning_rate": 9.534126984126985e-06, "loss": 36.1425, "step": 12357 }, { "epoch": 294.23880597014926, "grad_norm": 21.857200622558594, "learning_rate": 9.533333333333334e-06, "loss": 33.8715, "step": 12358 }, { "epoch": 294.26268656716417, "grad_norm": 26.445865631103516, "learning_rate": 9.532539682539683e-06, "loss": 33.0418, "step": 12359 }, { "epoch": 294.28656716417913, "grad_norm": 21.537635803222656, "learning_rate": 9.531746031746032e-06, "loss": 33.6292, "step": 12360 }, { "epoch": 294.31044776119404, "grad_norm": 28.713010787963867, "learning_rate": 9.530952380952381e-06, "loss": 34.4618, "step": 12361 }, { "epoch": 294.33432835820895, "grad_norm": 22.629819869995117, "learning_rate": 9.53015873015873e-06, "loss": 34.7087, "step": 12362 }, { "epoch": 294.35820895522386, "grad_norm": 25.359880447387695, "learning_rate": 9.529365079365081e-06, "loss": 33.8194, "step": 12363 }, { "epoch": 294.3820895522388, "grad_norm": 21.736087799072266, "learning_rate": 9.528571428571429e-06, "loss": 33.602, "step": 12364 }, { "epoch": 294.40597014925373, "grad_norm": 24.244258880615234, "learning_rate": 9.527777777777778e-06, "loss": 34.2988, "step": 12365 }, { "epoch": 294.42985074626864, "grad_norm": 22.63555335998535, "learning_rate": 9.526984126984128e-06, "loss": 34.4116, "step": 12366 }, { "epoch": 294.4537313432836, "grad_norm": 19.558313369750977, "learning_rate": 9.526190476190476e-06, "loss": 33.8121, "step": 12367 }, { "epoch": 294.4776119402985, "grad_norm": 25.274812698364258, "learning_rate": 9.525396825396827e-06, "loss": 35.9436, "step": 12368 }, { "epoch": 294.5014925373134, "grad_norm": 18.29305648803711, "learning_rate": 9.524603174603176e-06, "loss": 34.9848, "step": 12369 }, { "epoch": 294.52537313432833, "grad_norm": 22.77621841430664, "learning_rate": 9.523809523809525e-06, "loss": 33.5784, "step": 12370 }, { "epoch": 294.5492537313433, "grad_norm": 21.122507095336914, "learning_rate": 9.523015873015874e-06, "loss": 34.8318, "step": 12371 }, { "epoch": 294.5731343283582, "grad_norm": 22.661550521850586, "learning_rate": 9.522222222222223e-06, "loss": 35.0228, "step": 12372 }, { "epoch": 294.5970149253731, "grad_norm": 19.22883415222168, "learning_rate": 9.521428571428572e-06, "loss": 34.9678, "step": 12373 }, { "epoch": 294.6208955223881, "grad_norm": 17.760299682617188, "learning_rate": 9.520634920634921e-06, "loss": 32.7356, "step": 12374 }, { "epoch": 294.644776119403, "grad_norm": 17.98216438293457, "learning_rate": 9.51984126984127e-06, "loss": 34.2342, "step": 12375 }, { "epoch": 294.6686567164179, "grad_norm": 16.56303596496582, "learning_rate": 9.51904761904762e-06, "loss": 34.3649, "step": 12376 }, { "epoch": 294.6925373134328, "grad_norm": 20.535871505737305, "learning_rate": 9.518253968253968e-06, "loss": 33.4957, "step": 12377 }, { "epoch": 294.7164179104478, "grad_norm": 17.814687728881836, "learning_rate": 9.517460317460319e-06, "loss": 34.6065, "step": 12378 }, { "epoch": 294.7402985074627, "grad_norm": 18.159772872924805, "learning_rate": 9.516666666666668e-06, "loss": 32.49, "step": 12379 }, { "epoch": 294.7641791044776, "grad_norm": 16.40694808959961, "learning_rate": 9.515873015873016e-06, "loss": 34.4936, "step": 12380 }, { "epoch": 294.78805970149256, "grad_norm": 18.144777297973633, "learning_rate": 9.515079365079366e-06, "loss": 34.502, "step": 12381 }, { "epoch": 294.81194029850747, "grad_norm": 18.975643157958984, "learning_rate": 9.514285714285715e-06, "loss": 34.3534, "step": 12382 }, { "epoch": 294.8358208955224, "grad_norm": 17.128089904785156, "learning_rate": 9.513492063492064e-06, "loss": 33.2332, "step": 12383 }, { "epoch": 294.85970149253734, "grad_norm": 20.600330352783203, "learning_rate": 9.512698412698414e-06, "loss": 34.8824, "step": 12384 }, { "epoch": 294.88358208955225, "grad_norm": 15.444587707519531, "learning_rate": 9.511904761904763e-06, "loss": 35.1217, "step": 12385 }, { "epoch": 294.90746268656716, "grad_norm": 17.063888549804688, "learning_rate": 9.511111111111112e-06, "loss": 34.3084, "step": 12386 }, { "epoch": 294.93134328358207, "grad_norm": 18.895130157470703, "learning_rate": 9.51031746031746e-06, "loss": 34.6135, "step": 12387 }, { "epoch": 294.95522388059703, "grad_norm": 18.868885040283203, "learning_rate": 9.50952380952381e-06, "loss": 34.1046, "step": 12388 }, { "epoch": 294.97910447761194, "grad_norm": 18.9312686920166, "learning_rate": 9.508730158730159e-06, "loss": 35.0173, "step": 12389 }, { "epoch": 295.0, "grad_norm": 13.075404167175293, "learning_rate": 9.507936507936508e-06, "loss": 29.0332, "step": 12390 }, { "epoch": 295.0238805970149, "grad_norm": 17.01565170288086, "learning_rate": 9.507142857142859e-06, "loss": 33.8036, "step": 12391 }, { "epoch": 295.0477611940299, "grad_norm": 18.59445571899414, "learning_rate": 9.506349206349206e-06, "loss": 34.6043, "step": 12392 }, { "epoch": 295.0716417910448, "grad_norm": 15.26381778717041, "learning_rate": 9.505555555555557e-06, "loss": 35.2198, "step": 12393 }, { "epoch": 295.0955223880597, "grad_norm": 21.32588768005371, "learning_rate": 9.504761904761906e-06, "loss": 34.1363, "step": 12394 }, { "epoch": 295.1194029850746, "grad_norm": 17.290464401245117, "learning_rate": 9.503968253968255e-06, "loss": 33.2469, "step": 12395 }, { "epoch": 295.14328358208957, "grad_norm": 19.420989990234375, "learning_rate": 9.503174603174604e-06, "loss": 33.864, "step": 12396 }, { "epoch": 295.1671641791045, "grad_norm": 17.46050262451172, "learning_rate": 9.502380952380953e-06, "loss": 33.4547, "step": 12397 }, { "epoch": 295.1910447761194, "grad_norm": 19.51968765258789, "learning_rate": 9.501587301587302e-06, "loss": 33.0625, "step": 12398 }, { "epoch": 295.21492537313435, "grad_norm": 20.821683883666992, "learning_rate": 9.500793650793651e-06, "loss": 34.7235, "step": 12399 }, { "epoch": 295.23880597014926, "grad_norm": 18.05327033996582, "learning_rate": 9.5e-06, "loss": 34.1115, "step": 12400 }, { "epoch": 295.26268656716417, "grad_norm": 18.052072525024414, "learning_rate": 9.49920634920635e-06, "loss": 33.28, "step": 12401 }, { "epoch": 295.28656716417913, "grad_norm": 21.33596420288086, "learning_rate": 9.498412698412699e-06, "loss": 33.7036, "step": 12402 }, { "epoch": 295.31044776119404, "grad_norm": 18.234445571899414, "learning_rate": 9.49761904761905e-06, "loss": 35.0936, "step": 12403 }, { "epoch": 295.33432835820895, "grad_norm": 18.282062530517578, "learning_rate": 9.496825396825397e-06, "loss": 34.604, "step": 12404 }, { "epoch": 295.35820895522386, "grad_norm": 19.064664840698242, "learning_rate": 9.496031746031746e-06, "loss": 34.7586, "step": 12405 }, { "epoch": 295.3820895522388, "grad_norm": 17.39444923400879, "learning_rate": 9.495238095238097e-06, "loss": 34.7037, "step": 12406 }, { "epoch": 295.40597014925373, "grad_norm": 15.011940002441406, "learning_rate": 9.494444444444446e-06, "loss": 33.2601, "step": 12407 }, { "epoch": 295.42985074626864, "grad_norm": 18.480915069580078, "learning_rate": 9.493650793650795e-06, "loss": 32.9466, "step": 12408 }, { "epoch": 295.4537313432836, "grad_norm": 17.534500122070312, "learning_rate": 9.492857142857144e-06, "loss": 34.6965, "step": 12409 }, { "epoch": 295.4776119402985, "grad_norm": 14.153911590576172, "learning_rate": 9.492063492063493e-06, "loss": 33.6866, "step": 12410 }, { "epoch": 295.5014925373134, "grad_norm": 14.531407356262207, "learning_rate": 9.491269841269842e-06, "loss": 34.527, "step": 12411 }, { "epoch": 295.52537313432833, "grad_norm": 20.134963989257812, "learning_rate": 9.490476190476191e-06, "loss": 34.4852, "step": 12412 }, { "epoch": 295.5492537313433, "grad_norm": 19.988662719726562, "learning_rate": 9.48968253968254e-06, "loss": 36.11, "step": 12413 }, { "epoch": 295.5731343283582, "grad_norm": 13.376730918884277, "learning_rate": 9.48888888888889e-06, "loss": 34.295, "step": 12414 }, { "epoch": 295.5970149253731, "grad_norm": 25.447059631347656, "learning_rate": 9.488095238095238e-06, "loss": 34.694, "step": 12415 }, { "epoch": 295.6208955223881, "grad_norm": 17.149171829223633, "learning_rate": 9.48730158730159e-06, "loss": 33.6649, "step": 12416 }, { "epoch": 295.644776119403, "grad_norm": 22.13737678527832, "learning_rate": 9.486507936507937e-06, "loss": 34.735, "step": 12417 }, { "epoch": 295.6686567164179, "grad_norm": 20.037952423095703, "learning_rate": 9.485714285714287e-06, "loss": 33.5167, "step": 12418 }, { "epoch": 295.6925373134328, "grad_norm": 16.075672149658203, "learning_rate": 9.484920634920636e-06, "loss": 34.4586, "step": 12419 }, { "epoch": 295.7164179104478, "grad_norm": 25.83331298828125, "learning_rate": 9.484126984126984e-06, "loss": 33.9106, "step": 12420 }, { "epoch": 295.7402985074627, "grad_norm": 16.158767700195312, "learning_rate": 9.483333333333335e-06, "loss": 34.7901, "step": 12421 }, { "epoch": 295.7641791044776, "grad_norm": 23.243450164794922, "learning_rate": 9.482539682539684e-06, "loss": 34.4313, "step": 12422 }, { "epoch": 295.78805970149256, "grad_norm": 23.996456146240234, "learning_rate": 9.481746031746033e-06, "loss": 34.7577, "step": 12423 }, { "epoch": 295.81194029850747, "grad_norm": 14.75000286102295, "learning_rate": 9.480952380952382e-06, "loss": 34.4676, "step": 12424 }, { "epoch": 295.8358208955224, "grad_norm": 28.206483840942383, "learning_rate": 9.480158730158731e-06, "loss": 34.217, "step": 12425 }, { "epoch": 295.85970149253734, "grad_norm": 20.71973419189453, "learning_rate": 9.47936507936508e-06, "loss": 34.9382, "step": 12426 }, { "epoch": 295.88358208955225, "grad_norm": 22.474464416503906, "learning_rate": 9.478571428571429e-06, "loss": 34.1382, "step": 12427 }, { "epoch": 295.90746268656716, "grad_norm": 22.20485496520996, "learning_rate": 9.47777777777778e-06, "loss": 34.9758, "step": 12428 }, { "epoch": 295.93134328358207, "grad_norm": 19.13903045654297, "learning_rate": 9.476984126984127e-06, "loss": 33.5434, "step": 12429 }, { "epoch": 295.95522388059703, "grad_norm": 22.864238739013672, "learning_rate": 9.476190476190476e-06, "loss": 34.2639, "step": 12430 }, { "epoch": 295.97910447761194, "grad_norm": 17.49142837524414, "learning_rate": 9.475396825396827e-06, "loss": 34.6608, "step": 12431 }, { "epoch": 296.0, "grad_norm": 23.418718338012695, "learning_rate": 9.474603174603174e-06, "loss": 29.8988, "step": 12432 }, { "epoch": 296.0238805970149, "grad_norm": 17.08441925048828, "learning_rate": 9.473809523809525e-06, "loss": 33.9752, "step": 12433 }, { "epoch": 296.0477611940299, "grad_norm": 23.049734115600586, "learning_rate": 9.473015873015874e-06, "loss": 34.2149, "step": 12434 }, { "epoch": 296.0716417910448, "grad_norm": 20.94700813293457, "learning_rate": 9.472222222222223e-06, "loss": 33.7319, "step": 12435 }, { "epoch": 296.0955223880597, "grad_norm": 16.3214054107666, "learning_rate": 9.471428571428572e-06, "loss": 35.0182, "step": 12436 }, { "epoch": 296.1194029850746, "grad_norm": 26.8597469329834, "learning_rate": 9.470634920634922e-06, "loss": 33.7682, "step": 12437 }, { "epoch": 296.14328358208957, "grad_norm": 17.673561096191406, "learning_rate": 9.46984126984127e-06, "loss": 34.1338, "step": 12438 }, { "epoch": 296.1671641791045, "grad_norm": 27.371057510375977, "learning_rate": 9.46904761904762e-06, "loss": 33.285, "step": 12439 }, { "epoch": 296.1910447761194, "grad_norm": 20.176958084106445, "learning_rate": 9.468253968253969e-06, "loss": 35.1346, "step": 12440 }, { "epoch": 296.21492537313435, "grad_norm": 22.260957717895508, "learning_rate": 9.467460317460318e-06, "loss": 34.0694, "step": 12441 }, { "epoch": 296.23880597014926, "grad_norm": 21.492176055908203, "learning_rate": 9.466666666666667e-06, "loss": 35.1014, "step": 12442 }, { "epoch": 296.26268656716417, "grad_norm": 15.671597480773926, "learning_rate": 9.465873015873016e-06, "loss": 34.8614, "step": 12443 }, { "epoch": 296.28656716417913, "grad_norm": 23.840309143066406, "learning_rate": 9.465079365079367e-06, "loss": 34.1475, "step": 12444 }, { "epoch": 296.31044776119404, "grad_norm": 18.10795021057129, "learning_rate": 9.464285714285714e-06, "loss": 34.7024, "step": 12445 }, { "epoch": 296.33432835820895, "grad_norm": 17.174304962158203, "learning_rate": 9.463492063492065e-06, "loss": 33.6018, "step": 12446 }, { "epoch": 296.35820895522386, "grad_norm": 21.586544036865234, "learning_rate": 9.462698412698414e-06, "loss": 33.4138, "step": 12447 }, { "epoch": 296.3820895522388, "grad_norm": 15.472028732299805, "learning_rate": 9.461904761904761e-06, "loss": 34.4597, "step": 12448 }, { "epoch": 296.40597014925373, "grad_norm": 18.487974166870117, "learning_rate": 9.461111111111112e-06, "loss": 33.473, "step": 12449 }, { "epoch": 296.42985074626864, "grad_norm": 16.693988800048828, "learning_rate": 9.460317460317461e-06, "loss": 35.647, "step": 12450 }, { "epoch": 296.4537313432836, "grad_norm": 16.25507926940918, "learning_rate": 9.45952380952381e-06, "loss": 35.8971, "step": 12451 }, { "epoch": 296.4776119402985, "grad_norm": 15.41960334777832, "learning_rate": 9.45873015873016e-06, "loss": 34.6937, "step": 12452 }, { "epoch": 296.5014925373134, "grad_norm": 16.79047393798828, "learning_rate": 9.457936507936509e-06, "loss": 34.3645, "step": 12453 }, { "epoch": 296.52537313432833, "grad_norm": 17.38849639892578, "learning_rate": 9.457142857142858e-06, "loss": 34.0681, "step": 12454 }, { "epoch": 296.5492537313433, "grad_norm": 17.246997833251953, "learning_rate": 9.456349206349207e-06, "loss": 34.1353, "step": 12455 }, { "epoch": 296.5731343283582, "grad_norm": 22.749364852905273, "learning_rate": 9.455555555555557e-06, "loss": 34.0916, "step": 12456 }, { "epoch": 296.5970149253731, "grad_norm": 17.064678192138672, "learning_rate": 9.454761904761905e-06, "loss": 33.6057, "step": 12457 }, { "epoch": 296.6208955223881, "grad_norm": 17.87356185913086, "learning_rate": 9.453968253968254e-06, "loss": 33.3623, "step": 12458 }, { "epoch": 296.644776119403, "grad_norm": 18.962120056152344, "learning_rate": 9.453174603174605e-06, "loss": 33.5192, "step": 12459 }, { "epoch": 296.6686567164179, "grad_norm": 18.000173568725586, "learning_rate": 9.452380952380952e-06, "loss": 35.1714, "step": 12460 }, { "epoch": 296.6925373134328, "grad_norm": 13.997823715209961, "learning_rate": 9.451587301587303e-06, "loss": 33.7567, "step": 12461 }, { "epoch": 296.7164179104478, "grad_norm": 14.310981750488281, "learning_rate": 9.450793650793652e-06, "loss": 34.7802, "step": 12462 }, { "epoch": 296.7402985074627, "grad_norm": 15.097182273864746, "learning_rate": 9.450000000000001e-06, "loss": 33.7409, "step": 12463 }, { "epoch": 296.7641791044776, "grad_norm": 21.005247116088867, "learning_rate": 9.44920634920635e-06, "loss": 34.2042, "step": 12464 }, { "epoch": 296.78805970149256, "grad_norm": 16.77436637878418, "learning_rate": 9.4484126984127e-06, "loss": 34.5884, "step": 12465 }, { "epoch": 296.81194029850747, "grad_norm": 16.423629760742188, "learning_rate": 9.447619047619048e-06, "loss": 35.0852, "step": 12466 }, { "epoch": 296.8358208955224, "grad_norm": 13.942682266235352, "learning_rate": 9.446825396825397e-06, "loss": 34.443, "step": 12467 }, { "epoch": 296.85970149253734, "grad_norm": 16.88393783569336, "learning_rate": 9.446031746031746e-06, "loss": 34.1228, "step": 12468 }, { "epoch": 296.88358208955225, "grad_norm": 13.906193733215332, "learning_rate": 9.445238095238095e-06, "loss": 34.0372, "step": 12469 }, { "epoch": 296.90746268656716, "grad_norm": 19.061094284057617, "learning_rate": 9.444444444444445e-06, "loss": 33.0478, "step": 12470 }, { "epoch": 296.93134328358207, "grad_norm": 15.234111785888672, "learning_rate": 9.443650793650795e-06, "loss": 33.4145, "step": 12471 }, { "epoch": 296.95522388059703, "grad_norm": 17.861047744750977, "learning_rate": 9.442857142857144e-06, "loss": 33.4059, "step": 12472 }, { "epoch": 296.97910447761194, "grad_norm": 16.07079315185547, "learning_rate": 9.442063492063492e-06, "loss": 34.8257, "step": 12473 }, { "epoch": 297.0, "grad_norm": NaN, "learning_rate": 9.441269841269843e-06, "loss": 29.9155, "step": 12474 }, { "epoch": 297.0238805970149, "grad_norm": 20.782690048217773, "learning_rate": 9.441269841269843e-06, "loss": 33.9278, "step": 12475 }, { "epoch": 297.0477611940299, "grad_norm": 15.933671951293945, "learning_rate": 9.440476190476192e-06, "loss": 33.9953, "step": 12476 }, { "epoch": 297.0716417910448, "grad_norm": 18.559167861938477, "learning_rate": 9.43968253968254e-06, "loss": 34.4357, "step": 12477 }, { "epoch": 297.0955223880597, "grad_norm": 23.417346954345703, "learning_rate": 9.43888888888889e-06, "loss": 35.0378, "step": 12478 }, { "epoch": 297.1194029850746, "grad_norm": 14.970905303955078, "learning_rate": 9.438095238095239e-06, "loss": 33.9518, "step": 12479 }, { "epoch": 297.14328358208957, "grad_norm": 18.455663681030273, "learning_rate": 9.437301587301588e-06, "loss": 35.023, "step": 12480 }, { "epoch": 297.1671641791045, "grad_norm": 21.78778839111328, "learning_rate": 9.436507936507937e-06, "loss": 34.0907, "step": 12481 }, { "epoch": 297.1910447761194, "grad_norm": 14.536425590515137, "learning_rate": 9.435714285714286e-06, "loss": 32.558, "step": 12482 }, { "epoch": 297.21492537313435, "grad_norm": 23.07076072692871, "learning_rate": 9.434920634920635e-06, "loss": 34.0558, "step": 12483 }, { "epoch": 297.23880597014926, "grad_norm": 22.406545639038086, "learning_rate": 9.434126984126984e-06, "loss": 34.8833, "step": 12484 }, { "epoch": 297.26268656716417, "grad_norm": 15.541619300842285, "learning_rate": 9.433333333333335e-06, "loss": 34.3949, "step": 12485 }, { "epoch": 297.28656716417913, "grad_norm": 32.484676361083984, "learning_rate": 9.432539682539682e-06, "loss": 34.3982, "step": 12486 }, { "epoch": 297.31044776119404, "grad_norm": 18.681869506835938, "learning_rate": 9.431746031746033e-06, "loss": 33.5947, "step": 12487 }, { "epoch": 297.33432835820895, "grad_norm": 27.216064453125, "learning_rate": 9.430952380952382e-06, "loss": 34.0555, "step": 12488 }, { "epoch": 297.35820895522386, "grad_norm": 20.63875961303711, "learning_rate": 9.43015873015873e-06, "loss": 35.0615, "step": 12489 }, { "epoch": 297.3820895522388, "grad_norm": 30.936704635620117, "learning_rate": 9.42936507936508e-06, "loss": 33.7986, "step": 12490 }, { "epoch": 297.40597014925373, "grad_norm": 19.177112579345703, "learning_rate": 9.42857142857143e-06, "loss": 33.8598, "step": 12491 }, { "epoch": 297.42985074626864, "grad_norm": 29.62114143371582, "learning_rate": 9.427777777777779e-06, "loss": 34.225, "step": 12492 }, { "epoch": 297.4537313432836, "grad_norm": 18.298412322998047, "learning_rate": 9.426984126984128e-06, "loss": 34.0888, "step": 12493 }, { "epoch": 297.4776119402985, "grad_norm": 30.48539161682129, "learning_rate": 9.426190476190477e-06, "loss": 33.1199, "step": 12494 }, { "epoch": 297.5014925373134, "grad_norm": 19.66048812866211, "learning_rate": 9.425396825396826e-06, "loss": 34.8943, "step": 12495 }, { "epoch": 297.52537313432833, "grad_norm": 24.55680274963379, "learning_rate": 9.424603174603175e-06, "loss": 33.9494, "step": 12496 }, { "epoch": 297.5492537313433, "grad_norm": 21.85373878479004, "learning_rate": 9.423809523809526e-06, "loss": 34.5054, "step": 12497 }, { "epoch": 297.5731343283582, "grad_norm": 21.17949104309082, "learning_rate": 9.423015873015873e-06, "loss": 33.2735, "step": 12498 }, { "epoch": 297.5970149253731, "grad_norm": 26.658388137817383, "learning_rate": 9.422222222222222e-06, "loss": 35.2951, "step": 12499 }, { "epoch": 297.6208955223881, "grad_norm": 19.48539924621582, "learning_rate": 9.421428571428573e-06, "loss": 34.0202, "step": 12500 }, { "epoch": 297.644776119403, "grad_norm": 33.12431335449219, "learning_rate": 9.420634920634922e-06, "loss": 34.7648, "step": 12501 }, { "epoch": 297.6686567164179, "grad_norm": 25.690244674682617, "learning_rate": 9.419841269841271e-06, "loss": 32.8813, "step": 12502 }, { "epoch": 297.6925373134328, "grad_norm": 34.491947174072266, "learning_rate": 9.41904761904762e-06, "loss": 34.0445, "step": 12503 }, { "epoch": 297.7164179104478, "grad_norm": 31.598169326782227, "learning_rate": 9.41825396825397e-06, "loss": 35.0873, "step": 12504 }, { "epoch": 297.7402985074627, "grad_norm": 29.2357177734375, "learning_rate": 9.417460317460318e-06, "loss": 33.3787, "step": 12505 }, { "epoch": 297.7641791044776, "grad_norm": 24.363779067993164, "learning_rate": 9.416666666666667e-06, "loss": 35.3853, "step": 12506 }, { "epoch": 297.78805970149256, "grad_norm": 28.963375091552734, "learning_rate": 9.415873015873017e-06, "loss": 34.1984, "step": 12507 }, { "epoch": 297.81194029850747, "grad_norm": 23.006086349487305, "learning_rate": 9.415079365079366e-06, "loss": 34.2327, "step": 12508 }, { "epoch": 297.8358208955224, "grad_norm": 39.04518508911133, "learning_rate": 9.414285714285715e-06, "loss": 32.6477, "step": 12509 }, { "epoch": 297.85970149253734, "grad_norm": 33.81496810913086, "learning_rate": 9.413492063492064e-06, "loss": 33.4368, "step": 12510 }, { "epoch": 297.88358208955225, "grad_norm": 32.090091705322266, "learning_rate": 9.412698412698413e-06, "loss": 34.1932, "step": 12511 }, { "epoch": 297.90746268656716, "grad_norm": 30.99967384338379, "learning_rate": 9.411904761904764e-06, "loss": 34.3895, "step": 12512 }, { "epoch": 297.93134328358207, "grad_norm": 28.09487533569336, "learning_rate": 9.411111111111113e-06, "loss": 34.3309, "step": 12513 }, { "epoch": 297.95522388059703, "grad_norm": 25.880022048950195, "learning_rate": 9.41031746031746e-06, "loss": 34.4944, "step": 12514 }, { "epoch": 297.97910447761194, "grad_norm": 30.924030303955078, "learning_rate": 9.40952380952381e-06, "loss": 34.2899, "step": 12515 }, { "epoch": 298.0, "grad_norm": 24.302059173583984, "learning_rate": 9.40873015873016e-06, "loss": 30.3359, "step": 12516 }, { "epoch": 298.0238805970149, "grad_norm": 29.897274017333984, "learning_rate": 9.407936507936509e-06, "loss": 34.8396, "step": 12517 }, { "epoch": 298.0477611940299, "grad_norm": 26.808429718017578, "learning_rate": 9.407142857142858e-06, "loss": 33.7162, "step": 12518 }, { "epoch": 298.0716417910448, "grad_norm": 29.109149932861328, "learning_rate": 9.406349206349207e-06, "loss": 34.579, "step": 12519 }, { "epoch": 298.0955223880597, "grad_norm": 27.8997745513916, "learning_rate": 9.405555555555556e-06, "loss": 34.2835, "step": 12520 }, { "epoch": 298.1194029850746, "grad_norm": 29.360164642333984, "learning_rate": 9.404761904761905e-06, "loss": 35.318, "step": 12521 }, { "epoch": 298.14328358208957, "grad_norm": 29.223485946655273, "learning_rate": 9.403968253968254e-06, "loss": 33.7443, "step": 12522 }, { "epoch": 298.1671641791045, "grad_norm": 28.036426544189453, "learning_rate": 9.403174603174603e-06, "loss": 34.0103, "step": 12523 }, { "epoch": 298.1910447761194, "grad_norm": 23.568742752075195, "learning_rate": 9.402380952380953e-06, "loss": 33.8043, "step": 12524 }, { "epoch": 298.21492537313435, "grad_norm": 32.28895568847656, "learning_rate": 9.401587301587303e-06, "loss": 33.552, "step": 12525 }, { "epoch": 298.23880597014926, "grad_norm": 28.25148582458496, "learning_rate": 9.40079365079365e-06, "loss": 35.2862, "step": 12526 }, { "epoch": 298.26268656716417, "grad_norm": 31.405595779418945, "learning_rate": 9.4e-06, "loss": 33.9998, "step": 12527 }, { "epoch": 298.28656716417913, "grad_norm": 28.964921951293945, "learning_rate": 9.39920634920635e-06, "loss": 33.4029, "step": 12528 }, { "epoch": 298.31044776119404, "grad_norm": 29.21355628967285, "learning_rate": 9.3984126984127e-06, "loss": 34.1061, "step": 12529 }, { "epoch": 298.33432835820895, "grad_norm": 27.861732482910156, "learning_rate": 9.397619047619049e-06, "loss": 34.6255, "step": 12530 }, { "epoch": 298.35820895522386, "grad_norm": 30.02569580078125, "learning_rate": 9.396825396825398e-06, "loss": 34.3636, "step": 12531 }, { "epoch": 298.3820895522388, "grad_norm": 26.379541397094727, "learning_rate": 9.396031746031747e-06, "loss": 33.8475, "step": 12532 }, { "epoch": 298.40597014925373, "grad_norm": 29.752214431762695, "learning_rate": 9.395238095238096e-06, "loss": 33.6342, "step": 12533 }, { "epoch": 298.42985074626864, "grad_norm": 26.6483211517334, "learning_rate": 9.394444444444445e-06, "loss": 33.9557, "step": 12534 }, { "epoch": 298.4537313432836, "grad_norm": 30.56734275817871, "learning_rate": 9.393650793650794e-06, "loss": 32.5809, "step": 12535 }, { "epoch": 298.4776119402985, "grad_norm": 25.45297622680664, "learning_rate": 9.392857142857143e-06, "loss": 34.1128, "step": 12536 }, { "epoch": 298.5014925373134, "grad_norm": 31.98762321472168, "learning_rate": 9.392063492063492e-06, "loss": 33.5747, "step": 12537 }, { "epoch": 298.52537313432833, "grad_norm": 28.247699737548828, "learning_rate": 9.391269841269843e-06, "loss": 34.8701, "step": 12538 }, { "epoch": 298.5492537313433, "grad_norm": 27.908994674682617, "learning_rate": 9.39047619047619e-06, "loss": 34.1208, "step": 12539 }, { "epoch": 298.5731343283582, "grad_norm": 30.79598045349121, "learning_rate": 9.389682539682541e-06, "loss": 35.3856, "step": 12540 }, { "epoch": 298.5970149253731, "grad_norm": 27.6414737701416, "learning_rate": 9.38888888888889e-06, "loss": 34.36, "step": 12541 }, { "epoch": 298.6208955223881, "grad_norm": 26.428972244262695, "learning_rate": 9.388095238095238e-06, "loss": 33.9819, "step": 12542 }, { "epoch": 298.644776119403, "grad_norm": 33.0452766418457, "learning_rate": 9.387301587301588e-06, "loss": 33.901, "step": 12543 }, { "epoch": 298.6686567164179, "grad_norm": 27.6353816986084, "learning_rate": 9.386507936507938e-06, "loss": 33.7931, "step": 12544 }, { "epoch": 298.6925373134328, "grad_norm": 27.583881378173828, "learning_rate": 9.385714285714287e-06, "loss": 33.8017, "step": 12545 }, { "epoch": 298.7164179104478, "grad_norm": 28.089115142822266, "learning_rate": 9.384920634920636e-06, "loss": 34.1774, "step": 12546 }, { "epoch": 298.7402985074627, "grad_norm": 24.383211135864258, "learning_rate": 9.384126984126985e-06, "loss": 34.4093, "step": 12547 }, { "epoch": 298.7641791044776, "grad_norm": 23.508140563964844, "learning_rate": 9.383333333333334e-06, "loss": 33.464, "step": 12548 }, { "epoch": 298.78805970149256, "grad_norm": 30.98582649230957, "learning_rate": 9.382539682539683e-06, "loss": 34.3285, "step": 12549 }, { "epoch": 298.81194029850747, "grad_norm": 24.830345153808594, "learning_rate": 9.381746031746034e-06, "loss": 34.1048, "step": 12550 }, { "epoch": 298.8358208955224, "grad_norm": 32.89422607421875, "learning_rate": 9.380952380952381e-06, "loss": 34.069, "step": 12551 }, { "epoch": 298.85970149253734, "grad_norm": 27.040531158447266, "learning_rate": 9.38015873015873e-06, "loss": 34.6239, "step": 12552 }, { "epoch": 298.88358208955225, "grad_norm": 30.2327938079834, "learning_rate": 9.379365079365081e-06, "loss": 34.2197, "step": 12553 }, { "epoch": 298.90746268656716, "grad_norm": 27.648653030395508, "learning_rate": 9.378571428571428e-06, "loss": 34.4678, "step": 12554 }, { "epoch": 298.93134328358207, "grad_norm": 28.97263526916504, "learning_rate": 9.377777777777779e-06, "loss": 33.818, "step": 12555 }, { "epoch": 298.95522388059703, "grad_norm": 26.15717315673828, "learning_rate": 9.376984126984128e-06, "loss": 34.3434, "step": 12556 }, { "epoch": 298.97910447761194, "grad_norm": 29.58806800842285, "learning_rate": 9.376190476190477e-06, "loss": 33.3861, "step": 12557 }, { "epoch": 299.0, "grad_norm": 22.08994483947754, "learning_rate": 9.375396825396826e-06, "loss": 29.2248, "step": 12558 }, { "epoch": 299.0238805970149, "grad_norm": 26.545316696166992, "learning_rate": 9.374603174603175e-06, "loss": 34.4542, "step": 12559 }, { "epoch": 299.0477611940299, "grad_norm": 22.460620880126953, "learning_rate": 9.373809523809524e-06, "loss": 33.5026, "step": 12560 }, { "epoch": 299.0716417910448, "grad_norm": 31.822744369506836, "learning_rate": 9.373015873015874e-06, "loss": 35.5345, "step": 12561 }, { "epoch": 299.0955223880597, "grad_norm": 26.98124885559082, "learning_rate": 9.372222222222223e-06, "loss": 34.5696, "step": 12562 }, { "epoch": 299.1194029850746, "grad_norm": 31.908985137939453, "learning_rate": 9.371428571428572e-06, "loss": 34.0203, "step": 12563 }, { "epoch": 299.14328358208957, "grad_norm": 27.260379791259766, "learning_rate": 9.37063492063492e-06, "loss": 34.0293, "step": 12564 }, { "epoch": 299.1671641791045, "grad_norm": 27.416215896606445, "learning_rate": 9.369841269841272e-06, "loss": 34.4288, "step": 12565 }, { "epoch": 299.1910447761194, "grad_norm": 22.469879150390625, "learning_rate": 9.36904761904762e-06, "loss": 34.167, "step": 12566 }, { "epoch": 299.21492537313435, "grad_norm": 29.78142738342285, "learning_rate": 9.368253968253968e-06, "loss": 33.9699, "step": 12567 }, { "epoch": 299.23880597014926, "grad_norm": 26.320222854614258, "learning_rate": 9.367460317460319e-06, "loss": 33.7922, "step": 12568 }, { "epoch": 299.26268656716417, "grad_norm": 31.120466232299805, "learning_rate": 9.366666666666668e-06, "loss": 34.5581, "step": 12569 }, { "epoch": 299.28656716417913, "grad_norm": 28.603199005126953, "learning_rate": 9.365873015873017e-06, "loss": 34.4757, "step": 12570 }, { "epoch": 299.31044776119404, "grad_norm": 25.24419403076172, "learning_rate": 9.365079365079366e-06, "loss": 34.3327, "step": 12571 }, { "epoch": 299.33432835820895, "grad_norm": 22.70826530456543, "learning_rate": 9.364285714285715e-06, "loss": 32.9677, "step": 12572 }, { "epoch": 299.35820895522386, "grad_norm": 27.052648544311523, "learning_rate": 9.363492063492064e-06, "loss": 33.9781, "step": 12573 }, { "epoch": 299.3820895522388, "grad_norm": 21.436756134033203, "learning_rate": 9.362698412698413e-06, "loss": 33.6712, "step": 12574 }, { "epoch": 299.40597014925373, "grad_norm": 32.069488525390625, "learning_rate": 9.361904761904762e-06, "loss": 33.2873, "step": 12575 }, { "epoch": 299.42985074626864, "grad_norm": 25.22380828857422, "learning_rate": 9.361111111111111e-06, "loss": 34.0121, "step": 12576 }, { "epoch": 299.4537313432836, "grad_norm": 27.944772720336914, "learning_rate": 9.36031746031746e-06, "loss": 34.542, "step": 12577 }, { "epoch": 299.4776119402985, "grad_norm": NaN, "learning_rate": 9.359523809523811e-06, "loss": 29.7356, "step": 12578 }, { "epoch": 299.5014925373134, "grad_norm": 27.564472198486328, "learning_rate": 9.359523809523811e-06, "loss": 34.4153, "step": 12579 }, { "epoch": 299.52537313432833, "grad_norm": 29.498014450073242, "learning_rate": 9.358730158730159e-06, "loss": 33.3341, "step": 12580 }, { "epoch": 299.5492537313433, "grad_norm": 23.420347213745117, "learning_rate": 9.35793650793651e-06, "loss": 34.2089, "step": 12581 }, { "epoch": 299.5731343283582, "grad_norm": 31.40555191040039, "learning_rate": 9.357142857142859e-06, "loss": 34.7442, "step": 12582 }, { "epoch": 299.5970149253731, "grad_norm": 27.798049926757812, "learning_rate": 9.356349206349206e-06, "loss": 33.1066, "step": 12583 }, { "epoch": 299.6208955223881, "grad_norm": 30.39460563659668, "learning_rate": 9.355555555555557e-06, "loss": 34.2612, "step": 12584 }, { "epoch": 299.644776119403, "grad_norm": 28.282512664794922, "learning_rate": 9.354761904761906e-06, "loss": 34.7089, "step": 12585 }, { "epoch": 299.6686567164179, "grad_norm": 25.006746292114258, "learning_rate": 9.353968253968255e-06, "loss": 33.8158, "step": 12586 }, { "epoch": 299.6925373134328, "grad_norm": 24.87606430053711, "learning_rate": 9.353174603174604e-06, "loss": 33.8517, "step": 12587 }, { "epoch": 299.7164179104478, "grad_norm": 28.879676818847656, "learning_rate": 9.352380952380953e-06, "loss": 33.885, "step": 12588 }, { "epoch": 299.7402985074627, "grad_norm": 23.082223892211914, "learning_rate": 9.351587301587302e-06, "loss": 35.6958, "step": 12589 }, { "epoch": 299.7641791044776, "grad_norm": 28.886215209960938, "learning_rate": 9.350793650793651e-06, "loss": 33.1954, "step": 12590 }, { "epoch": 299.78805970149256, "grad_norm": 22.496732711791992, "learning_rate": 9.350000000000002e-06, "loss": 34.3587, "step": 12591 }, { "epoch": 299.81194029850747, "grad_norm": 28.374666213989258, "learning_rate": 9.34920634920635e-06, "loss": 34.6024, "step": 12592 }, { "epoch": 299.8358208955224, "grad_norm": 23.658954620361328, "learning_rate": 9.348412698412698e-06, "loss": 33.771, "step": 12593 }, { "epoch": 299.85970149253734, "grad_norm": 29.819311141967773, "learning_rate": 9.34761904761905e-06, "loss": 34.2294, "step": 12594 }, { "epoch": 299.88358208955225, "grad_norm": 23.745912551879883, "learning_rate": 9.346825396825398e-06, "loss": 34.2839, "step": 12595 }, { "epoch": 299.90746268656716, "grad_norm": 31.37458038330078, "learning_rate": 9.346031746031747e-06, "loss": 34.0879, "step": 12596 }, { "epoch": 299.93134328358207, "grad_norm": 26.10491180419922, "learning_rate": 9.345238095238096e-06, "loss": 32.6205, "step": 12597 }, { "epoch": 299.95522388059703, "grad_norm": 26.289323806762695, "learning_rate": 9.344444444444446e-06, "loss": 34.3294, "step": 12598 }, { "epoch": 299.97910447761194, "grad_norm": NaN, "learning_rate": 9.343650793650795e-06, "loss": 42.9851, "step": 12599 }, { "epoch": 300.0, "grad_norm": 21.931875228881836, "learning_rate": 9.343650793650795e-06, "loss": 29.5696, "step": 12600 }, { "epoch": 300.0, "step": 12600, "total_flos": 6.194079361991644e+17, "train_loss": 2.3002554760282004, "train_runtime": 25617.7034, "train_samples_per_second": 62.675, "train_steps_per_second": 0.492 }, { "epoch": 300.0238805970149, "grad_norm": 26.171287536621094, "learning_rate": 1e-05, "loss": 35.2852, "step": 12601 }, { "epoch": 300.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999255952380954e-06, "loss": 39.4791, "step": 12602 }, { "epoch": 300.0716417910448, "grad_norm": Infinity, "learning_rate": 9.999255952380954e-06, "loss": 41.0054, "step": 12603 }, { "epoch": 300.0955223880597, "grad_norm": 366.8433837890625, "learning_rate": 9.999255952380954e-06, "loss": 40.4224, "step": 12604 }, { "epoch": 300.1194029850746, "grad_norm": 206.70486450195312, "learning_rate": 9.998511904761904e-06, "loss": 38.5891, "step": 12605 }, { "epoch": 300.14328358208957, "grad_norm": 90.80491638183594, "learning_rate": 9.997767857142859e-06, "loss": 36.7585, "step": 12606 }, { "epoch": 300.1671641791045, "grad_norm": 69.32449340820312, "learning_rate": 9.99702380952381e-06, "loss": 36.3685, "step": 12607 }, { "epoch": 300.1910447761194, "grad_norm": 60.954158782958984, "learning_rate": 9.996279761904763e-06, "loss": 35.251, "step": 12608 }, { "epoch": 300.21492537313435, "grad_norm": 49.41020202636719, "learning_rate": 9.995535714285715e-06, "loss": 34.9641, "step": 12609 }, { "epoch": 300.23880597014926, "grad_norm": 48.63925552368164, "learning_rate": 9.994791666666668e-06, "loss": 35.2815, "step": 12610 }, { "epoch": 300.26268656716417, "grad_norm": 33.32303237915039, "learning_rate": 9.99404761904762e-06, "loss": 33.8647, "step": 12611 }, { "epoch": 300.28656716417913, "grad_norm": 34.75282669067383, "learning_rate": 9.993303571428572e-06, "loss": 33.7036, "step": 12612 }, { "epoch": 300.31044776119404, "grad_norm": 31.14154815673828, "learning_rate": 9.992559523809524e-06, "loss": 35.0993, "step": 12613 }, { "epoch": 300.33432835820895, "grad_norm": 25.557985305786133, "learning_rate": 9.991815476190477e-06, "loss": 34.5335, "step": 12614 }, { "epoch": 300.35820895522386, "grad_norm": 28.474023818969727, "learning_rate": 9.99107142857143e-06, "loss": 32.82, "step": 12615 }, { "epoch": 300.3820895522388, "grad_norm": 22.004846572875977, "learning_rate": 9.990327380952382e-06, "loss": 34.1407, "step": 12616 }, { "epoch": 300.40597014925373, "grad_norm": 22.144994735717773, "learning_rate": 9.989583333333333e-06, "loss": 33.5889, "step": 12617 }, { "epoch": 300.42985074626864, "grad_norm": 19.678911209106445, "learning_rate": 9.988839285714286e-06, "loss": 33.8089, "step": 12618 }, { "epoch": 300.4537313432836, "grad_norm": 18.216703414916992, "learning_rate": 9.988095238095239e-06, "loss": 34.4819, "step": 12619 }, { "epoch": 300.4776119402985, "grad_norm": 19.384990692138672, "learning_rate": 9.987351190476191e-06, "loss": 33.4617, "step": 12620 }, { "epoch": 300.5014925373134, "grad_norm": 19.665416717529297, "learning_rate": 9.986607142857142e-06, "loss": 33.7936, "step": 12621 }, { "epoch": 300.52537313432833, "grad_norm": 19.364046096801758, "learning_rate": 9.985863095238097e-06, "loss": 33.9958, "step": 12622 }, { "epoch": 300.5492537313433, "grad_norm": 16.984670639038086, "learning_rate": 9.985119047619048e-06, "loss": 34.3715, "step": 12623 }, { "epoch": 300.5731343283582, "grad_norm": 20.104597091674805, "learning_rate": 9.984375e-06, "loss": 34.824, "step": 12624 }, { "epoch": 300.5970149253731, "grad_norm": 19.9632511138916, "learning_rate": 9.983630952380953e-06, "loss": 34.0996, "step": 12625 }, { "epoch": 300.6208955223881, "grad_norm": 19.555803298950195, "learning_rate": 9.982886904761906e-06, "loss": 33.1285, "step": 12626 }, { "epoch": 300.644776119403, "grad_norm": 14.718427658081055, "learning_rate": 9.982142857142858e-06, "loss": 33.6884, "step": 12627 }, { "epoch": 300.6686567164179, "grad_norm": 18.065269470214844, "learning_rate": 9.98139880952381e-06, "loss": 32.808, "step": 12628 }, { "epoch": 300.6925373134328, "grad_norm": 27.190515518188477, "learning_rate": 9.980654761904762e-06, "loss": 33.6793, "step": 12629 }, { "epoch": 300.7164179104478, "grad_norm": 18.258726119995117, "learning_rate": 9.979910714285715e-06, "loss": 34.8021, "step": 12630 }, { "epoch": 300.7402985074627, "grad_norm": 19.380048751831055, "learning_rate": 9.979166666666668e-06, "loss": 34.4981, "step": 12631 }, { "epoch": 300.7641791044776, "grad_norm": 25.334543228149414, "learning_rate": 9.97842261904762e-06, "loss": 34.8994, "step": 12632 }, { "epoch": 300.78805970149256, "grad_norm": 15.441366195678711, "learning_rate": 9.977678571428571e-06, "loss": 33.9724, "step": 12633 }, { "epoch": 300.81194029850747, "grad_norm": 16.614748001098633, "learning_rate": 9.976934523809526e-06, "loss": 34.1694, "step": 12634 }, { "epoch": 300.8358208955224, "grad_norm": 16.37385368347168, "learning_rate": 9.976190476190477e-06, "loss": 33.9853, "step": 12635 }, { "epoch": 300.85970149253734, "grad_norm": 17.56476593017578, "learning_rate": 9.97544642857143e-06, "loss": 35.3559, "step": 12636 }, { "epoch": 300.88358208955225, "grad_norm": 15.733250617980957, "learning_rate": 9.974702380952382e-06, "loss": 33.4581, "step": 12637 }, { "epoch": 300.90746268656716, "grad_norm": 17.142967224121094, "learning_rate": 9.973958333333335e-06, "loss": 34.729, "step": 12638 }, { "epoch": 300.93134328358207, "grad_norm": 23.365276336669922, "learning_rate": 9.973214285714287e-06, "loss": 35.2895, "step": 12639 }, { "epoch": 300.95522388059703, "grad_norm": 16.783174514770508, "learning_rate": 9.972470238095238e-06, "loss": 34.4815, "step": 12640 }, { "epoch": 300.97910447761194, "grad_norm": 13.008834838867188, "learning_rate": 9.971726190476191e-06, "loss": 35.215, "step": 12641 }, { "epoch": 301.0, "grad_norm": 12.587215423583984, "learning_rate": 9.970982142857144e-06, "loss": 30.3054, "step": 12642 }, { "epoch": 301.0238805970149, "grad_norm": 15.930685043334961, "learning_rate": 9.970238095238096e-06, "loss": 33.6951, "step": 12643 }, { "epoch": 301.0477611940299, "grad_norm": 20.298192977905273, "learning_rate": 9.969494047619049e-06, "loss": 34.2815, "step": 12644 }, { "epoch": 301.0716417910448, "grad_norm": 17.51775360107422, "learning_rate": 9.96875e-06, "loss": 35.7121, "step": 12645 }, { "epoch": 301.0955223880597, "grad_norm": 15.353557586669922, "learning_rate": 9.968005952380953e-06, "loss": 33.3068, "step": 12646 }, { "epoch": 301.1194029850746, "grad_norm": 13.602364540100098, "learning_rate": 9.967261904761905e-06, "loss": 34.2159, "step": 12647 }, { "epoch": 301.14328358208957, "grad_norm": 16.48188591003418, "learning_rate": 9.966517857142858e-06, "loss": 33.9076, "step": 12648 }, { "epoch": 301.1671641791045, "grad_norm": 15.48665714263916, "learning_rate": 9.965773809523809e-06, "loss": 34.5498, "step": 12649 }, { "epoch": 301.1910447761194, "grad_norm": 14.90807819366455, "learning_rate": 9.965029761904763e-06, "loss": 33.397, "step": 12650 }, { "epoch": 301.21492537313435, "grad_norm": 15.84902286529541, "learning_rate": 9.964285714285714e-06, "loss": 34.2463, "step": 12651 }, { "epoch": 301.23880597014926, "grad_norm": 16.459049224853516, "learning_rate": 9.963541666666667e-06, "loss": 33.443, "step": 12652 }, { "epoch": 301.26268656716417, "grad_norm": 16.44605827331543, "learning_rate": 9.96279761904762e-06, "loss": 35.0376, "step": 12653 }, { "epoch": 301.28656716417913, "grad_norm": 27.001888275146484, "learning_rate": 9.962053571428573e-06, "loss": 34.9869, "step": 12654 }, { "epoch": 301.31044776119404, "grad_norm": 17.03106117248535, "learning_rate": 9.961309523809525e-06, "loss": 35.5666, "step": 12655 }, { "epoch": 301.33432835820895, "grad_norm": 19.105209350585938, "learning_rate": 9.960565476190476e-06, "loss": 33.8211, "step": 12656 }, { "epoch": 301.35820895522386, "grad_norm": 24.32528305053711, "learning_rate": 9.959821428571429e-06, "loss": 33.7321, "step": 12657 }, { "epoch": 301.3820895522388, "grad_norm": 14.261208534240723, "learning_rate": 9.959077380952382e-06, "loss": 33.4276, "step": 12658 }, { "epoch": 301.40597014925373, "grad_norm": 13.748629570007324, "learning_rate": 9.958333333333334e-06, "loss": 33.5701, "step": 12659 }, { "epoch": 301.42985074626864, "grad_norm": 18.491018295288086, "learning_rate": 9.957589285714287e-06, "loss": 32.7185, "step": 12660 }, { "epoch": 301.4537313432836, "grad_norm": 17.87461280822754, "learning_rate": 9.956845238095238e-06, "loss": 33.5648, "step": 12661 }, { "epoch": 301.4776119402985, "grad_norm": 19.160520553588867, "learning_rate": 9.956101190476192e-06, "loss": 34.6172, "step": 12662 }, { "epoch": 301.5014925373134, "grad_norm": 13.538578987121582, "learning_rate": 9.955357142857143e-06, "loss": 33.9981, "step": 12663 }, { "epoch": 301.52537313432833, "grad_norm": 20.332965850830078, "learning_rate": 9.954613095238096e-06, "loss": 34.503, "step": 12664 }, { "epoch": 301.5492537313433, "grad_norm": 22.29603385925293, "learning_rate": 9.953869047619049e-06, "loss": 33.5233, "step": 12665 }, { "epoch": 301.5731343283582, "grad_norm": 14.441452026367188, "learning_rate": 9.953125000000001e-06, "loss": 33.2648, "step": 12666 }, { "epoch": 301.5970149253731, "grad_norm": 16.77598762512207, "learning_rate": 9.952380952380954e-06, "loss": 33.8784, "step": 12667 }, { "epoch": 301.6208955223881, "grad_norm": NaN, "learning_rate": 9.951636904761905e-06, "loss": 45.9914, "step": 12668 }, { "epoch": 301.644776119403, "grad_norm": 18.28978729248047, "learning_rate": 9.951636904761905e-06, "loss": 34.5955, "step": 12669 }, { "epoch": 301.6686567164179, "grad_norm": 20.85295867919922, "learning_rate": 9.950892857142858e-06, "loss": 33.6025, "step": 12670 }, { "epoch": 301.6925373134328, "grad_norm": 15.833793640136719, "learning_rate": 9.95014880952381e-06, "loss": 34.9255, "step": 12671 }, { "epoch": 301.7164179104478, "grad_norm": 25.031152725219727, "learning_rate": 9.949404761904763e-06, "loss": 35.3748, "step": 12672 }, { "epoch": 301.7402985074627, "grad_norm": 18.836427688598633, "learning_rate": 9.948660714285716e-06, "loss": 32.7802, "step": 12673 }, { "epoch": 301.7641791044776, "grad_norm": 18.327163696289062, "learning_rate": 9.947916666666667e-06, "loss": 33.6246, "step": 12674 }, { "epoch": 301.78805970149256, "grad_norm": 24.15089988708496, "learning_rate": 9.947172619047621e-06, "loss": 34.5693, "step": 12675 }, { "epoch": 301.81194029850747, "grad_norm": 16.35633087158203, "learning_rate": 9.946428571428572e-06, "loss": 34.2978, "step": 12676 }, { "epoch": 301.8358208955224, "grad_norm": 20.777875900268555, "learning_rate": 9.945684523809525e-06, "loss": 34.7239, "step": 12677 }, { "epoch": 301.85970149253734, "grad_norm": 21.883655548095703, "learning_rate": 9.944940476190476e-06, "loss": 33.3973, "step": 12678 }, { "epoch": 301.88358208955225, "grad_norm": 19.19086265563965, "learning_rate": 9.94419642857143e-06, "loss": 33.5899, "step": 12679 }, { "epoch": 301.90746268656716, "grad_norm": 16.247377395629883, "learning_rate": 9.943452380952381e-06, "loss": 34.0119, "step": 12680 }, { "epoch": 301.93134328358207, "grad_norm": 25.994731903076172, "learning_rate": 9.942708333333334e-06, "loss": 34.4688, "step": 12681 }, { "epoch": 301.95522388059703, "grad_norm": 18.13869857788086, "learning_rate": 9.941964285714287e-06, "loss": 34.0545, "step": 12682 }, { "epoch": 301.97910447761194, "grad_norm": 20.08393096923828, "learning_rate": 9.94122023809524e-06, "loss": 33.2465, "step": 12683 }, { "epoch": 302.0, "grad_norm": 19.09702491760254, "learning_rate": 9.940476190476192e-06, "loss": 30.7813, "step": 12684 }, { "epoch": 302.0238805970149, "grad_norm": 19.38040542602539, "learning_rate": 9.939732142857143e-06, "loss": 34.5853, "step": 12685 }, { "epoch": 302.0477611940299, "grad_norm": 17.767541885375977, "learning_rate": 9.938988095238096e-06, "loss": 34.1928, "step": 12686 }, { "epoch": 302.0716417910448, "grad_norm": 26.805606842041016, "learning_rate": 9.938244047619048e-06, "loss": 33.806, "step": 12687 }, { "epoch": 302.0955223880597, "grad_norm": 15.128178596496582, "learning_rate": 9.937500000000001e-06, "loss": 33.5924, "step": 12688 }, { "epoch": 302.1194029850746, "grad_norm": 24.914600372314453, "learning_rate": 9.936755952380954e-06, "loss": 33.5394, "step": 12689 }, { "epoch": 302.14328358208957, "grad_norm": 19.96113395690918, "learning_rate": 9.936011904761905e-06, "loss": 34.0537, "step": 12690 }, { "epoch": 302.1671641791045, "grad_norm": 17.563623428344727, "learning_rate": 9.935267857142859e-06, "loss": 34.6207, "step": 12691 }, { "epoch": 302.1910447761194, "grad_norm": 28.396629333496094, "learning_rate": 9.93452380952381e-06, "loss": 34.8099, "step": 12692 }, { "epoch": 302.21492537313435, "grad_norm": 16.51083755493164, "learning_rate": 9.933779761904763e-06, "loss": 34.0849, "step": 12693 }, { "epoch": 302.23880597014926, "grad_norm": 27.13544273376465, "learning_rate": 9.933035714285715e-06, "loss": 34.1322, "step": 12694 }, { "epoch": 302.26268656716417, "grad_norm": 19.99069595336914, "learning_rate": 9.932291666666668e-06, "loss": 33.3611, "step": 12695 }, { "epoch": 302.28656716417913, "grad_norm": 21.026384353637695, "learning_rate": 9.93154761904762e-06, "loss": 34.6265, "step": 12696 }, { "epoch": 302.31044776119404, "grad_norm": 21.092233657836914, "learning_rate": 9.930803571428572e-06, "loss": 33.2195, "step": 12697 }, { "epoch": 302.33432835820895, "grad_norm": 18.910707473754883, "learning_rate": 9.930059523809524e-06, "loss": 34.1341, "step": 12698 }, { "epoch": 302.35820895522386, "grad_norm": 16.45378875732422, "learning_rate": 9.929315476190477e-06, "loss": 32.7027, "step": 12699 }, { "epoch": 302.3820895522388, "grad_norm": 22.312129974365234, "learning_rate": 9.92857142857143e-06, "loss": 35.3579, "step": 12700 }, { "epoch": 302.40597014925373, "grad_norm": 16.72593879699707, "learning_rate": 9.927827380952383e-06, "loss": 32.2487, "step": 12701 }, { "epoch": 302.42985074626864, "grad_norm": 17.502164840698242, "learning_rate": 9.927083333333334e-06, "loss": 34.8174, "step": 12702 }, { "epoch": 302.4537313432836, "grad_norm": 18.55446434020996, "learning_rate": 9.926339285714288e-06, "loss": 33.5066, "step": 12703 }, { "epoch": 302.4776119402985, "grad_norm": 14.626564979553223, "learning_rate": 9.925595238095239e-06, "loss": 33.6174, "step": 12704 }, { "epoch": 302.5014925373134, "grad_norm": 20.662425994873047, "learning_rate": 9.924851190476192e-06, "loss": 33.3769, "step": 12705 }, { "epoch": 302.52537313432833, "grad_norm": 17.78053092956543, "learning_rate": 9.924107142857143e-06, "loss": 35.3668, "step": 12706 }, { "epoch": 302.5492537313433, "grad_norm": 19.1500186920166, "learning_rate": 9.923363095238097e-06, "loss": 33.919, "step": 12707 }, { "epoch": 302.5731343283582, "grad_norm": 17.39780616760254, "learning_rate": 9.922619047619048e-06, "loss": 33.9475, "step": 12708 }, { "epoch": 302.5970149253731, "grad_norm": 15.932716369628906, "learning_rate": 9.921875e-06, "loss": 33.6818, "step": 12709 }, { "epoch": 302.6208955223881, "grad_norm": 13.43214225769043, "learning_rate": 9.921130952380953e-06, "loss": 35.5086, "step": 12710 }, { "epoch": 302.644776119403, "grad_norm": 18.646276473999023, "learning_rate": 9.920386904761904e-06, "loss": 33.3648, "step": 12711 }, { "epoch": 302.6686567164179, "grad_norm": 17.119834899902344, "learning_rate": 9.919642857142859e-06, "loss": 35.7617, "step": 12712 }, { "epoch": 302.6925373134328, "grad_norm": 17.717750549316406, "learning_rate": 9.91889880952381e-06, "loss": 33.1063, "step": 12713 }, { "epoch": 302.7164179104478, "grad_norm": 17.798465728759766, "learning_rate": 9.918154761904762e-06, "loss": 34.2558, "step": 12714 }, { "epoch": 302.7402985074627, "grad_norm": 15.104628562927246, "learning_rate": 9.917410714285715e-06, "loss": 32.9761, "step": 12715 }, { "epoch": 302.7641791044776, "grad_norm": 15.719858169555664, "learning_rate": 9.916666666666668e-06, "loss": 34.086, "step": 12716 }, { "epoch": 302.78805970149256, "grad_norm": 15.24967098236084, "learning_rate": 9.91592261904762e-06, "loss": 34.8758, "step": 12717 }, { "epoch": 302.81194029850747, "grad_norm": 14.304805755615234, "learning_rate": 9.915178571428571e-06, "loss": 33.4391, "step": 12718 }, { "epoch": 302.8358208955224, "grad_norm": 15.708247184753418, "learning_rate": 9.914434523809524e-06, "loss": 33.8727, "step": 12719 }, { "epoch": 302.85970149253734, "grad_norm": 17.25934410095215, "learning_rate": 9.913690476190477e-06, "loss": 33.8216, "step": 12720 }, { "epoch": 302.88358208955225, "grad_norm": 21.16497039794922, "learning_rate": 9.91294642857143e-06, "loss": 34.6096, "step": 12721 }, { "epoch": 302.90746268656716, "grad_norm": 15.628374099731445, "learning_rate": 9.912202380952382e-06, "loss": 34.5423, "step": 12722 }, { "epoch": 302.93134328358207, "grad_norm": 15.003438949584961, "learning_rate": 9.911458333333333e-06, "loss": 33.9223, "step": 12723 }, { "epoch": 302.95522388059703, "grad_norm": 16.933246612548828, "learning_rate": 9.910714285714288e-06, "loss": 33.9144, "step": 12724 }, { "epoch": 302.97910447761194, "grad_norm": 15.308454513549805, "learning_rate": 9.909970238095238e-06, "loss": 34.005, "step": 12725 }, { "epoch": 303.0, "grad_norm": 15.518234252929688, "learning_rate": 9.909226190476191e-06, "loss": 29.1557, "step": 12726 }, { "epoch": 303.0238805970149, "grad_norm": 14.188685417175293, "learning_rate": 9.908482142857144e-06, "loss": 35.1923, "step": 12727 }, { "epoch": 303.0477611940299, "grad_norm": 19.43871307373047, "learning_rate": 9.907738095238097e-06, "loss": 33.6881, "step": 12728 }, { "epoch": 303.0716417910448, "grad_norm": 14.773733139038086, "learning_rate": 9.90699404761905e-06, "loss": 34.4568, "step": 12729 }, { "epoch": 303.0955223880597, "grad_norm": 18.925222396850586, "learning_rate": 9.90625e-06, "loss": 34.5105, "step": 12730 }, { "epoch": 303.1194029850746, "grad_norm": 15.766783714294434, "learning_rate": 9.905505952380953e-06, "loss": 35.4248, "step": 12731 }, { "epoch": 303.14328358208957, "grad_norm": 16.977514266967773, "learning_rate": 9.904761904761906e-06, "loss": 33.9733, "step": 12732 }, { "epoch": 303.1671641791045, "grad_norm": 17.148256301879883, "learning_rate": 9.904017857142858e-06, "loss": 32.9881, "step": 12733 }, { "epoch": 303.1910447761194, "grad_norm": 16.359018325805664, "learning_rate": 9.90327380952381e-06, "loss": 34.7802, "step": 12734 }, { "epoch": 303.21492537313435, "grad_norm": 17.943899154663086, "learning_rate": 9.902529761904762e-06, "loss": 34.0458, "step": 12735 }, { "epoch": 303.23880597014926, "grad_norm": 19.585962295532227, "learning_rate": 9.901785714285715e-06, "loss": 34.242, "step": 12736 }, { "epoch": 303.26268656716417, "grad_norm": 21.159753799438477, "learning_rate": 9.901041666666667e-06, "loss": 34.6812, "step": 12737 }, { "epoch": 303.28656716417913, "grad_norm": 13.98138427734375, "learning_rate": 9.90029761904762e-06, "loss": 33.2287, "step": 12738 }, { "epoch": 303.31044776119404, "grad_norm": 18.36336326599121, "learning_rate": 9.899553571428571e-06, "loss": 33.3087, "step": 12739 }, { "epoch": 303.33432835820895, "grad_norm": 15.030476570129395, "learning_rate": 9.898809523809525e-06, "loss": 35.2234, "step": 12740 }, { "epoch": 303.35820895522386, "grad_norm": 17.081613540649414, "learning_rate": 9.898065476190476e-06, "loss": 33.2825, "step": 12741 }, { "epoch": 303.3820895522388, "grad_norm": 17.4920654296875, "learning_rate": 9.897321428571429e-06, "loss": 33.3747, "step": 12742 }, { "epoch": 303.40597014925373, "grad_norm": 16.359024047851562, "learning_rate": 9.896577380952382e-06, "loss": 33.502, "step": 12743 }, { "epoch": 303.42985074626864, "grad_norm": 24.50620460510254, "learning_rate": 9.895833333333334e-06, "loss": 33.6481, "step": 12744 }, { "epoch": 303.4537313432836, "grad_norm": 14.11179256439209, "learning_rate": 9.895089285714287e-06, "loss": 33.9526, "step": 12745 }, { "epoch": 303.4776119402985, "grad_norm": 16.194307327270508, "learning_rate": 9.894345238095238e-06, "loss": 33.9687, "step": 12746 }, { "epoch": 303.5014925373134, "grad_norm": 19.30034828186035, "learning_rate": 9.89360119047619e-06, "loss": 33.1953, "step": 12747 }, { "epoch": 303.52537313432833, "grad_norm": 20.41242790222168, "learning_rate": 9.892857142857143e-06, "loss": 35.2336, "step": 12748 }, { "epoch": 303.5492537313433, "grad_norm": 19.893640518188477, "learning_rate": 9.892113095238096e-06, "loss": 33.6545, "step": 12749 }, { "epoch": 303.5731343283582, "grad_norm": 15.920687675476074, "learning_rate": 9.891369047619049e-06, "loss": 33.4267, "step": 12750 }, { "epoch": 303.5970149253731, "grad_norm": 18.475324630737305, "learning_rate": 9.890625e-06, "loss": 33.6388, "step": 12751 }, { "epoch": 303.6208955223881, "grad_norm": 17.941503524780273, "learning_rate": 9.889880952380954e-06, "loss": 33.4139, "step": 12752 }, { "epoch": 303.644776119403, "grad_norm": 19.626501083374023, "learning_rate": 9.889136904761905e-06, "loss": 33.9111, "step": 12753 }, { "epoch": 303.6686567164179, "grad_norm": 16.618045806884766, "learning_rate": 9.888392857142858e-06, "loss": 32.954, "step": 12754 }, { "epoch": 303.6925373134328, "grad_norm": 17.80182456970215, "learning_rate": 9.88764880952381e-06, "loss": 34.252, "step": 12755 }, { "epoch": 303.7164179104478, "grad_norm": 14.318121910095215, "learning_rate": 9.886904761904763e-06, "loss": 34.4709, "step": 12756 }, { "epoch": 303.7402985074627, "grad_norm": 16.709972381591797, "learning_rate": 9.886160714285716e-06, "loss": 33.7177, "step": 12757 }, { "epoch": 303.7641791044776, "grad_norm": 21.392791748046875, "learning_rate": 9.885416666666667e-06, "loss": 32.6111, "step": 12758 }, { "epoch": 303.78805970149256, "grad_norm": 19.380441665649414, "learning_rate": 9.88467261904762e-06, "loss": 34.8239, "step": 12759 }, { "epoch": 303.81194029850747, "grad_norm": 14.385751724243164, "learning_rate": 9.883928571428572e-06, "loss": 33.4982, "step": 12760 }, { "epoch": 303.8358208955224, "grad_norm": 16.864112854003906, "learning_rate": 9.883184523809525e-06, "loss": 34.4644, "step": 12761 }, { "epoch": 303.85970149253734, "grad_norm": 19.41377830505371, "learning_rate": 9.882440476190478e-06, "loss": 32.4874, "step": 12762 }, { "epoch": 303.88358208955225, "grad_norm": 18.528348922729492, "learning_rate": 9.881696428571429e-06, "loss": 34.8511, "step": 12763 }, { "epoch": 303.90746268656716, "grad_norm": 15.660223960876465, "learning_rate": 9.880952380952381e-06, "loss": 34.883, "step": 12764 }, { "epoch": 303.93134328358207, "grad_norm": 12.949760437011719, "learning_rate": 9.880208333333334e-06, "loss": 33.6117, "step": 12765 }, { "epoch": 303.95522388059703, "grad_norm": 17.378768920898438, "learning_rate": 9.879464285714287e-06, "loss": 34.1157, "step": 12766 }, { "epoch": 303.97910447761194, "grad_norm": 15.662836074829102, "learning_rate": 9.878720238095238e-06, "loss": 34.0723, "step": 12767 }, { "epoch": 304.0, "grad_norm": 19.132266998291016, "learning_rate": 9.877976190476192e-06, "loss": 30.1438, "step": 12768 }, { "epoch": 304.0238805970149, "grad_norm": 14.296299934387207, "learning_rate": 9.877232142857143e-06, "loss": 33.8153, "step": 12769 }, { "epoch": 304.0477611940299, "grad_norm": 17.48133087158203, "learning_rate": 9.876488095238096e-06, "loss": 34.5658, "step": 12770 }, { "epoch": 304.0716417910448, "grad_norm": 18.91547203063965, "learning_rate": 9.875744047619048e-06, "loss": 33.7174, "step": 12771 }, { "epoch": 304.0955223880597, "grad_norm": 19.87838363647461, "learning_rate": 9.875000000000001e-06, "loss": 33.9482, "step": 12772 }, { "epoch": 304.1194029850746, "grad_norm": 16.255050659179688, "learning_rate": 9.874255952380954e-06, "loss": 33.1052, "step": 12773 }, { "epoch": 304.14328358208957, "grad_norm": 17.426010131835938, "learning_rate": 9.873511904761905e-06, "loss": 34.3741, "step": 12774 }, { "epoch": 304.1671641791045, "grad_norm": 13.147067070007324, "learning_rate": 9.872767857142858e-06, "loss": 33.9996, "step": 12775 }, { "epoch": 304.1910447761194, "grad_norm": 25.692888259887695, "learning_rate": 9.87202380952381e-06, "loss": 34.756, "step": 12776 }, { "epoch": 304.21492537313435, "grad_norm": 18.753189086914062, "learning_rate": 9.871279761904763e-06, "loss": 32.5578, "step": 12777 }, { "epoch": 304.23880597014926, "grad_norm": 18.00774383544922, "learning_rate": 9.870535714285716e-06, "loss": 33.9436, "step": 12778 }, { "epoch": 304.26268656716417, "grad_norm": 17.329633712768555, "learning_rate": 9.869791666666667e-06, "loss": 34.6306, "step": 12779 }, { "epoch": 304.28656716417913, "grad_norm": 19.246505737304688, "learning_rate": 9.869047619047621e-06, "loss": 34.655, "step": 12780 }, { "epoch": 304.31044776119404, "grad_norm": 19.427467346191406, "learning_rate": 9.868303571428572e-06, "loss": 32.5883, "step": 12781 }, { "epoch": 304.33432835820895, "grad_norm": 20.187152862548828, "learning_rate": 9.867559523809525e-06, "loss": 33.6937, "step": 12782 }, { "epoch": 304.35820895522386, "grad_norm": 15.276482582092285, "learning_rate": 9.866815476190477e-06, "loss": 35.0563, "step": 12783 }, { "epoch": 304.3820895522388, "grad_norm": 14.907896041870117, "learning_rate": 9.86607142857143e-06, "loss": 34.5241, "step": 12784 }, { "epoch": 304.40597014925373, "grad_norm": 18.98166847229004, "learning_rate": 9.865327380952383e-06, "loss": 34.6579, "step": 12785 }, { "epoch": 304.42985074626864, "grad_norm": 19.271556854248047, "learning_rate": 9.864583333333334e-06, "loss": 33.7333, "step": 12786 }, { "epoch": 304.4537313432836, "grad_norm": 13.590328216552734, "learning_rate": 9.863839285714286e-06, "loss": 34.1618, "step": 12787 }, { "epoch": 304.4776119402985, "grad_norm": 14.266732215881348, "learning_rate": 9.863095238095239e-06, "loss": 33.2994, "step": 12788 }, { "epoch": 304.5014925373134, "grad_norm": 15.15308666229248, "learning_rate": 9.862351190476192e-06, "loss": 34.8215, "step": 12789 }, { "epoch": 304.52537313432833, "grad_norm": 18.415672302246094, "learning_rate": 9.861607142857144e-06, "loss": 33.2897, "step": 12790 }, { "epoch": 304.5492537313433, "grad_norm": 17.03144645690918, "learning_rate": 9.860863095238095e-06, "loss": 34.4784, "step": 12791 }, { "epoch": 304.5731343283582, "grad_norm": 17.239591598510742, "learning_rate": 9.860119047619048e-06, "loss": 33.5741, "step": 12792 }, { "epoch": 304.5970149253731, "grad_norm": 17.369388580322266, "learning_rate": 9.859375e-06, "loss": 33.7227, "step": 12793 }, { "epoch": 304.6208955223881, "grad_norm": 14.208517074584961, "learning_rate": 9.858630952380953e-06, "loss": 34.1275, "step": 12794 }, { "epoch": 304.644776119403, "grad_norm": 22.755041122436523, "learning_rate": 9.857886904761904e-06, "loss": 34.2113, "step": 12795 }, { "epoch": 304.6686567164179, "grad_norm": 17.733745574951172, "learning_rate": 9.857142857142859e-06, "loss": 34.4321, "step": 12796 }, { "epoch": 304.6925373134328, "grad_norm": 17.67374610900879, "learning_rate": 9.85639880952381e-06, "loss": 34.5426, "step": 12797 }, { "epoch": 304.7164179104478, "grad_norm": 14.477204322814941, "learning_rate": 9.855654761904763e-06, "loss": 34.2791, "step": 12798 }, { "epoch": 304.7402985074627, "grad_norm": 16.99796485900879, "learning_rate": 9.854910714285715e-06, "loss": 34.0483, "step": 12799 }, { "epoch": 304.7641791044776, "grad_norm": 13.359634399414062, "learning_rate": 9.854166666666668e-06, "loss": 33.8718, "step": 12800 }, { "epoch": 304.78805970149256, "grad_norm": 14.59427547454834, "learning_rate": 9.85342261904762e-06, "loss": 33.2423, "step": 12801 }, { "epoch": 304.81194029850747, "grad_norm": 19.618736267089844, "learning_rate": 9.852678571428572e-06, "loss": 33.9496, "step": 12802 }, { "epoch": 304.8358208955224, "grad_norm": 23.237140655517578, "learning_rate": 9.851934523809524e-06, "loss": 33.6882, "step": 12803 }, { "epoch": 304.85970149253734, "grad_norm": 15.610248565673828, "learning_rate": 9.851190476190477e-06, "loss": 34.0138, "step": 12804 }, { "epoch": 304.88358208955225, "grad_norm": 15.84520149230957, "learning_rate": 9.85044642857143e-06, "loss": 34.5489, "step": 12805 }, { "epoch": 304.90746268656716, "grad_norm": 22.97906494140625, "learning_rate": 9.849702380952382e-06, "loss": 32.1467, "step": 12806 }, { "epoch": 304.93134328358207, "grad_norm": 19.967920303344727, "learning_rate": 9.848958333333333e-06, "loss": 32.5344, "step": 12807 }, { "epoch": 304.95522388059703, "grad_norm": 15.402724266052246, "learning_rate": 9.848214285714288e-06, "loss": 33.953, "step": 12808 }, { "epoch": 304.97910447761194, "grad_norm": 16.7278995513916, "learning_rate": 9.847470238095239e-06, "loss": 35.0912, "step": 12809 }, { "epoch": 305.0, "grad_norm": 12.981561660766602, "learning_rate": 9.846726190476191e-06, "loss": 29.2283, "step": 12810 }, { "epoch": 305.0238805970149, "grad_norm": 19.851594924926758, "learning_rate": 9.845982142857144e-06, "loss": 33.1445, "step": 12811 }, { "epoch": 305.0477611940299, "grad_norm": 19.49011993408203, "learning_rate": 9.845238095238097e-06, "loss": 34.982, "step": 12812 }, { "epoch": 305.0716417910448, "grad_norm": 25.895421981811523, "learning_rate": 9.84449404761905e-06, "loss": 32.9737, "step": 12813 }, { "epoch": 305.0955223880597, "grad_norm": 17.449796676635742, "learning_rate": 9.84375e-06, "loss": 33.482, "step": 12814 }, { "epoch": 305.1194029850746, "grad_norm": 24.824426651000977, "learning_rate": 9.843005952380953e-06, "loss": 33.8339, "step": 12815 }, { "epoch": 305.14328358208957, "grad_norm": 24.20445442199707, "learning_rate": 9.842261904761906e-06, "loss": 32.8542, "step": 12816 }, { "epoch": 305.1671641791045, "grad_norm": 18.673444747924805, "learning_rate": 9.841517857142858e-06, "loss": 34.1516, "step": 12817 }, { "epoch": 305.1910447761194, "grad_norm": 31.55808448791504, "learning_rate": 9.840773809523811e-06, "loss": 34.3214, "step": 12818 }, { "epoch": 305.21492537313435, "grad_norm": 20.21152114868164, "learning_rate": 9.840029761904762e-06, "loss": 33.559, "step": 12819 }, { "epoch": 305.23880597014926, "grad_norm": 39.6590576171875, "learning_rate": 9.839285714285715e-06, "loss": 34.5408, "step": 12820 }, { "epoch": 305.26268656716417, "grad_norm": 29.15034294128418, "learning_rate": 9.838541666666668e-06, "loss": 33.7678, "step": 12821 }, { "epoch": 305.28656716417913, "grad_norm": 39.511295318603516, "learning_rate": 9.83779761904762e-06, "loss": 34.1371, "step": 12822 }, { "epoch": 305.31044776119404, "grad_norm": 28.5534725189209, "learning_rate": 9.837053571428571e-06, "loss": 34.1853, "step": 12823 }, { "epoch": 305.33432835820895, "grad_norm": 42.570369720458984, "learning_rate": 9.836309523809524e-06, "loss": 34.2188, "step": 12824 }, { "epoch": 305.35820895522386, "grad_norm": 37.27431869506836, "learning_rate": 9.835565476190477e-06, "loss": 34.4188, "step": 12825 }, { "epoch": 305.3820895522388, "grad_norm": 31.43735694885254, "learning_rate": 9.83482142857143e-06, "loss": 33.9932, "step": 12826 }, { "epoch": 305.40597014925373, "grad_norm": 30.48042106628418, "learning_rate": 9.834077380952382e-06, "loss": 32.4454, "step": 12827 }, { "epoch": 305.42985074626864, "grad_norm": 28.137269973754883, "learning_rate": 9.833333333333333e-06, "loss": 34.293, "step": 12828 }, { "epoch": 305.4537313432836, "grad_norm": 24.35393524169922, "learning_rate": 9.832589285714287e-06, "loss": 34.0471, "step": 12829 }, { "epoch": 305.4776119402985, "grad_norm": 39.596553802490234, "learning_rate": 9.831845238095238e-06, "loss": 33.3478, "step": 12830 }, { "epoch": 305.5014925373134, "grad_norm": 33.761383056640625, "learning_rate": 9.831101190476191e-06, "loss": 34.2828, "step": 12831 }, { "epoch": 305.52537313432833, "grad_norm": 33.55476760864258, "learning_rate": 9.830357142857144e-06, "loss": 34.1581, "step": 12832 }, { "epoch": 305.5492537313433, "grad_norm": 30.091413497924805, "learning_rate": 9.829613095238096e-06, "loss": 35.2272, "step": 12833 }, { "epoch": 305.5731343283582, "grad_norm": 30.065752029418945, "learning_rate": 9.828869047619049e-06, "loss": 33.5706, "step": 12834 }, { "epoch": 305.5970149253731, "grad_norm": 28.13954734802246, "learning_rate": 9.828125e-06, "loss": 34.4559, "step": 12835 }, { "epoch": 305.6208955223881, "grad_norm": 33.12614822387695, "learning_rate": 9.827380952380953e-06, "loss": 33.4614, "step": 12836 }, { "epoch": 305.644776119403, "grad_norm": 29.598318099975586, "learning_rate": 9.826636904761905e-06, "loss": 34.0791, "step": 12837 }, { "epoch": 305.6686567164179, "grad_norm": 35.32413101196289, "learning_rate": 9.825892857142858e-06, "loss": 34.2338, "step": 12838 }, { "epoch": 305.6925373134328, "grad_norm": 33.113548278808594, "learning_rate": 9.82514880952381e-06, "loss": 34.2677, "step": 12839 }, { "epoch": 305.7164179104478, "grad_norm": 27.92148780822754, "learning_rate": 9.824404761904762e-06, "loss": 33.1426, "step": 12840 }, { "epoch": 305.7402985074627, "grad_norm": 26.550376892089844, "learning_rate": 9.823660714285716e-06, "loss": 33.0227, "step": 12841 }, { "epoch": 305.7641791044776, "grad_norm": 29.040504455566406, "learning_rate": 9.822916666666667e-06, "loss": 34.9542, "step": 12842 }, { "epoch": 305.78805970149256, "grad_norm": 25.413545608520508, "learning_rate": 9.82217261904762e-06, "loss": 33.7384, "step": 12843 }, { "epoch": 305.81194029850747, "grad_norm": 36.03944778442383, "learning_rate": 9.821428571428573e-06, "loss": 32.7667, "step": 12844 }, { "epoch": 305.8358208955224, "grad_norm": 31.59793472290039, "learning_rate": 9.820684523809525e-06, "loss": 34.1801, "step": 12845 }, { "epoch": 305.85970149253734, "grad_norm": 32.00392532348633, "learning_rate": 9.819940476190478e-06, "loss": 34.7444, "step": 12846 }, { "epoch": 305.88358208955225, "grad_norm": 29.623685836791992, "learning_rate": 9.819196428571429e-06, "loss": 33.2053, "step": 12847 }, { "epoch": 305.90746268656716, "grad_norm": 26.65155601501465, "learning_rate": 9.818452380952382e-06, "loss": 33.0502, "step": 12848 }, { "epoch": 305.93134328358207, "grad_norm": 26.55497932434082, "learning_rate": 9.817708333333334e-06, "loss": 34.7496, "step": 12849 }, { "epoch": 305.95522388059703, "grad_norm": 34.47642135620117, "learning_rate": 9.816964285714287e-06, "loss": 34.3054, "step": 12850 }, { "epoch": 305.97910447761194, "grad_norm": 26.32135772705078, "learning_rate": 9.816220238095238e-06, "loss": 34.1304, "step": 12851 }, { "epoch": 306.0, "grad_norm": 29.453325271606445, "learning_rate": 9.81547619047619e-06, "loss": 29.8504, "step": 12852 }, { "epoch": 306.0238805970149, "grad_norm": 31.310495376586914, "learning_rate": 9.814732142857143e-06, "loss": 34.8228, "step": 12853 }, { "epoch": 306.0477611940299, "grad_norm": 29.23021697998047, "learning_rate": 9.813988095238096e-06, "loss": 34.135, "step": 12854 }, { "epoch": 306.0716417910448, "grad_norm": 28.847148895263672, "learning_rate": 9.813244047619049e-06, "loss": 34.2013, "step": 12855 }, { "epoch": 306.0955223880597, "grad_norm": 32.356266021728516, "learning_rate": 9.8125e-06, "loss": 35.2559, "step": 12856 }, { "epoch": 306.1194029850746, "grad_norm": 27.931236267089844, "learning_rate": 9.811755952380954e-06, "loss": 33.5151, "step": 12857 }, { "epoch": 306.14328358208957, "grad_norm": 28.33109474182129, "learning_rate": 9.811011904761905e-06, "loss": 33.693, "step": 12858 }, { "epoch": 306.1671641791045, "grad_norm": 30.227746963500977, "learning_rate": 9.810267857142858e-06, "loss": 34.2395, "step": 12859 }, { "epoch": 306.1910447761194, "grad_norm": 30.9976749420166, "learning_rate": 9.80952380952381e-06, "loss": 35.1999, "step": 12860 }, { "epoch": 306.21492537313435, "grad_norm": 25.662109375, "learning_rate": 9.808779761904763e-06, "loss": 34.5391, "step": 12861 }, { "epoch": 306.23880597014926, "grad_norm": 31.476734161376953, "learning_rate": 9.808035714285716e-06, "loss": 34.0768, "step": 12862 }, { "epoch": 306.26268656716417, "grad_norm": 27.349348068237305, "learning_rate": 9.807291666666667e-06, "loss": 34.3984, "step": 12863 }, { "epoch": 306.28656716417913, "grad_norm": 32.680023193359375, "learning_rate": 9.80654761904762e-06, "loss": 34.6098, "step": 12864 }, { "epoch": 306.31044776119404, "grad_norm": 28.24233627319336, "learning_rate": 9.805803571428572e-06, "loss": 34.8734, "step": 12865 }, { "epoch": 306.33432835820895, "grad_norm": 29.77298927307129, "learning_rate": 9.805059523809525e-06, "loss": 32.5477, "step": 12866 }, { "epoch": 306.35820895522386, "grad_norm": 29.427587509155273, "learning_rate": 9.804315476190477e-06, "loss": 33.5228, "step": 12867 }, { "epoch": 306.3820895522388, "grad_norm": 30.593961715698242, "learning_rate": 9.803571428571428e-06, "loss": 34.325, "step": 12868 }, { "epoch": 306.40597014925373, "grad_norm": 28.113536834716797, "learning_rate": 9.802827380952383e-06, "loss": 32.7251, "step": 12869 }, { "epoch": 306.42985074626864, "grad_norm": 30.79559326171875, "learning_rate": 9.802083333333334e-06, "loss": 33.5964, "step": 12870 }, { "epoch": 306.4537313432836, "grad_norm": 29.22075653076172, "learning_rate": 9.801339285714287e-06, "loss": 32.6929, "step": 12871 }, { "epoch": 306.4776119402985, "grad_norm": 29.520912170410156, "learning_rate": 9.80059523809524e-06, "loss": 33.0633, "step": 12872 }, { "epoch": 306.5014925373134, "grad_norm": 27.607057571411133, "learning_rate": 9.799851190476192e-06, "loss": 35.2869, "step": 12873 }, { "epoch": 306.52537313432833, "grad_norm": 31.058963775634766, "learning_rate": 9.799107142857145e-06, "loss": 33.4048, "step": 12874 }, { "epoch": 306.5492537313433, "grad_norm": 26.349206924438477, "learning_rate": 9.798363095238096e-06, "loss": 33.7153, "step": 12875 }, { "epoch": 306.5731343283582, "grad_norm": 29.270294189453125, "learning_rate": 9.797619047619048e-06, "loss": 34.4316, "step": 12876 }, { "epoch": 306.5970149253731, "grad_norm": 25.742521286010742, "learning_rate": 9.796875000000001e-06, "loss": 33.7534, "step": 12877 }, { "epoch": 306.6208955223881, "grad_norm": 32.86083221435547, "learning_rate": 9.796130952380954e-06, "loss": 33.7219, "step": 12878 }, { "epoch": 306.644776119403, "grad_norm": 30.2369384765625, "learning_rate": 9.795386904761905e-06, "loss": 32.5957, "step": 12879 }, { "epoch": 306.6686567164179, "grad_norm": 30.621706008911133, "learning_rate": 9.794642857142857e-06, "loss": 33.9441, "step": 12880 }, { "epoch": 306.6925373134328, "grad_norm": 26.510602951049805, "learning_rate": 9.79389880952381e-06, "loss": 33.5628, "step": 12881 }, { "epoch": 306.7164179104478, "grad_norm": 28.74705696105957, "learning_rate": 9.793154761904763e-06, "loss": 33.8791, "step": 12882 }, { "epoch": 306.7402985074627, "grad_norm": 26.187252044677734, "learning_rate": 9.792410714285715e-06, "loss": 33.6656, "step": 12883 }, { "epoch": 306.7641791044776, "grad_norm": 30.24416160583496, "learning_rate": 9.791666666666666e-06, "loss": 33.413, "step": 12884 }, { "epoch": 306.78805970149256, "grad_norm": 27.702054977416992, "learning_rate": 9.79092261904762e-06, "loss": 33.824, "step": 12885 }, { "epoch": 306.81194029850747, "grad_norm": 29.828664779663086, "learning_rate": 9.790178571428572e-06, "loss": 33.0718, "step": 12886 }, { "epoch": 306.8358208955224, "grad_norm": 29.822650909423828, "learning_rate": 9.789434523809524e-06, "loss": 33.8282, "step": 12887 }, { "epoch": 306.85970149253734, "grad_norm": 31.442548751831055, "learning_rate": 9.788690476190477e-06, "loss": 33.5222, "step": 12888 }, { "epoch": 306.88358208955225, "grad_norm": 28.49906349182129, "learning_rate": 9.78794642857143e-06, "loss": 32.341, "step": 12889 }, { "epoch": 306.90746268656716, "grad_norm": 31.756093978881836, "learning_rate": 9.787202380952382e-06, "loss": 33.9306, "step": 12890 }, { "epoch": 306.93134328358207, "grad_norm": 28.60268211364746, "learning_rate": 9.786458333333333e-06, "loss": 34.6159, "step": 12891 }, { "epoch": 306.95522388059703, "grad_norm": 27.625715255737305, "learning_rate": 9.785714285714286e-06, "loss": 33.7364, "step": 12892 }, { "epoch": 306.97910447761194, "grad_norm": 27.08561897277832, "learning_rate": 9.784970238095239e-06, "loss": 34.2786, "step": 12893 }, { "epoch": 307.0, "grad_norm": 26.741044998168945, "learning_rate": 9.784226190476192e-06, "loss": 29.7322, "step": 12894 }, { "epoch": 307.0238805970149, "grad_norm": 29.69178581237793, "learning_rate": 9.783482142857144e-06, "loss": 33.1324, "step": 12895 }, { "epoch": 307.0477611940299, "grad_norm": 30.632442474365234, "learning_rate": 9.782738095238095e-06, "loss": 32.5834, "step": 12896 }, { "epoch": 307.0716417910448, "grad_norm": 28.92683982849121, "learning_rate": 9.78199404761905e-06, "loss": 34.3942, "step": 12897 }, { "epoch": 307.0955223880597, "grad_norm": 27.258102416992188, "learning_rate": 9.78125e-06, "loss": 34.2923, "step": 12898 }, { "epoch": 307.1194029850746, "grad_norm": 22.587934494018555, "learning_rate": 9.780505952380953e-06, "loss": 34.0427, "step": 12899 }, { "epoch": 307.14328358208957, "grad_norm": 29.33913803100586, "learning_rate": 9.779761904761906e-06, "loss": 34.9919, "step": 12900 }, { "epoch": 307.1671641791045, "grad_norm": 25.18365478515625, "learning_rate": 9.779017857142859e-06, "loss": 34.6795, "step": 12901 }, { "epoch": 307.1910447761194, "grad_norm": 31.733814239501953, "learning_rate": 9.778273809523811e-06, "loss": 33.973, "step": 12902 }, { "epoch": 307.21492537313435, "grad_norm": 28.06524085998535, "learning_rate": 9.777529761904762e-06, "loss": 34.0458, "step": 12903 }, { "epoch": 307.23880597014926, "grad_norm": 28.391054153442383, "learning_rate": 9.776785714285715e-06, "loss": 33.2156, "step": 12904 }, { "epoch": 307.26268656716417, "grad_norm": 26.460420608520508, "learning_rate": 9.776041666666668e-06, "loss": 33.1812, "step": 12905 }, { "epoch": 307.28656716417913, "grad_norm": 24.927038192749023, "learning_rate": 9.77529761904762e-06, "loss": 33.4322, "step": 12906 }, { "epoch": 307.31044776119404, "grad_norm": 24.9912052154541, "learning_rate": 9.774553571428571e-06, "loss": 34.2864, "step": 12907 }, { "epoch": 307.33432835820895, "grad_norm": 27.373680114746094, "learning_rate": 9.773809523809524e-06, "loss": 34.2386, "step": 12908 }, { "epoch": 307.35820895522386, "grad_norm": 21.200687408447266, "learning_rate": 9.773065476190477e-06, "loss": 35.2577, "step": 12909 }, { "epoch": 307.3820895522388, "grad_norm": 26.07646369934082, "learning_rate": 9.77232142857143e-06, "loss": 33.0107, "step": 12910 }, { "epoch": 307.40597014925373, "grad_norm": 18.282670974731445, "learning_rate": 9.771577380952382e-06, "loss": 32.8028, "step": 12911 }, { "epoch": 307.42985074626864, "grad_norm": 27.18486213684082, "learning_rate": 9.770833333333333e-06, "loss": 32.7475, "step": 12912 }, { "epoch": 307.4537313432836, "grad_norm": 24.014129638671875, "learning_rate": 9.770089285714287e-06, "loss": 34.0267, "step": 12913 }, { "epoch": 307.4776119402985, "grad_norm": 25.076383590698242, "learning_rate": 9.769345238095238e-06, "loss": 33.6766, "step": 12914 }, { "epoch": 307.5014925373134, "grad_norm": 23.164873123168945, "learning_rate": 9.768601190476191e-06, "loss": 34.2511, "step": 12915 }, { "epoch": 307.52537313432833, "grad_norm": 24.044761657714844, "learning_rate": 9.767857142857144e-06, "loss": 34.5234, "step": 12916 }, { "epoch": 307.5492537313433, "grad_norm": 21.319169998168945, "learning_rate": 9.767113095238097e-06, "loss": 33.1081, "step": 12917 }, { "epoch": 307.5731343283582, "grad_norm": 20.085777282714844, "learning_rate": 9.76636904761905e-06, "loss": 33.9557, "step": 12918 }, { "epoch": 307.5970149253731, "grad_norm": 18.691049575805664, "learning_rate": 9.765625e-06, "loss": 33.961, "step": 12919 }, { "epoch": 307.6208955223881, "grad_norm": 22.317243576049805, "learning_rate": 9.764880952380953e-06, "loss": 33.8924, "step": 12920 }, { "epoch": 307.644776119403, "grad_norm": 17.514055252075195, "learning_rate": 9.764136904761906e-06, "loss": 33.7417, "step": 12921 }, { "epoch": 307.6686567164179, "grad_norm": 23.589597702026367, "learning_rate": 9.763392857142858e-06, "loss": 33.7327, "step": 12922 }, { "epoch": 307.6925373134328, "grad_norm": 20.747957229614258, "learning_rate": 9.762648809523811e-06, "loss": 34.4243, "step": 12923 }, { "epoch": 307.7164179104478, "grad_norm": 20.31751251220703, "learning_rate": 9.761904761904762e-06, "loss": 34.9525, "step": 12924 }, { "epoch": 307.7402985074627, "grad_norm": 22.384435653686523, "learning_rate": 9.761160714285715e-06, "loss": 33.6509, "step": 12925 }, { "epoch": 307.7641791044776, "grad_norm": 19.30194664001465, "learning_rate": 9.760416666666667e-06, "loss": 34.2968, "step": 12926 }, { "epoch": 307.78805970149256, "grad_norm": 21.226215362548828, "learning_rate": 9.75967261904762e-06, "loss": 34.3109, "step": 12927 }, { "epoch": 307.81194029850747, "grad_norm": 20.73770523071289, "learning_rate": 9.758928571428573e-06, "loss": 32.4683, "step": 12928 }, { "epoch": 307.8358208955224, "grad_norm": NaN, "learning_rate": 9.758184523809524e-06, "loss": 43.1886, "step": 12929 }, { "epoch": 307.85970149253734, "grad_norm": 20.99942398071289, "learning_rate": 9.758184523809524e-06, "loss": 34.5113, "step": 12930 }, { "epoch": 307.88358208955225, "grad_norm": 16.915035247802734, "learning_rate": 9.757440476190478e-06, "loss": 33.5386, "step": 12931 }, { "epoch": 307.90746268656716, "grad_norm": 22.421545028686523, "learning_rate": 9.756696428571429e-06, "loss": 33.0041, "step": 12932 }, { "epoch": 307.93134328358207, "grad_norm": 18.106794357299805, "learning_rate": 9.755952380952382e-06, "loss": 33.063, "step": 12933 }, { "epoch": 307.95522388059703, "grad_norm": 19.077604293823242, "learning_rate": 9.755208333333334e-06, "loss": 33.5393, "step": 12934 }, { "epoch": 307.97910447761194, "grad_norm": 20.95818328857422, "learning_rate": 9.754464285714287e-06, "loss": 34.7901, "step": 12935 }, { "epoch": 308.0, "grad_norm": 17.7838191986084, "learning_rate": 9.753720238095238e-06, "loss": 28.7349, "step": 12936 }, { "epoch": 308.0238805970149, "grad_norm": 16.09705924987793, "learning_rate": 9.75297619047619e-06, "loss": 34.0701, "step": 12937 }, { "epoch": 308.0477611940299, "grad_norm": 23.41761589050293, "learning_rate": 9.752232142857143e-06, "loss": 34.0079, "step": 12938 }, { "epoch": 308.0716417910448, "grad_norm": 17.222984313964844, "learning_rate": 9.751488095238096e-06, "loss": 32.2955, "step": 12939 }, { "epoch": 308.0955223880597, "grad_norm": 19.617464065551758, "learning_rate": 9.750744047619049e-06, "loss": 33.196, "step": 12940 }, { "epoch": 308.1194029850746, "grad_norm": 17.333797454833984, "learning_rate": 9.75e-06, "loss": 33.097, "step": 12941 }, { "epoch": 308.14328358208957, "grad_norm": 20.236370086669922, "learning_rate": 9.749255952380953e-06, "loss": 34.5801, "step": 12942 }, { "epoch": 308.1671641791045, "grad_norm": 15.978302955627441, "learning_rate": 9.748511904761905e-06, "loss": 33.2588, "step": 12943 }, { "epoch": 308.1910447761194, "grad_norm": 18.41038703918457, "learning_rate": 9.747767857142858e-06, "loss": 33.9537, "step": 12944 }, { "epoch": 308.21492537313435, "grad_norm": 15.606754302978516, "learning_rate": 9.74702380952381e-06, "loss": 33.7234, "step": 12945 }, { "epoch": 308.23880597014926, "grad_norm": 19.32736587524414, "learning_rate": 9.746279761904762e-06, "loss": 33.4628, "step": 12946 }, { "epoch": 308.26268656716417, "grad_norm": 18.63032341003418, "learning_rate": 9.745535714285716e-06, "loss": 33.4484, "step": 12947 }, { "epoch": 308.28656716417913, "grad_norm": 16.7097110748291, "learning_rate": 9.744791666666667e-06, "loss": 33.3486, "step": 12948 }, { "epoch": 308.31044776119404, "grad_norm": 19.329191207885742, "learning_rate": 9.74404761904762e-06, "loss": 34.6827, "step": 12949 }, { "epoch": 308.33432835820895, "grad_norm": 14.51841926574707, "learning_rate": 9.743303571428572e-06, "loss": 34.5459, "step": 12950 }, { "epoch": 308.35820895522386, "grad_norm": 22.095539093017578, "learning_rate": 9.742559523809525e-06, "loss": 33.3448, "step": 12951 }, { "epoch": 308.3820895522388, "grad_norm": 17.434141159057617, "learning_rate": 9.741815476190478e-06, "loss": 33.8781, "step": 12952 }, { "epoch": 308.40597014925373, "grad_norm": 17.55922508239746, "learning_rate": 9.741071428571429e-06, "loss": 34.3081, "step": 12953 }, { "epoch": 308.42985074626864, "grad_norm": 17.018356323242188, "learning_rate": 9.740327380952381e-06, "loss": 33.655, "step": 12954 }, { "epoch": 308.4537313432836, "grad_norm": 17.0175724029541, "learning_rate": 9.739583333333334e-06, "loss": 32.7921, "step": 12955 }, { "epoch": 308.4776119402985, "grad_norm": NaN, "learning_rate": 9.738839285714287e-06, "loss": 58.4704, "step": 12956 }, { "epoch": 308.5014925373134, "grad_norm": 16.916526794433594, "learning_rate": 9.738839285714287e-06, "loss": 35.8149, "step": 12957 }, { "epoch": 308.52537313432833, "grad_norm": 18.18893051147461, "learning_rate": 9.73809523809524e-06, "loss": 32.7847, "step": 12958 }, { "epoch": 308.5492537313433, "grad_norm": 14.804588317871094, "learning_rate": 9.73735119047619e-06, "loss": 33.8423, "step": 12959 }, { "epoch": 308.5731343283582, "grad_norm": 16.292497634887695, "learning_rate": 9.736607142857145e-06, "loss": 34.3412, "step": 12960 }, { "epoch": 308.5970149253731, "grad_norm": 18.089969635009766, "learning_rate": 9.735863095238096e-06, "loss": 34.3364, "step": 12961 }, { "epoch": 308.6208955223881, "grad_norm": 20.194747924804688, "learning_rate": 9.735119047619048e-06, "loss": 34.4244, "step": 12962 }, { "epoch": 308.644776119403, "grad_norm": 15.840314865112305, "learning_rate": 9.734375000000001e-06, "loss": 33.2715, "step": 12963 }, { "epoch": 308.6686567164179, "grad_norm": 16.695568084716797, "learning_rate": 9.733630952380954e-06, "loss": 34.0288, "step": 12964 }, { "epoch": 308.6925373134328, "grad_norm": 17.103296279907227, "learning_rate": 9.732886904761907e-06, "loss": 33.7916, "step": 12965 }, { "epoch": 308.7164179104478, "grad_norm": 16.213998794555664, "learning_rate": 9.732142857142858e-06, "loss": 34.0483, "step": 12966 }, { "epoch": 308.7402985074627, "grad_norm": 20.33165740966797, "learning_rate": 9.73139880952381e-06, "loss": 33.7052, "step": 12967 }, { "epoch": 308.7641791044776, "grad_norm": 16.87818717956543, "learning_rate": 9.730654761904763e-06, "loss": 33.2133, "step": 12968 }, { "epoch": 308.78805970149256, "grad_norm": 14.578195571899414, "learning_rate": 9.729910714285716e-06, "loss": 35.137, "step": 12969 }, { "epoch": 308.81194029850747, "grad_norm": 16.438709259033203, "learning_rate": 9.729166666666667e-06, "loss": 33.9921, "step": 12970 }, { "epoch": 308.8358208955224, "grad_norm": 15.464425086975098, "learning_rate": 9.72842261904762e-06, "loss": 33.0498, "step": 12971 }, { "epoch": 308.85970149253734, "grad_norm": NaN, "learning_rate": 9.727678571428572e-06, "loss": 34.0863, "step": 12972 }, { "epoch": 308.88358208955225, "grad_norm": 19.971511840820312, "learning_rate": 9.727678571428572e-06, "loss": 34.1805, "step": 12973 }, { "epoch": 308.90746268656716, "grad_norm": 16.907794952392578, "learning_rate": 9.726934523809525e-06, "loss": 34.1479, "step": 12974 }, { "epoch": 308.93134328358207, "grad_norm": 16.841999053955078, "learning_rate": 9.726190476190477e-06, "loss": 33.5721, "step": 12975 }, { "epoch": 308.95522388059703, "grad_norm": 19.87732696533203, "learning_rate": 9.725446428571428e-06, "loss": 33.8914, "step": 12976 }, { "epoch": 308.97910447761194, "grad_norm": 15.79689884185791, "learning_rate": 9.724702380952383e-06, "loss": 34.1774, "step": 12977 }, { "epoch": 309.0, "grad_norm": 23.370094299316406, "learning_rate": 9.723958333333334e-06, "loss": 29.8413, "step": 12978 }, { "epoch": 309.0238805970149, "grad_norm": 17.6906681060791, "learning_rate": 9.723214285714286e-06, "loss": 34.4275, "step": 12979 }, { "epoch": 309.0477611940299, "grad_norm": 24.894100189208984, "learning_rate": 9.722470238095239e-06, "loss": 33.7288, "step": 12980 }, { "epoch": 309.0716417910448, "grad_norm": 18.9682559967041, "learning_rate": 9.721726190476192e-06, "loss": 34.7069, "step": 12981 }, { "epoch": 309.0955223880597, "grad_norm": 26.61629867553711, "learning_rate": 9.720982142857144e-06, "loss": 32.9412, "step": 12982 }, { "epoch": 309.1194029850746, "grad_norm": 21.343372344970703, "learning_rate": 9.720238095238095e-06, "loss": 33.177, "step": 12983 }, { "epoch": 309.14328358208957, "grad_norm": 20.692062377929688, "learning_rate": 9.719494047619048e-06, "loss": 33.9896, "step": 12984 }, { "epoch": 309.1671641791045, "grad_norm": 22.972320556640625, "learning_rate": 9.71875e-06, "loss": 34.4122, "step": 12985 }, { "epoch": 309.1910447761194, "grad_norm": 20.217164993286133, "learning_rate": 9.718005952380953e-06, "loss": 33.561, "step": 12986 }, { "epoch": 309.21492537313435, "grad_norm": 16.543354034423828, "learning_rate": 9.717261904761906e-06, "loss": 32.8687, "step": 12987 }, { "epoch": 309.23880597014926, "grad_norm": 26.884403228759766, "learning_rate": 9.716517857142857e-06, "loss": 33.1497, "step": 12988 }, { "epoch": 309.26268656716417, "grad_norm": 16.741165161132812, "learning_rate": 9.715773809523812e-06, "loss": 33.7586, "step": 12989 }, { "epoch": 309.28656716417913, "grad_norm": 32.45367431640625, "learning_rate": 9.715029761904762e-06, "loss": 32.623, "step": 12990 }, { "epoch": 309.31044776119404, "grad_norm": 21.32443618774414, "learning_rate": 9.714285714285715e-06, "loss": 32.6459, "step": 12991 }, { "epoch": 309.33432835820895, "grad_norm": 30.66120147705078, "learning_rate": 9.713541666666668e-06, "loss": 33.9185, "step": 12992 }, { "epoch": 309.35820895522386, "grad_norm": 27.259050369262695, "learning_rate": 9.71279761904762e-06, "loss": 35.1806, "step": 12993 }, { "epoch": 309.3820895522388, "grad_norm": 30.870134353637695, "learning_rate": 9.712053571428573e-06, "loss": 32.6699, "step": 12994 }, { "epoch": 309.40597014925373, "grad_norm": 25.539621353149414, "learning_rate": 9.711309523809524e-06, "loss": 35.1825, "step": 12995 }, { "epoch": 309.42985074626864, "grad_norm": 30.160842895507812, "learning_rate": 9.710565476190477e-06, "loss": 34.7504, "step": 12996 }, { "epoch": 309.4537313432836, "grad_norm": 22.481428146362305, "learning_rate": 9.70982142857143e-06, "loss": 33.4778, "step": 12997 }, { "epoch": 309.4776119402985, "grad_norm": 30.42070198059082, "learning_rate": 9.709077380952382e-06, "loss": 33.0725, "step": 12998 }, { "epoch": 309.5014925373134, "grad_norm": 20.800201416015625, "learning_rate": 9.708333333333333e-06, "loss": 35.0291, "step": 12999 }, { "epoch": 309.52537313432833, "grad_norm": 32.657894134521484, "learning_rate": 9.707589285714286e-06, "loss": 33.2058, "step": 13000 }, { "epoch": 309.5492537313433, "grad_norm": 27.442174911499023, "learning_rate": 9.706845238095239e-06, "loss": 33.5788, "step": 13001 }, { "epoch": 309.5731343283582, "grad_norm": 30.188657760620117, "learning_rate": 9.706101190476191e-06, "loss": 33.1575, "step": 13002 }, { "epoch": 309.5970149253731, "grad_norm": 25.580913543701172, "learning_rate": 9.705357142857144e-06, "loss": 33.398, "step": 13003 }, { "epoch": 309.6208955223881, "grad_norm": 29.054780960083008, "learning_rate": 9.704613095238095e-06, "loss": 33.1039, "step": 13004 }, { "epoch": 309.644776119403, "grad_norm": 25.801345825195312, "learning_rate": 9.70386904761905e-06, "loss": 34.3573, "step": 13005 }, { "epoch": 309.6686567164179, "grad_norm": 26.575672149658203, "learning_rate": 9.703125e-06, "loss": 34.532, "step": 13006 }, { "epoch": 309.6925373134328, "grad_norm": 23.786701202392578, "learning_rate": 9.702380952380953e-06, "loss": 32.8245, "step": 13007 }, { "epoch": 309.7164179104478, "grad_norm": 24.64287757873535, "learning_rate": 9.701636904761906e-06, "loss": 33.9079, "step": 13008 }, { "epoch": 309.7402985074627, "grad_norm": 25.367307662963867, "learning_rate": 9.700892857142858e-06, "loss": 32.5972, "step": 13009 }, { "epoch": 309.7641791044776, "grad_norm": 19.622753143310547, "learning_rate": 9.700148809523811e-06, "loss": 33.8491, "step": 13010 }, { "epoch": 309.78805970149256, "grad_norm": 23.75461196899414, "learning_rate": 9.699404761904762e-06, "loss": 34.1387, "step": 13011 }, { "epoch": 309.81194029850747, "grad_norm": 19.268911361694336, "learning_rate": 9.698660714285715e-06, "loss": 35.2761, "step": 13012 }, { "epoch": 309.8358208955224, "grad_norm": 16.754852294921875, "learning_rate": 9.697916666666667e-06, "loss": 33.3417, "step": 13013 }, { "epoch": 309.85970149253734, "grad_norm": 19.657302856445312, "learning_rate": 9.69717261904762e-06, "loss": 34.0011, "step": 13014 }, { "epoch": 309.88358208955225, "grad_norm": 19.572189331054688, "learning_rate": 9.696428571428573e-06, "loss": 33.6482, "step": 13015 }, { "epoch": 309.90746268656716, "grad_norm": 13.575444221496582, "learning_rate": 9.695684523809524e-06, "loss": 34.9708, "step": 13016 }, { "epoch": 309.93134328358207, "grad_norm": 21.723373413085938, "learning_rate": 9.694940476190478e-06, "loss": 35.3739, "step": 13017 }, { "epoch": 309.95522388059703, "grad_norm": 16.05686378479004, "learning_rate": 9.69419642857143e-06, "loss": 34.6028, "step": 13018 }, { "epoch": 309.97910447761194, "grad_norm": 19.51734733581543, "learning_rate": 9.693452380952382e-06, "loss": 33.1861, "step": 13019 }, { "epoch": 310.0, "grad_norm": 15.258634567260742, "learning_rate": 9.692708333333335e-06, "loss": 28.8878, "step": 13020 }, { "epoch": 310.0238805970149, "grad_norm": 21.517152786254883, "learning_rate": 9.691964285714287e-06, "loss": 33.3936, "step": 13021 }, { "epoch": 310.0477611940299, "grad_norm": 16.182579040527344, "learning_rate": 9.69122023809524e-06, "loss": 32.5093, "step": 13022 }, { "epoch": 310.0716417910448, "grad_norm": 20.20163917541504, "learning_rate": 9.690476190476191e-06, "loss": 32.5731, "step": 13023 }, { "epoch": 310.0955223880597, "grad_norm": 18.66176414489746, "learning_rate": 9.689732142857144e-06, "loss": 34.2317, "step": 13024 }, { "epoch": 310.1194029850746, "grad_norm": 19.497772216796875, "learning_rate": 9.688988095238096e-06, "loss": 34.5274, "step": 13025 }, { "epoch": 310.14328358208957, "grad_norm": 16.244461059570312, "learning_rate": 9.688244047619049e-06, "loss": 32.709, "step": 13026 }, { "epoch": 310.1671641791045, "grad_norm": 19.932151794433594, "learning_rate": 9.6875e-06, "loss": 32.2925, "step": 13027 }, { "epoch": 310.1910447761194, "grad_norm": 18.922943115234375, "learning_rate": 9.686755952380953e-06, "loss": 34.0793, "step": 13028 }, { "epoch": 310.21492537313435, "grad_norm": 18.103727340698242, "learning_rate": 9.686011904761905e-06, "loss": 33.6132, "step": 13029 }, { "epoch": 310.23880597014926, "grad_norm": 17.927507400512695, "learning_rate": 9.685267857142858e-06, "loss": 33.7991, "step": 13030 }, { "epoch": 310.26268656716417, "grad_norm": 16.29496192932129, "learning_rate": 9.68452380952381e-06, "loss": 32.8704, "step": 13031 }, { "epoch": 310.28656716417913, "grad_norm": 17.598722457885742, "learning_rate": 9.683779761904762e-06, "loss": 34.1045, "step": 13032 }, { "epoch": 310.31044776119404, "grad_norm": 15.629679679870605, "learning_rate": 9.683035714285714e-06, "loss": 33.9642, "step": 13033 }, { "epoch": 310.33432835820895, "grad_norm": 15.261300086975098, "learning_rate": 9.682291666666667e-06, "loss": 33.927, "step": 13034 }, { "epoch": 310.35820895522386, "grad_norm": 20.092823028564453, "learning_rate": 9.68154761904762e-06, "loss": 34.0795, "step": 13035 }, { "epoch": 310.3820895522388, "grad_norm": 19.83094596862793, "learning_rate": 9.680803571428572e-06, "loss": 33.7482, "step": 13036 }, { "epoch": 310.40597014925373, "grad_norm": 16.10625457763672, "learning_rate": 9.680059523809523e-06, "loss": 33.9186, "step": 13037 }, { "epoch": 310.42985074626864, "grad_norm": 17.777740478515625, "learning_rate": 9.679315476190478e-06, "loss": 33.5461, "step": 13038 }, { "epoch": 310.4537313432836, "grad_norm": 20.631752014160156, "learning_rate": 9.678571428571429e-06, "loss": 34.6323, "step": 13039 }, { "epoch": 310.4776119402985, "grad_norm": 18.21401023864746, "learning_rate": 9.677827380952382e-06, "loss": 34.0632, "step": 13040 }, { "epoch": 310.5014925373134, "grad_norm": 15.418707847595215, "learning_rate": 9.677083333333334e-06, "loss": 32.5218, "step": 13041 }, { "epoch": 310.52537313432833, "grad_norm": NaN, "learning_rate": 9.676339285714287e-06, "loss": 55.8785, "step": 13042 }, { "epoch": 310.5492537313433, "grad_norm": 13.48697280883789, "learning_rate": 9.676339285714287e-06, "loss": 33.587, "step": 13043 }, { "epoch": 310.5731343283582, "grad_norm": 20.246139526367188, "learning_rate": 9.67559523809524e-06, "loss": 34.176, "step": 13044 }, { "epoch": 310.5970149253731, "grad_norm": 14.433034896850586, "learning_rate": 9.67485119047619e-06, "loss": 32.9637, "step": 13045 }, { "epoch": 310.6208955223881, "grad_norm": 16.697980880737305, "learning_rate": 9.674107142857143e-06, "loss": 33.3235, "step": 13046 }, { "epoch": 310.644776119403, "grad_norm": 17.75384521484375, "learning_rate": 9.673363095238096e-06, "loss": 34.1102, "step": 13047 }, { "epoch": 310.6686567164179, "grad_norm": 23.510988235473633, "learning_rate": 9.672619047619049e-06, "loss": 33.492, "step": 13048 }, { "epoch": 310.6925373134328, "grad_norm": 20.356592178344727, "learning_rate": 9.671875000000001e-06, "loss": 35.2349, "step": 13049 }, { "epoch": 310.7164179104478, "grad_norm": 15.113434791564941, "learning_rate": 9.671130952380952e-06, "loss": 34.0031, "step": 13050 }, { "epoch": 310.7402985074627, "grad_norm": 21.017969131469727, "learning_rate": 9.670386904761907e-06, "loss": 33.8223, "step": 13051 }, { "epoch": 310.7641791044776, "grad_norm": 20.728343963623047, "learning_rate": 9.669642857142858e-06, "loss": 34.2523, "step": 13052 }, { "epoch": 310.78805970149256, "grad_norm": 14.433070182800293, "learning_rate": 9.66889880952381e-06, "loss": 34.6699, "step": 13053 }, { "epoch": 310.81194029850747, "grad_norm": 22.28704261779785, "learning_rate": 9.668154761904763e-06, "loss": 33.1882, "step": 13054 }, { "epoch": 310.8358208955224, "grad_norm": 19.53321647644043, "learning_rate": 9.667410714285716e-06, "loss": 33.8604, "step": 13055 }, { "epoch": 310.85970149253734, "grad_norm": 14.87886905670166, "learning_rate": 9.666666666666667e-06, "loss": 33.4077, "step": 13056 }, { "epoch": 310.88358208955225, "grad_norm": 19.086444854736328, "learning_rate": 9.66592261904762e-06, "loss": 33.7193, "step": 13057 }, { "epoch": 310.90746268656716, "grad_norm": 19.99090003967285, "learning_rate": 9.665178571428572e-06, "loss": 34.5341, "step": 13058 }, { "epoch": 310.93134328358207, "grad_norm": 18.049787521362305, "learning_rate": 9.664434523809525e-06, "loss": 34.3963, "step": 13059 }, { "epoch": 310.95522388059703, "grad_norm": 14.74404239654541, "learning_rate": 9.663690476190477e-06, "loss": 34.2839, "step": 13060 }, { "epoch": 310.97910447761194, "grad_norm": NaN, "learning_rate": 9.662946428571428e-06, "loss": 38.865, "step": 13061 }, { "epoch": 311.0, "grad_norm": 13.75111198425293, "learning_rate": 9.662946428571428e-06, "loss": 29.714, "step": 13062 }, { "epoch": 311.0238805970149, "grad_norm": 17.926097869873047, "learning_rate": 9.662202380952381e-06, "loss": 33.7766, "step": 13063 }, { "epoch": 311.0477611940299, "grad_norm": 14.747750282287598, "learning_rate": 9.661458333333334e-06, "loss": 33.3906, "step": 13064 }, { "epoch": 311.0716417910448, "grad_norm": 18.3504695892334, "learning_rate": 9.660714285714287e-06, "loss": 33.6988, "step": 13065 }, { "epoch": 311.0955223880597, "grad_norm": 17.141036987304688, "learning_rate": 9.65997023809524e-06, "loss": 34.888, "step": 13066 }, { "epoch": 311.1194029850746, "grad_norm": 20.44035530090332, "learning_rate": 9.65922619047619e-06, "loss": 32.4416, "step": 13067 }, { "epoch": 311.14328358208957, "grad_norm": 14.6190824508667, "learning_rate": 9.658482142857145e-06, "loss": 34.6001, "step": 13068 }, { "epoch": 311.1671641791045, "grad_norm": 18.415260314941406, "learning_rate": 9.657738095238096e-06, "loss": 34.7202, "step": 13069 }, { "epoch": 311.1910447761194, "grad_norm": 16.84659194946289, "learning_rate": 9.656994047619048e-06, "loss": 32.8358, "step": 13070 }, { "epoch": 311.21492537313435, "grad_norm": 16.88626480102539, "learning_rate": 9.656250000000001e-06, "loss": 34.8116, "step": 13071 }, { "epoch": 311.23880597014926, "grad_norm": 18.828983306884766, "learning_rate": 9.655505952380954e-06, "loss": 33.107, "step": 13072 }, { "epoch": 311.26268656716417, "grad_norm": 15.67547607421875, "learning_rate": 9.654761904761906e-06, "loss": 33.8678, "step": 13073 }, { "epoch": 311.28656716417913, "grad_norm": 16.83783721923828, "learning_rate": 9.654017857142857e-06, "loss": 33.0033, "step": 13074 }, { "epoch": 311.31044776119404, "grad_norm": 17.924421310424805, "learning_rate": 9.65327380952381e-06, "loss": 33.6108, "step": 13075 }, { "epoch": 311.33432835820895, "grad_norm": 17.57686424255371, "learning_rate": 9.652529761904763e-06, "loss": 33.5779, "step": 13076 }, { "epoch": 311.35820895522386, "grad_norm": 21.02503776550293, "learning_rate": 9.651785714285715e-06, "loss": 34.783, "step": 13077 }, { "epoch": 311.3820895522388, "grad_norm": 15.584885597229004, "learning_rate": 9.651041666666668e-06, "loss": 33.3379, "step": 13078 }, { "epoch": 311.40597014925373, "grad_norm": 14.647343635559082, "learning_rate": 9.650297619047619e-06, "loss": 34.2257, "step": 13079 }, { "epoch": 311.42985074626864, "grad_norm": 16.879671096801758, "learning_rate": 9.649553571428573e-06, "loss": 34.051, "step": 13080 }, { "epoch": 311.4537313432836, "grad_norm": 18.13416290283203, "learning_rate": 9.648809523809524e-06, "loss": 33.135, "step": 13081 }, { "epoch": 311.4776119402985, "grad_norm": 20.190101623535156, "learning_rate": 9.648065476190477e-06, "loss": 34.515, "step": 13082 }, { "epoch": 311.5014925373134, "grad_norm": 14.234061241149902, "learning_rate": 9.64732142857143e-06, "loss": 33.8728, "step": 13083 }, { "epoch": 311.52537313432833, "grad_norm": 18.39228630065918, "learning_rate": 9.646577380952382e-06, "loss": 34.3784, "step": 13084 }, { "epoch": 311.5492537313433, "grad_norm": 15.494501113891602, "learning_rate": 9.645833333333333e-06, "loss": 33.5588, "step": 13085 }, { "epoch": 311.5731343283582, "grad_norm": 20.299917221069336, "learning_rate": 9.645089285714286e-06, "loss": 33.2555, "step": 13086 }, { "epoch": 311.5970149253731, "grad_norm": 14.360240936279297, "learning_rate": 9.644345238095239e-06, "loss": 32.3675, "step": 13087 }, { "epoch": 311.6208955223881, "grad_norm": 20.752193450927734, "learning_rate": 9.643601190476192e-06, "loss": 34.6455, "step": 13088 }, { "epoch": 311.644776119403, "grad_norm": 18.01266860961914, "learning_rate": 9.642857142857144e-06, "loss": 33.6162, "step": 13089 }, { "epoch": 311.6686567164179, "grad_norm": 18.568958282470703, "learning_rate": 9.642113095238095e-06, "loss": 33.3481, "step": 13090 }, { "epoch": 311.6925373134328, "grad_norm": 17.04592514038086, "learning_rate": 9.641369047619048e-06, "loss": 33.1764, "step": 13091 }, { "epoch": 311.7164179104478, "grad_norm": 16.930374145507812, "learning_rate": 9.640625e-06, "loss": 34.5636, "step": 13092 }, { "epoch": 311.7402985074627, "grad_norm": 15.942654609680176, "learning_rate": 9.639880952380953e-06, "loss": 33.9937, "step": 13093 }, { "epoch": 311.7641791044776, "grad_norm": 14.966394424438477, "learning_rate": 9.639136904761906e-06, "loss": 32.1193, "step": 13094 }, { "epoch": 311.78805970149256, "grad_norm": 14.562058448791504, "learning_rate": 9.638392857142857e-06, "loss": 33.5501, "step": 13095 }, { "epoch": 311.81194029850747, "grad_norm": 14.743454933166504, "learning_rate": 9.637648809523811e-06, "loss": 34.1824, "step": 13096 }, { "epoch": 311.8358208955224, "grad_norm": 15.229239463806152, "learning_rate": 9.636904761904762e-06, "loss": 34.4654, "step": 13097 }, { "epoch": 311.85970149253734, "grad_norm": 18.067148208618164, "learning_rate": 9.636160714285715e-06, "loss": 33.4182, "step": 13098 }, { "epoch": 311.88358208955225, "grad_norm": 16.98919677734375, "learning_rate": 9.635416666666668e-06, "loss": 33.4746, "step": 13099 }, { "epoch": 311.90746268656716, "grad_norm": 18.955854415893555, "learning_rate": 9.63467261904762e-06, "loss": 33.9087, "step": 13100 }, { "epoch": 311.93134328358207, "grad_norm": 12.923398971557617, "learning_rate": 9.633928571428573e-06, "loss": 33.244, "step": 13101 }, { "epoch": 311.95522388059703, "grad_norm": 18.70138168334961, "learning_rate": 9.633184523809524e-06, "loss": 34.3946, "step": 13102 }, { "epoch": 311.97910447761194, "grad_norm": 15.275166511535645, "learning_rate": 9.632440476190477e-06, "loss": 33.0189, "step": 13103 }, { "epoch": 312.0, "grad_norm": 25.734006881713867, "learning_rate": 9.63169642857143e-06, "loss": 30.0553, "step": 13104 }, { "epoch": 312.0238805970149, "grad_norm": 21.362735748291016, "learning_rate": 9.630952380952382e-06, "loss": 33.4624, "step": 13105 }, { "epoch": 312.0477611940299, "grad_norm": 22.884366989135742, "learning_rate": 9.630208333333335e-06, "loss": 33.4485, "step": 13106 }, { "epoch": 312.0716417910448, "grad_norm": 26.826204299926758, "learning_rate": 9.629464285714286e-06, "loss": 33.3296, "step": 13107 }, { "epoch": 312.0955223880597, "grad_norm": 17.927284240722656, "learning_rate": 9.62872023809524e-06, "loss": 33.7918, "step": 13108 }, { "epoch": 312.1194029850746, "grad_norm": 32.59355163574219, "learning_rate": 9.627976190476191e-06, "loss": 33.1228, "step": 13109 }, { "epoch": 312.14328358208957, "grad_norm": 19.587862014770508, "learning_rate": 9.627232142857144e-06, "loss": 33.6721, "step": 13110 }, { "epoch": 312.1671641791045, "grad_norm": 32.56916427612305, "learning_rate": 9.626488095238096e-06, "loss": 34.2825, "step": 13111 }, { "epoch": 312.1910447761194, "grad_norm": 19.549453735351562, "learning_rate": 9.62574404761905e-06, "loss": 33.9742, "step": 13112 }, { "epoch": 312.21492537313435, "grad_norm": 35.69709014892578, "learning_rate": 9.625e-06, "loss": 33.0387, "step": 13113 }, { "epoch": 312.23880597014926, "grad_norm": 27.12348747253418, "learning_rate": 9.624255952380953e-06, "loss": 34.3725, "step": 13114 }, { "epoch": 312.26268656716417, "grad_norm": 36.873992919921875, "learning_rate": 9.623511904761906e-06, "loss": 33.9663, "step": 13115 }, { "epoch": 312.28656716417913, "grad_norm": 29.34783363342285, "learning_rate": 9.622767857142858e-06, "loss": 34.7257, "step": 13116 }, { "epoch": 312.31044776119404, "grad_norm": 33.33332443237305, "learning_rate": 9.622023809523811e-06, "loss": 33.4715, "step": 13117 }, { "epoch": 312.33432835820895, "grad_norm": 26.727169036865234, "learning_rate": 9.621279761904762e-06, "loss": 32.0446, "step": 13118 }, { "epoch": 312.35820895522386, "grad_norm": 42.13288497924805, "learning_rate": 9.620535714285715e-06, "loss": 32.7165, "step": 13119 }, { "epoch": 312.3820895522388, "grad_norm": 33.71300506591797, "learning_rate": 9.619791666666667e-06, "loss": 33.8545, "step": 13120 }, { "epoch": 312.40597014925373, "grad_norm": 32.45813751220703, "learning_rate": 9.61904761904762e-06, "loss": 34.1513, "step": 13121 }, { "epoch": 312.42985074626864, "grad_norm": 30.027828216552734, "learning_rate": 9.618303571428573e-06, "loss": 33.3435, "step": 13122 }, { "epoch": 312.4537313432836, "grad_norm": 33.02032470703125, "learning_rate": 9.617559523809524e-06, "loss": 33.3302, "step": 13123 }, { "epoch": 312.4776119402985, "grad_norm": 24.927526473999023, "learning_rate": 9.616815476190478e-06, "loss": 32.2108, "step": 13124 }, { "epoch": 312.5014925373134, "grad_norm": 39.30095672607422, "learning_rate": 9.616071428571429e-06, "loss": 34.9208, "step": 13125 }, { "epoch": 312.52537313432833, "grad_norm": 32.02609634399414, "learning_rate": 9.615327380952382e-06, "loss": 33.3949, "step": 13126 }, { "epoch": 312.5492537313433, "grad_norm": 31.11638832092285, "learning_rate": 9.614583333333334e-06, "loss": 33.3447, "step": 13127 }, { "epoch": 312.5731343283582, "grad_norm": 30.669084548950195, "learning_rate": 9.613839285714287e-06, "loss": 34.5168, "step": 13128 }, { "epoch": 312.5970149253731, "grad_norm": 29.634124755859375, "learning_rate": 9.61309523809524e-06, "loss": 34.6274, "step": 13129 }, { "epoch": 312.6208955223881, "grad_norm": 27.90740966796875, "learning_rate": 9.61235119047619e-06, "loss": 33.4124, "step": 13130 }, { "epoch": 312.644776119403, "grad_norm": 34.158634185791016, "learning_rate": 9.611607142857143e-06, "loss": 33.0961, "step": 13131 }, { "epoch": 312.6686567164179, "grad_norm": 32.520843505859375, "learning_rate": 9.610863095238096e-06, "loss": 33.2238, "step": 13132 }, { "epoch": 312.6925373134328, "grad_norm": 33.50034713745117, "learning_rate": 9.610119047619049e-06, "loss": 32.6968, "step": 13133 }, { "epoch": 312.7164179104478, "grad_norm": 30.337360382080078, "learning_rate": 9.609375000000001e-06, "loss": 34.2137, "step": 13134 }, { "epoch": 312.7402985074627, "grad_norm": 30.0190372467041, "learning_rate": 9.608630952380952e-06, "loss": 33.8018, "step": 13135 }, { "epoch": 312.7641791044776, "grad_norm": 29.226835250854492, "learning_rate": 9.607886904761905e-06, "loss": 32.7436, "step": 13136 }, { "epoch": 312.78805970149256, "grad_norm": 31.76357650756836, "learning_rate": 9.607142857142858e-06, "loss": 33.9807, "step": 13137 }, { "epoch": 312.81194029850747, "grad_norm": 28.754568099975586, "learning_rate": 9.60639880952381e-06, "loss": 34.8479, "step": 13138 }, { "epoch": 312.8358208955224, "grad_norm": 31.17036247253418, "learning_rate": 9.605654761904763e-06, "loss": 33.1828, "step": 13139 }, { "epoch": 312.85970149253734, "grad_norm": 25.772232055664062, "learning_rate": 9.604910714285714e-06, "loss": 33.6004, "step": 13140 }, { "epoch": 312.88358208955225, "grad_norm": 35.69013214111328, "learning_rate": 9.604166666666669e-06, "loss": 33.6171, "step": 13141 }, { "epoch": 312.90746268656716, "grad_norm": 32.91059112548828, "learning_rate": 9.60342261904762e-06, "loss": 33.9886, "step": 13142 }, { "epoch": 312.93134328358207, "grad_norm": 29.913450241088867, "learning_rate": 9.602678571428572e-06, "loss": 35.3789, "step": 13143 }, { "epoch": 312.95522388059703, "grad_norm": 26.861034393310547, "learning_rate": 9.601934523809523e-06, "loss": 34.2793, "step": 13144 }, { "epoch": 312.97910447761194, "grad_norm": 30.770784378051758, "learning_rate": 9.601190476190478e-06, "loss": 35.0147, "step": 13145 }, { "epoch": 313.0, "grad_norm": 23.079103469848633, "learning_rate": 9.600446428571429e-06, "loss": 28.4521, "step": 13146 }, { "epoch": 313.0238805970149, "grad_norm": 33.886207580566406, "learning_rate": 9.599702380952381e-06, "loss": 33.3916, "step": 13147 }, { "epoch": 313.0477611940299, "grad_norm": 31.83485221862793, "learning_rate": 9.598958333333334e-06, "loss": 33.3536, "step": 13148 }, { "epoch": 313.0716417910448, "grad_norm": 29.32632827758789, "learning_rate": 9.598214285714287e-06, "loss": 32.6224, "step": 13149 }, { "epoch": 313.0955223880597, "grad_norm": 24.941131591796875, "learning_rate": 9.59747023809524e-06, "loss": 34.0434, "step": 13150 }, { "epoch": 313.1194029850746, "grad_norm": 34.72981643676758, "learning_rate": 9.59672619047619e-06, "loss": 32.9394, "step": 13151 }, { "epoch": 313.14328358208957, "grad_norm": 29.2474365234375, "learning_rate": 9.595982142857143e-06, "loss": 33.6498, "step": 13152 }, { "epoch": 313.1671641791045, "grad_norm": 30.723867416381836, "learning_rate": 9.595238095238096e-06, "loss": 32.7678, "step": 13153 }, { "epoch": 313.1910447761194, "grad_norm": 25.35555648803711, "learning_rate": 9.594494047619048e-06, "loss": 33.7004, "step": 13154 }, { "epoch": 313.21492537313435, "grad_norm": 31.069316864013672, "learning_rate": 9.593750000000001e-06, "loss": 33.2519, "step": 13155 }, { "epoch": 313.23880597014926, "grad_norm": 25.28008460998535, "learning_rate": 9.593005952380952e-06, "loss": 33.5377, "step": 13156 }, { "epoch": 313.26268656716417, "grad_norm": 31.649578094482422, "learning_rate": 9.592261904761906e-06, "loss": 33.7276, "step": 13157 }, { "epoch": 313.28656716417913, "grad_norm": 28.521392822265625, "learning_rate": 9.591517857142857e-06, "loss": 35.3197, "step": 13158 }, { "epoch": 313.31044776119404, "grad_norm": 33.34397506713867, "learning_rate": 9.59077380952381e-06, "loss": 34.1836, "step": 13159 }, { "epoch": 313.33432835820895, "grad_norm": 27.889270782470703, "learning_rate": 9.590029761904763e-06, "loss": 33.4468, "step": 13160 }, { "epoch": 313.35820895522386, "grad_norm": 32.41287612915039, "learning_rate": 9.589285714285716e-06, "loss": 33.6495, "step": 13161 }, { "epoch": 313.3820895522388, "grad_norm": 26.815305709838867, "learning_rate": 9.588541666666668e-06, "loss": 35.5384, "step": 13162 }, { "epoch": 313.40597014925373, "grad_norm": 32.66407012939453, "learning_rate": 9.58779761904762e-06, "loss": 33.0279, "step": 13163 }, { "epoch": 313.42985074626864, "grad_norm": 30.003036499023438, "learning_rate": 9.587053571428572e-06, "loss": 33.8409, "step": 13164 }, { "epoch": 313.4537313432836, "grad_norm": 28.761077880859375, "learning_rate": 9.586309523809525e-06, "loss": 32.8447, "step": 13165 }, { "epoch": 313.4776119402985, "grad_norm": 27.731586456298828, "learning_rate": 9.585565476190477e-06, "loss": 33.9266, "step": 13166 }, { "epoch": 313.5014925373134, "grad_norm": 31.831533432006836, "learning_rate": 9.58482142857143e-06, "loss": 33.5968, "step": 13167 }, { "epoch": 313.52537313432833, "grad_norm": 28.910619735717773, "learning_rate": 9.584077380952381e-06, "loss": 33.6701, "step": 13168 }, { "epoch": 313.5492537313433, "grad_norm": 31.396425247192383, "learning_rate": 9.583333333333335e-06, "loss": 33.6382, "step": 13169 }, { "epoch": 313.5731343283582, "grad_norm": 28.483938217163086, "learning_rate": 9.582589285714286e-06, "loss": 33.671, "step": 13170 }, { "epoch": 313.5970149253731, "grad_norm": 32.068485260009766, "learning_rate": 9.581845238095239e-06, "loss": 33.3914, "step": 13171 }, { "epoch": 313.6208955223881, "grad_norm": 29.09919548034668, "learning_rate": 9.58110119047619e-06, "loss": 32.9918, "step": 13172 }, { "epoch": 313.644776119403, "grad_norm": 30.923269271850586, "learning_rate": 9.580357142857144e-06, "loss": 33.3338, "step": 13173 }, { "epoch": 313.6686567164179, "grad_norm": 26.374174118041992, "learning_rate": 9.579613095238095e-06, "loss": 33.6883, "step": 13174 }, { "epoch": 313.6925373134328, "grad_norm": 29.18936538696289, "learning_rate": 9.578869047619048e-06, "loss": 33.8253, "step": 13175 }, { "epoch": 313.7164179104478, "grad_norm": 25.20747947692871, "learning_rate": 9.578125e-06, "loss": 34.1158, "step": 13176 }, { "epoch": 313.7402985074627, "grad_norm": 32.462371826171875, "learning_rate": 9.577380952380953e-06, "loss": 34.0465, "step": 13177 }, { "epoch": 313.7641791044776, "grad_norm": 33.96836471557617, "learning_rate": 9.576636904761906e-06, "loss": 35.0779, "step": 13178 }, { "epoch": 313.78805970149256, "grad_norm": 27.8643856048584, "learning_rate": 9.575892857142857e-06, "loss": 33.4839, "step": 13179 }, { "epoch": 313.81194029850747, "grad_norm": 25.58667755126953, "learning_rate": 9.57514880952381e-06, "loss": 33.3427, "step": 13180 }, { "epoch": 313.8358208955224, "grad_norm": 31.390897750854492, "learning_rate": 9.574404761904762e-06, "loss": 32.8023, "step": 13181 }, { "epoch": 313.85970149253734, "grad_norm": 27.014434814453125, "learning_rate": 9.573660714285715e-06, "loss": 34.274, "step": 13182 }, { "epoch": 313.88358208955225, "grad_norm": 29.599308013916016, "learning_rate": 9.572916666666668e-06, "loss": 34.3252, "step": 13183 }, { "epoch": 313.90746268656716, "grad_norm": 27.959007263183594, "learning_rate": 9.572172619047619e-06, "loss": 34.3166, "step": 13184 }, { "epoch": 313.93134328358207, "grad_norm": 30.436656951904297, "learning_rate": 9.571428571428573e-06, "loss": 32.5167, "step": 13185 }, { "epoch": 313.95522388059703, "grad_norm": 26.427406311035156, "learning_rate": 9.570684523809524e-06, "loss": 33.1055, "step": 13186 }, { "epoch": 313.97910447761194, "grad_norm": 29.928449630737305, "learning_rate": 9.569940476190477e-06, "loss": 33.9663, "step": 13187 }, { "epoch": 314.0, "grad_norm": 24.019044876098633, "learning_rate": 9.56919642857143e-06, "loss": 29.408, "step": 13188 }, { "epoch": 314.0238805970149, "grad_norm": 30.695575714111328, "learning_rate": 9.568452380952382e-06, "loss": 33.567, "step": 13189 }, { "epoch": 314.0477611940299, "grad_norm": 26.884841918945312, "learning_rate": 9.567708333333335e-06, "loss": 32.6028, "step": 13190 }, { "epoch": 314.0716417910448, "grad_norm": 28.075511932373047, "learning_rate": 9.566964285714286e-06, "loss": 32.629, "step": 13191 }, { "epoch": 314.0955223880597, "grad_norm": 28.43887710571289, "learning_rate": 9.566220238095239e-06, "loss": 33.202, "step": 13192 }, { "epoch": 314.1194029850746, "grad_norm": 30.43881607055664, "learning_rate": 9.565476190476191e-06, "loss": 33.9846, "step": 13193 }, { "epoch": 314.14328358208957, "grad_norm": 28.45292854309082, "learning_rate": 9.564732142857144e-06, "loss": 33.7065, "step": 13194 }, { "epoch": 314.1671641791045, "grad_norm": 32.04459762573242, "learning_rate": 9.563988095238097e-06, "loss": 33.7555, "step": 13195 }, { "epoch": 314.1910447761194, "grad_norm": 25.435636520385742, "learning_rate": 9.563244047619048e-06, "loss": 33.9608, "step": 13196 }, { "epoch": 314.21492537313435, "grad_norm": 29.92577362060547, "learning_rate": 9.562500000000002e-06, "loss": 32.9624, "step": 13197 }, { "epoch": 314.23880597014926, "grad_norm": 26.913516998291016, "learning_rate": 9.561755952380953e-06, "loss": 33.7854, "step": 13198 }, { "epoch": 314.26268656716417, "grad_norm": 28.719717025756836, "learning_rate": 9.561011904761906e-06, "loss": 33.8993, "step": 13199 }, { "epoch": 314.28656716417913, "grad_norm": 25.073945999145508, "learning_rate": 9.560267857142857e-06, "loss": 32.7555, "step": 13200 }, { "epoch": 314.31044776119404, "grad_norm": 34.63137435913086, "learning_rate": 9.559523809523811e-06, "loss": 34.0187, "step": 13201 }, { "epoch": 314.33432835820895, "grad_norm": 28.46396827697754, "learning_rate": 9.558779761904762e-06, "loss": 34.6484, "step": 13202 }, { "epoch": 314.35820895522386, "grad_norm": 29.185827255249023, "learning_rate": 9.558035714285715e-06, "loss": 34.3272, "step": 13203 }, { "epoch": 314.3820895522388, "grad_norm": 30.818063735961914, "learning_rate": 9.557291666666667e-06, "loss": 34.4256, "step": 13204 }, { "epoch": 314.40597014925373, "grad_norm": 27.696474075317383, "learning_rate": 9.55654761904762e-06, "loss": 34.1674, "step": 13205 }, { "epoch": 314.42985074626864, "grad_norm": 24.56853675842285, "learning_rate": 9.555803571428573e-06, "loss": 34.6993, "step": 13206 }, { "epoch": 314.4537313432836, "grad_norm": 34.097747802734375, "learning_rate": 9.555059523809524e-06, "loss": 32.0478, "step": 13207 }, { "epoch": 314.4776119402985, "grad_norm": 30.866113662719727, "learning_rate": 9.554315476190477e-06, "loss": 34.5621, "step": 13208 }, { "epoch": 314.5014925373134, "grad_norm": 30.24768829345703, "learning_rate": 9.55357142857143e-06, "loss": 32.2858, "step": 13209 }, { "epoch": 314.52537313432833, "grad_norm": 27.956802368164062, "learning_rate": 9.552827380952382e-06, "loss": 32.5598, "step": 13210 }, { "epoch": 314.5492537313433, "grad_norm": 29.042564392089844, "learning_rate": 9.552083333333335e-06, "loss": 33.5197, "step": 13211 }, { "epoch": 314.5731343283582, "grad_norm": 25.140233993530273, "learning_rate": 9.551339285714286e-06, "loss": 34.1419, "step": 13212 }, { "epoch": 314.5970149253731, "grad_norm": 32.64944076538086, "learning_rate": 9.55059523809524e-06, "loss": 33.718, "step": 13213 }, { "epoch": 314.6208955223881, "grad_norm": 25.82027816772461, "learning_rate": 9.549851190476191e-06, "loss": 33.3611, "step": 13214 }, { "epoch": 314.644776119403, "grad_norm": 31.322126388549805, "learning_rate": 9.549107142857144e-06, "loss": 35.1478, "step": 13215 }, { "epoch": 314.6686567164179, "grad_norm": 29.41959571838379, "learning_rate": 9.548363095238096e-06, "loss": 33.1272, "step": 13216 }, { "epoch": 314.6925373134328, "grad_norm": 26.451223373413086, "learning_rate": 9.547619047619049e-06, "loss": 34.8698, "step": 13217 }, { "epoch": 314.7164179104478, "grad_norm": 24.3366756439209, "learning_rate": 9.546875000000002e-06, "loss": 33.8223, "step": 13218 }, { "epoch": 314.7402985074627, "grad_norm": 31.37820053100586, "learning_rate": 9.546130952380953e-06, "loss": 34.2753, "step": 13219 }, { "epoch": 314.7641791044776, "grad_norm": 26.769657135009766, "learning_rate": 9.545386904761905e-06, "loss": 33.1156, "step": 13220 }, { "epoch": 314.78805970149256, "grad_norm": 32.10585021972656, "learning_rate": 9.544642857142858e-06, "loss": 34.1055, "step": 13221 }, { "epoch": 314.81194029850747, "grad_norm": 30.26285171508789, "learning_rate": 9.54389880952381e-06, "loss": 33.7381, "step": 13222 }, { "epoch": 314.8358208955224, "grad_norm": 27.461288452148438, "learning_rate": 9.543154761904763e-06, "loss": 33.7444, "step": 13223 }, { "epoch": 314.85970149253734, "grad_norm": 26.743072509765625, "learning_rate": 9.542410714285714e-06, "loss": 34.2697, "step": 13224 }, { "epoch": 314.88358208955225, "grad_norm": 28.258312225341797, "learning_rate": 9.541666666666669e-06, "loss": 32.4133, "step": 13225 }, { "epoch": 314.90746268656716, "grad_norm": 22.394941329956055, "learning_rate": 9.54092261904762e-06, "loss": 32.903, "step": 13226 }, { "epoch": 314.93134328358207, "grad_norm": 30.91286277770996, "learning_rate": 9.540178571428572e-06, "loss": 33.2735, "step": 13227 }, { "epoch": 314.95522388059703, "grad_norm": 26.42424774169922, "learning_rate": 9.539434523809525e-06, "loss": 33.1956, "step": 13228 }, { "epoch": 314.97910447761194, "grad_norm": 27.133575439453125, "learning_rate": 9.538690476190478e-06, "loss": 33.8882, "step": 13229 }, { "epoch": 315.0, "grad_norm": 22.723310470581055, "learning_rate": 9.537946428571429e-06, "loss": 28.9, "step": 13230 }, { "epoch": 315.0238805970149, "grad_norm": 26.996562957763672, "learning_rate": 9.537202380952381e-06, "loss": 33.7979, "step": 13231 }, { "epoch": 315.0477611940299, "grad_norm": 23.435802459716797, "learning_rate": 9.536458333333334e-06, "loss": 34.8608, "step": 13232 }, { "epoch": 315.0716417910448, "grad_norm": 32.33258819580078, "learning_rate": 9.535714285714287e-06, "loss": 32.982, "step": 13233 }, { "epoch": 315.0955223880597, "grad_norm": 28.66262435913086, "learning_rate": 9.53497023809524e-06, "loss": 33.3808, "step": 13234 }, { "epoch": 315.1194029850746, "grad_norm": 31.232383728027344, "learning_rate": 9.53422619047619e-06, "loss": 33.6076, "step": 13235 }, { "epoch": 315.14328358208957, "grad_norm": 28.15165138244629, "learning_rate": 9.533482142857143e-06, "loss": 33.4063, "step": 13236 }, { "epoch": 315.1671641791045, "grad_norm": 26.927061080932617, "learning_rate": 9.532738095238096e-06, "loss": 34.2987, "step": 13237 }, { "epoch": 315.1910447761194, "grad_norm": 25.32600975036621, "learning_rate": 9.531994047619049e-06, "loss": 33.1051, "step": 13238 }, { "epoch": 315.21492537313435, "grad_norm": 27.133333206176758, "learning_rate": 9.531250000000001e-06, "loss": 34.715, "step": 13239 }, { "epoch": 315.23880597014926, "grad_norm": 23.734281539916992, "learning_rate": 9.530505952380952e-06, "loss": 32.7595, "step": 13240 }, { "epoch": 315.26268656716417, "grad_norm": 28.86510467529297, "learning_rate": 9.529761904761905e-06, "loss": 33.33, "step": 13241 }, { "epoch": 315.28656716417913, "grad_norm": 28.08711051940918, "learning_rate": 9.529017857142858e-06, "loss": 33.5788, "step": 13242 }, { "epoch": 315.31044776119404, "grad_norm": 30.653244018554688, "learning_rate": 9.52827380952381e-06, "loss": 33.0861, "step": 13243 }, { "epoch": 315.33432835820895, "grad_norm": 28.1591796875, "learning_rate": 9.527529761904763e-06, "loss": 34.6708, "step": 13244 }, { "epoch": 315.35820895522386, "grad_norm": 26.023433685302734, "learning_rate": 9.526785714285714e-06, "loss": 32.9737, "step": 13245 }, { "epoch": 315.3820895522388, "grad_norm": 20.71750831604004, "learning_rate": 9.526041666666668e-06, "loss": 32.6744, "step": 13246 }, { "epoch": 315.40597014925373, "grad_norm": 29.809507369995117, "learning_rate": 9.52529761904762e-06, "loss": 33.5887, "step": 13247 }, { "epoch": 315.42985074626864, "grad_norm": 24.837921142578125, "learning_rate": 9.524553571428572e-06, "loss": 33.7889, "step": 13248 }, { "epoch": 315.4537313432836, "grad_norm": 34.88161087036133, "learning_rate": 9.523809523809525e-06, "loss": 34.8549, "step": 13249 }, { "epoch": 315.4776119402985, "grad_norm": 28.89891242980957, "learning_rate": 9.523065476190477e-06, "loss": 32.6047, "step": 13250 }, { "epoch": 315.5014925373134, "grad_norm": 24.327743530273438, "learning_rate": 9.52232142857143e-06, "loss": 32.3216, "step": 13251 }, { "epoch": 315.52537313432833, "grad_norm": 25.089570999145508, "learning_rate": 9.521577380952381e-06, "loss": 33.4233, "step": 13252 }, { "epoch": 315.5492537313433, "grad_norm": 23.953487396240234, "learning_rate": 9.520833333333334e-06, "loss": 33.0815, "step": 13253 }, { "epoch": 315.5731343283582, "grad_norm": 20.837909698486328, "learning_rate": 9.520089285714286e-06, "loss": 34.3127, "step": 13254 }, { "epoch": 315.5970149253731, "grad_norm": 26.91604995727539, "learning_rate": 9.51934523809524e-06, "loss": 34.0851, "step": 13255 }, { "epoch": 315.6208955223881, "grad_norm": 20.253692626953125, "learning_rate": 9.518601190476192e-06, "loss": 33.4698, "step": 13256 }, { "epoch": 315.644776119403, "grad_norm": 31.84703826904297, "learning_rate": 9.517857142857143e-06, "loss": 33.5676, "step": 13257 }, { "epoch": 315.6686567164179, "grad_norm": 27.487943649291992, "learning_rate": 9.517113095238096e-06, "loss": 34.4207, "step": 13258 }, { "epoch": 315.6925373134328, "grad_norm": 33.138919830322266, "learning_rate": 9.516369047619048e-06, "loss": 34.3429, "step": 13259 }, { "epoch": 315.7164179104478, "grad_norm": 28.251028060913086, "learning_rate": 9.515625000000001e-06, "loss": 33.6487, "step": 13260 }, { "epoch": 315.7402985074627, "grad_norm": 27.63349151611328, "learning_rate": 9.514880952380952e-06, "loss": 32.2774, "step": 13261 }, { "epoch": 315.7641791044776, "grad_norm": 24.83226776123047, "learning_rate": 9.514136904761906e-06, "loss": 33.2531, "step": 13262 }, { "epoch": 315.78805970149256, "grad_norm": 26.027666091918945, "learning_rate": 9.513392857142857e-06, "loss": 33.6466, "step": 13263 }, { "epoch": 315.81194029850747, "grad_norm": 20.55242919921875, "learning_rate": 9.51264880952381e-06, "loss": 32.6482, "step": 13264 }, { "epoch": 315.8358208955224, "grad_norm": 25.64906120300293, "learning_rate": 9.511904761904763e-06, "loss": 35.2883, "step": 13265 }, { "epoch": 315.85970149253734, "grad_norm": 19.154644012451172, "learning_rate": 9.511160714285715e-06, "loss": 33.8606, "step": 13266 }, { "epoch": 315.88358208955225, "grad_norm": 26.634958267211914, "learning_rate": 9.510416666666668e-06, "loss": 33.1455, "step": 13267 }, { "epoch": 315.90746268656716, "grad_norm": 21.878103256225586, "learning_rate": 9.509672619047619e-06, "loss": 33.9436, "step": 13268 }, { "epoch": 315.93134328358207, "grad_norm": 27.495351791381836, "learning_rate": 9.508928571428572e-06, "loss": 34.0173, "step": 13269 }, { "epoch": 315.95522388059703, "grad_norm": 23.8179874420166, "learning_rate": 9.508184523809524e-06, "loss": 35.3989, "step": 13270 }, { "epoch": 315.97910447761194, "grad_norm": 24.536479949951172, "learning_rate": 9.507440476190477e-06, "loss": 32.9694, "step": 13271 }, { "epoch": 316.0, "grad_norm": 18.76787757873535, "learning_rate": 9.50669642857143e-06, "loss": 27.8328, "step": 13272 }, { "epoch": 316.0238805970149, "grad_norm": 25.226884841918945, "learning_rate": 9.50595238095238e-06, "loss": 32.6869, "step": 13273 }, { "epoch": 316.0477611940299, "grad_norm": 20.781539916992188, "learning_rate": 9.505208333333335e-06, "loss": 34.4851, "step": 13274 }, { "epoch": 316.0716417910448, "grad_norm": 25.45966148376465, "learning_rate": 9.504464285714286e-06, "loss": 32.5034, "step": 13275 }, { "epoch": 316.0955223880597, "grad_norm": 21.303007125854492, "learning_rate": 9.503720238095239e-06, "loss": 34.2349, "step": 13276 }, { "epoch": 316.1194029850746, "grad_norm": 19.09128761291504, "learning_rate": 9.502976190476191e-06, "loss": 33.4227, "step": 13277 }, { "epoch": 316.14328358208957, "grad_norm": 23.925003051757812, "learning_rate": 9.502232142857144e-06, "loss": 34.4937, "step": 13278 }, { "epoch": 316.1671641791045, "grad_norm": 17.679929733276367, "learning_rate": 9.501488095238097e-06, "loss": 33.3494, "step": 13279 }, { "epoch": 316.1910447761194, "grad_norm": 27.09799575805664, "learning_rate": 9.500744047619048e-06, "loss": 33.8014, "step": 13280 }, { "epoch": 316.21492537313435, "grad_norm": 18.58030128479004, "learning_rate": 9.5e-06, "loss": 33.3887, "step": 13281 }, { "epoch": 316.23880597014926, "grad_norm": 27.787328720092773, "learning_rate": 9.499255952380953e-06, "loss": 33.9049, "step": 13282 }, { "epoch": 316.26268656716417, "grad_norm": 24.678585052490234, "learning_rate": 9.498511904761906e-06, "loss": 33.7619, "step": 13283 }, { "epoch": 316.28656716417913, "grad_norm": 24.157333374023438, "learning_rate": 9.497767857142859e-06, "loss": 33.4499, "step": 13284 }, { "epoch": 316.31044776119404, "grad_norm": 21.003877639770508, "learning_rate": 9.49702380952381e-06, "loss": 33.6756, "step": 13285 }, { "epoch": 316.33432835820895, "grad_norm": 24.18219757080078, "learning_rate": 9.496279761904762e-06, "loss": 33.0057, "step": 13286 }, { "epoch": 316.35820895522386, "grad_norm": 19.8808650970459, "learning_rate": 9.495535714285715e-06, "loss": 34.1521, "step": 13287 }, { "epoch": 316.3820895522388, "grad_norm": 26.05632781982422, "learning_rate": 9.494791666666668e-06, "loss": 34.3963, "step": 13288 }, { "epoch": 316.40597014925373, "grad_norm": 21.351581573486328, "learning_rate": 9.494047619047619e-06, "loss": 34.2365, "step": 13289 }, { "epoch": 316.42985074626864, "grad_norm": 26.41434097290039, "learning_rate": 9.493303571428573e-06, "loss": 34.328, "step": 13290 }, { "epoch": 316.4537313432836, "grad_norm": 19.21407699584961, "learning_rate": 9.492559523809524e-06, "loss": 33.1316, "step": 13291 }, { "epoch": 316.4776119402985, "grad_norm": 25.514041900634766, "learning_rate": 9.491815476190477e-06, "loss": 32.9505, "step": 13292 }, { "epoch": 316.5014925373134, "grad_norm": 22.2117919921875, "learning_rate": 9.49107142857143e-06, "loss": 33.7875, "step": 13293 }, { "epoch": 316.52537313432833, "grad_norm": 22.043800354003906, "learning_rate": 9.490327380952382e-06, "loss": 34.766, "step": 13294 }, { "epoch": 316.5492537313433, "grad_norm": 19.45757293701172, "learning_rate": 9.489583333333335e-06, "loss": 33.4709, "step": 13295 }, { "epoch": 316.5731343283582, "grad_norm": 19.759675979614258, "learning_rate": 9.488839285714286e-06, "loss": 33.7494, "step": 13296 }, { "epoch": 316.5970149253731, "grad_norm": 19.454973220825195, "learning_rate": 9.488095238095238e-06, "loss": 34.3957, "step": 13297 }, { "epoch": 316.6208955223881, "grad_norm": 20.610828399658203, "learning_rate": 9.487351190476191e-06, "loss": 32.684, "step": 13298 }, { "epoch": 316.644776119403, "grad_norm": 17.435138702392578, "learning_rate": 9.486607142857144e-06, "loss": 32.6552, "step": 13299 }, { "epoch": 316.6686567164179, "grad_norm": 20.769853591918945, "learning_rate": 9.485863095238096e-06, "loss": 33.4368, "step": 13300 }, { "epoch": 316.6925373134328, "grad_norm": 16.646352767944336, "learning_rate": 9.485119047619047e-06, "loss": 33.1157, "step": 13301 }, { "epoch": 316.7164179104478, "grad_norm": 18.582733154296875, "learning_rate": 9.484375000000002e-06, "loss": 34.6879, "step": 13302 }, { "epoch": 316.7402985074627, "grad_norm": 17.33199119567871, "learning_rate": 9.483630952380953e-06, "loss": 34.0831, "step": 13303 }, { "epoch": 316.7641791044776, "grad_norm": 18.06012725830078, "learning_rate": 9.482886904761906e-06, "loss": 32.4395, "step": 13304 }, { "epoch": 316.78805970149256, "grad_norm": 16.276689529418945, "learning_rate": 9.482142857142858e-06, "loss": 33.2182, "step": 13305 }, { "epoch": 316.81194029850747, "grad_norm": 17.239099502563477, "learning_rate": 9.481398809523811e-06, "loss": 33.6361, "step": 13306 }, { "epoch": 316.8358208955224, "grad_norm": 14.224556922912598, "learning_rate": 9.480654761904764e-06, "loss": 33.7979, "step": 13307 }, { "epoch": 316.85970149253734, "grad_norm": 17.398347854614258, "learning_rate": 9.479910714285715e-06, "loss": 32.7602, "step": 13308 }, { "epoch": 316.88358208955225, "grad_norm": 16.360572814941406, "learning_rate": 9.479166666666667e-06, "loss": 32.7625, "step": 13309 }, { "epoch": 316.90746268656716, "grad_norm": 19.49298858642578, "learning_rate": 9.47842261904762e-06, "loss": 32.7342, "step": 13310 }, { "epoch": 316.93134328358207, "grad_norm": 16.53718376159668, "learning_rate": 9.477678571428573e-06, "loss": 34.6333, "step": 13311 }, { "epoch": 316.95522388059703, "grad_norm": 17.28750228881836, "learning_rate": 9.476934523809525e-06, "loss": 33.6695, "step": 13312 }, { "epoch": 316.97910447761194, "grad_norm": 17.403059005737305, "learning_rate": 9.476190476190476e-06, "loss": 33.5419, "step": 13313 }, { "epoch": 317.0, "grad_norm": 15.339287757873535, "learning_rate": 9.475446428571429e-06, "loss": 29.8176, "step": 13314 }, { "epoch": 317.0238805970149, "grad_norm": 16.032194137573242, "learning_rate": 9.474702380952382e-06, "loss": 33.0393, "step": 13315 }, { "epoch": 317.0477611940299, "grad_norm": 15.282236099243164, "learning_rate": 9.473958333333334e-06, "loss": 31.657, "step": 13316 }, { "epoch": 317.0716417910448, "grad_norm": 19.055511474609375, "learning_rate": 9.473214285714285e-06, "loss": 33.7761, "step": 13317 }, { "epoch": 317.0955223880597, "grad_norm": 17.56702423095703, "learning_rate": 9.47247023809524e-06, "loss": 32.8771, "step": 13318 }, { "epoch": 317.1194029850746, "grad_norm": 17.464839935302734, "learning_rate": 9.47172619047619e-06, "loss": 33.3261, "step": 13319 }, { "epoch": 317.14328358208957, "grad_norm": 13.382113456726074, "learning_rate": 9.470982142857143e-06, "loss": 31.7969, "step": 13320 }, { "epoch": 317.1671641791045, "grad_norm": 18.935489654541016, "learning_rate": 9.470238095238096e-06, "loss": 33.3907, "step": 13321 }, { "epoch": 317.1910447761194, "grad_norm": 15.993891716003418, "learning_rate": 9.469494047619049e-06, "loss": 33.4379, "step": 13322 }, { "epoch": 317.21492537313435, "grad_norm": 19.52393913269043, "learning_rate": 9.468750000000001e-06, "loss": 34.6754, "step": 13323 }, { "epoch": 317.23880597014926, "grad_norm": 15.924966812133789, "learning_rate": 9.468005952380952e-06, "loss": 33.9633, "step": 13324 }, { "epoch": 317.26268656716417, "grad_norm": 20.609539031982422, "learning_rate": 9.467261904761905e-06, "loss": 33.7411, "step": 13325 }, { "epoch": 317.28656716417913, "grad_norm": 15.784844398498535, "learning_rate": 9.466517857142858e-06, "loss": 34.4262, "step": 13326 }, { "epoch": 317.31044776119404, "grad_norm": 19.99517822265625, "learning_rate": 9.46577380952381e-06, "loss": 32.7825, "step": 13327 }, { "epoch": 317.33432835820895, "grad_norm": 18.543437957763672, "learning_rate": 9.465029761904763e-06, "loss": 34.3556, "step": 13328 }, { "epoch": 317.35820895522386, "grad_norm": 16.29489517211914, "learning_rate": 9.464285714285714e-06, "loss": 32.9021, "step": 13329 }, { "epoch": 317.3820895522388, "grad_norm": 20.580856323242188, "learning_rate": 9.463541666666669e-06, "loss": 33.8491, "step": 13330 }, { "epoch": 317.40597014925373, "grad_norm": 20.105560302734375, "learning_rate": 9.46279761904762e-06, "loss": 33.9351, "step": 13331 }, { "epoch": 317.42985074626864, "grad_norm": 16.851537704467773, "learning_rate": 9.462053571428572e-06, "loss": 33.1687, "step": 13332 }, { "epoch": 317.4537313432836, "grad_norm": 21.92867660522461, "learning_rate": 9.461309523809525e-06, "loss": 32.6336, "step": 13333 }, { "epoch": 317.4776119402985, "grad_norm": 16.571556091308594, "learning_rate": 9.460565476190478e-06, "loss": 34.0379, "step": 13334 }, { "epoch": 317.5014925373134, "grad_norm": NaN, "learning_rate": 9.45982142857143e-06, "loss": 30.4668, "step": 13335 }, { "epoch": 317.52537313432833, "grad_norm": 18.779396057128906, "learning_rate": 9.45982142857143e-06, "loss": 33.8917, "step": 13336 }, { "epoch": 317.5492537313433, "grad_norm": NaN, "learning_rate": 9.459077380952381e-06, "loss": 42.368, "step": 13337 }, { "epoch": 317.5731343283582, "grad_norm": 19.674638748168945, "learning_rate": 9.459077380952381e-06, "loss": 34.1599, "step": 13338 }, { "epoch": 317.5970149253731, "grad_norm": 19.24485206604004, "learning_rate": 9.458333333333334e-06, "loss": 32.0584, "step": 13339 }, { "epoch": 317.6208955223881, "grad_norm": 14.414884567260742, "learning_rate": 9.457589285714287e-06, "loss": 34.9061, "step": 13340 }, { "epoch": 317.644776119403, "grad_norm": 21.131359100341797, "learning_rate": 9.45684523809524e-06, "loss": 33.7017, "step": 13341 }, { "epoch": 317.6686567164179, "grad_norm": 15.800726890563965, "learning_rate": 9.456101190476192e-06, "loss": 33.9411, "step": 13342 }, { "epoch": 317.6925373134328, "grad_norm": 21.80306625366211, "learning_rate": 9.455357142857143e-06, "loss": 33.8175, "step": 13343 }, { "epoch": 317.7164179104478, "grad_norm": 16.657373428344727, "learning_rate": 9.454613095238097e-06, "loss": 33.26, "step": 13344 }, { "epoch": 317.7402985074627, "grad_norm": 17.922826766967773, "learning_rate": 9.453869047619048e-06, "loss": 34.078, "step": 13345 }, { "epoch": 317.7641791044776, "grad_norm": 19.15614891052246, "learning_rate": 9.453125000000001e-06, "loss": 33.5982, "step": 13346 }, { "epoch": 317.78805970149256, "grad_norm": 15.744733810424805, "learning_rate": 9.452380952380952e-06, "loss": 33.6735, "step": 13347 }, { "epoch": 317.81194029850747, "grad_norm": 16.33230209350586, "learning_rate": 9.451636904761905e-06, "loss": 33.8818, "step": 13348 }, { "epoch": 317.8358208955224, "grad_norm": 17.132356643676758, "learning_rate": 9.450892857142857e-06, "loss": 33.674, "step": 13349 }, { "epoch": 317.85970149253734, "grad_norm": 15.774198532104492, "learning_rate": 9.45014880952381e-06, "loss": 32.8711, "step": 13350 }, { "epoch": 317.88358208955225, "grad_norm": 16.86773109436035, "learning_rate": 9.449404761904763e-06, "loss": 32.9124, "step": 13351 }, { "epoch": 317.90746268656716, "grad_norm": 16.00055503845215, "learning_rate": 9.448660714285714e-06, "loss": 33.6463, "step": 13352 }, { "epoch": 317.93134328358207, "grad_norm": 18.052488327026367, "learning_rate": 9.447916666666668e-06, "loss": 33.5122, "step": 13353 }, { "epoch": 317.95522388059703, "grad_norm": 17.009675979614258, "learning_rate": 9.44717261904762e-06, "loss": 34.4861, "step": 13354 }, { "epoch": 317.97910447761194, "grad_norm": 16.055465698242188, "learning_rate": 9.446428571428572e-06, "loss": 34.481, "step": 13355 }, { "epoch": 318.0, "grad_norm": 15.88033390045166, "learning_rate": 9.445684523809525e-06, "loss": 28.3286, "step": 13356 }, { "epoch": 318.0238805970149, "grad_norm": 15.0000638961792, "learning_rate": 9.444940476190477e-06, "loss": 31.8116, "step": 13357 }, { "epoch": 318.0477611940299, "grad_norm": 18.877826690673828, "learning_rate": 9.44419642857143e-06, "loss": 33.1175, "step": 13358 }, { "epoch": 318.0716417910448, "grad_norm": 22.391193389892578, "learning_rate": 9.443452380952381e-06, "loss": 31.6244, "step": 13359 }, { "epoch": 318.0955223880597, "grad_norm": 20.479522705078125, "learning_rate": 9.442708333333334e-06, "loss": 32.5579, "step": 13360 }, { "epoch": 318.1194029850746, "grad_norm": 17.490135192871094, "learning_rate": 9.441964285714286e-06, "loss": 34.694, "step": 13361 }, { "epoch": 318.14328358208957, "grad_norm": 19.028148651123047, "learning_rate": 9.441220238095239e-06, "loss": 34.1734, "step": 13362 }, { "epoch": 318.1671641791045, "grad_norm": 17.874404907226562, "learning_rate": 9.440476190476192e-06, "loss": 34.1923, "step": 13363 }, { "epoch": 318.1910447761194, "grad_norm": 15.767306327819824, "learning_rate": 9.439732142857143e-06, "loss": 32.3749, "step": 13364 }, { "epoch": 318.21492537313435, "grad_norm": 24.332582473754883, "learning_rate": 9.438988095238097e-06, "loss": 33.3351, "step": 13365 }, { "epoch": 318.23880597014926, "grad_norm": 16.11850929260254, "learning_rate": 9.438244047619048e-06, "loss": 34.5669, "step": 13366 }, { "epoch": 318.26268656716417, "grad_norm": 24.260330200195312, "learning_rate": 9.4375e-06, "loss": 33.4101, "step": 13367 }, { "epoch": 318.28656716417913, "grad_norm": 17.738794326782227, "learning_rate": 9.436755952380953e-06, "loss": 34.4332, "step": 13368 }, { "epoch": 318.31044776119404, "grad_norm": 21.241287231445312, "learning_rate": 9.436011904761906e-06, "loss": 33.3193, "step": 13369 }, { "epoch": 318.33432835820895, "grad_norm": 23.853557586669922, "learning_rate": 9.435267857142859e-06, "loss": 33.697, "step": 13370 }, { "epoch": 318.35820895522386, "grad_norm": 17.392017364501953, "learning_rate": 9.43452380952381e-06, "loss": 33.655, "step": 13371 }, { "epoch": 318.3820895522388, "grad_norm": 16.22450065612793, "learning_rate": 9.433779761904762e-06, "loss": 34.6262, "step": 13372 }, { "epoch": 318.40597014925373, "grad_norm": 17.703052520751953, "learning_rate": 9.433035714285715e-06, "loss": 33.3349, "step": 13373 }, { "epoch": 318.42985074626864, "grad_norm": 19.10869598388672, "learning_rate": 9.432291666666668e-06, "loss": 33.0067, "step": 13374 }, { "epoch": 318.4537313432836, "grad_norm": 18.446027755737305, "learning_rate": 9.431547619047619e-06, "loss": 33.9296, "step": 13375 }, { "epoch": 318.4776119402985, "grad_norm": 15.902649879455566, "learning_rate": 9.430803571428571e-06, "loss": 32.2868, "step": 13376 }, { "epoch": 318.5014925373134, "grad_norm": 14.964925765991211, "learning_rate": 9.430059523809524e-06, "loss": 33.1198, "step": 13377 }, { "epoch": 318.52537313432833, "grad_norm": 19.35045623779297, "learning_rate": 9.429315476190477e-06, "loss": 34.2293, "step": 13378 }, { "epoch": 318.5492537313433, "grad_norm": 20.05290412902832, "learning_rate": 9.42857142857143e-06, "loss": 34.2937, "step": 13379 }, { "epoch": 318.5731343283582, "grad_norm": 16.73937225341797, "learning_rate": 9.42782738095238e-06, "loss": 33.7444, "step": 13380 }, { "epoch": 318.5970149253731, "grad_norm": 21.402694702148438, "learning_rate": 9.427083333333335e-06, "loss": 33.3548, "step": 13381 }, { "epoch": 318.6208955223881, "grad_norm": 17.070066452026367, "learning_rate": 9.426339285714286e-06, "loss": 31.5075, "step": 13382 }, { "epoch": 318.644776119403, "grad_norm": 17.344465255737305, "learning_rate": 9.425595238095239e-06, "loss": 32.3674, "step": 13383 }, { "epoch": 318.6686567164179, "grad_norm": 21.59908103942871, "learning_rate": 9.424851190476191e-06, "loss": 34.1737, "step": 13384 }, { "epoch": 318.6925373134328, "grad_norm": 18.76130485534668, "learning_rate": 9.424107142857144e-06, "loss": 34.4727, "step": 13385 }, { "epoch": 318.7164179104478, "grad_norm": 22.039154052734375, "learning_rate": 9.423363095238097e-06, "loss": 33.493, "step": 13386 }, { "epoch": 318.7402985074627, "grad_norm": 29.23167610168457, "learning_rate": 9.422619047619048e-06, "loss": 34.2462, "step": 13387 }, { "epoch": 318.7641791044776, "grad_norm": 18.083833694458008, "learning_rate": 9.421875e-06, "loss": 33.8711, "step": 13388 }, { "epoch": 318.78805970149256, "grad_norm": 39.312557220458984, "learning_rate": 9.421130952380953e-06, "loss": 33.8413, "step": 13389 }, { "epoch": 318.81194029850747, "grad_norm": 31.588090896606445, "learning_rate": 9.420386904761906e-06, "loss": 32.65, "step": 13390 }, { "epoch": 318.8358208955224, "grad_norm": 36.38351821899414, "learning_rate": 9.419642857142858e-06, "loss": 33.493, "step": 13391 }, { "epoch": 318.85970149253734, "grad_norm": 33.490604400634766, "learning_rate": 9.41889880952381e-06, "loss": 33.9192, "step": 13392 }, { "epoch": 318.88358208955225, "grad_norm": 29.810832977294922, "learning_rate": 9.418154761904764e-06, "loss": 35.1813, "step": 13393 }, { "epoch": 318.90746268656716, "grad_norm": 26.900054931640625, "learning_rate": 9.417410714285715e-06, "loss": 33.8841, "step": 13394 }, { "epoch": 318.93134328358207, "grad_norm": 30.67934226989746, "learning_rate": 9.416666666666667e-06, "loss": 33.7924, "step": 13395 }, { "epoch": 318.95522388059703, "grad_norm": 23.40369987487793, "learning_rate": 9.41592261904762e-06, "loss": 33.03, "step": 13396 }, { "epoch": 318.97910447761194, "grad_norm": 32.82838821411133, "learning_rate": 9.415178571428573e-06, "loss": 33.2242, "step": 13397 }, { "epoch": 319.0, "grad_norm": 22.915206909179688, "learning_rate": 9.414434523809525e-06, "loss": 29.838, "step": 13398 }, { "epoch": 319.0238805970149, "grad_norm": 35.52446746826172, "learning_rate": 9.413690476190476e-06, "loss": 33.485, "step": 13399 }, { "epoch": 319.0477611940299, "grad_norm": 29.487043380737305, "learning_rate": 9.41294642857143e-06, "loss": 34.3338, "step": 13400 }, { "epoch": 319.0716417910448, "grad_norm": 36.47984313964844, "learning_rate": 9.412202380952382e-06, "loss": 34.2982, "step": 13401 }, { "epoch": 319.0955223880597, "grad_norm": 33.790008544921875, "learning_rate": 9.411458333333335e-06, "loss": 34.5241, "step": 13402 }, { "epoch": 319.1194029850746, "grad_norm": 27.084001541137695, "learning_rate": 9.410714285714286e-06, "loss": 34.5703, "step": 13403 }, { "epoch": 319.14328358208957, "grad_norm": 26.034971237182617, "learning_rate": 9.409970238095238e-06, "loss": 33.2482, "step": 13404 }, { "epoch": 319.1671641791045, "grad_norm": 28.040359497070312, "learning_rate": 9.409226190476191e-06, "loss": 33.4084, "step": 13405 }, { "epoch": 319.1910447761194, "grad_norm": 23.912641525268555, "learning_rate": 9.408482142857144e-06, "loss": 33.0828, "step": 13406 }, { "epoch": 319.21492537313435, "grad_norm": 31.83680534362793, "learning_rate": 9.407738095238096e-06, "loss": 33.9288, "step": 13407 }, { "epoch": 319.23880597014926, "grad_norm": 28.342632293701172, "learning_rate": 9.406994047619047e-06, "loss": 33.3371, "step": 13408 }, { "epoch": 319.26268656716417, "grad_norm": 34.55799102783203, "learning_rate": 9.406250000000002e-06, "loss": 33.6974, "step": 13409 }, { "epoch": 319.28656716417913, "grad_norm": 30.760406494140625, "learning_rate": 9.405505952380953e-06, "loss": 32.5653, "step": 13410 }, { "epoch": 319.31044776119404, "grad_norm": 28.75543785095215, "learning_rate": 9.404761904761905e-06, "loss": 33.0336, "step": 13411 }, { "epoch": 319.33432835820895, "grad_norm": 26.500202178955078, "learning_rate": 9.404017857142858e-06, "loss": 32.265, "step": 13412 }, { "epoch": 319.35820895522386, "grad_norm": 29.624961853027344, "learning_rate": 9.40327380952381e-06, "loss": 34.9516, "step": 13413 }, { "epoch": 319.3820895522388, "grad_norm": 29.508689880371094, "learning_rate": 9.402529761904763e-06, "loss": 33.3541, "step": 13414 }, { "epoch": 319.40597014925373, "grad_norm": 27.894058227539062, "learning_rate": 9.401785714285714e-06, "loss": 33.3962, "step": 13415 }, { "epoch": 319.42985074626864, "grad_norm": 25.896587371826172, "learning_rate": 9.401041666666667e-06, "loss": 32.8412, "step": 13416 }, { "epoch": 319.4537313432836, "grad_norm": 31.310272216796875, "learning_rate": 9.40029761904762e-06, "loss": 33.6401, "step": 13417 }, { "epoch": 319.4776119402985, "grad_norm": 25.35869598388672, "learning_rate": 9.399553571428572e-06, "loss": 33.2427, "step": 13418 }, { "epoch": 319.5014925373134, "grad_norm": 34.811676025390625, "learning_rate": 9.398809523809525e-06, "loss": 32.974, "step": 13419 }, { "epoch": 319.52537313432833, "grad_norm": 33.234920501708984, "learning_rate": 9.398065476190476e-06, "loss": 32.5361, "step": 13420 }, { "epoch": 319.5492537313433, "grad_norm": 30.192657470703125, "learning_rate": 9.39732142857143e-06, "loss": 32.8948, "step": 13421 }, { "epoch": 319.5731343283582, "grad_norm": 28.203994750976562, "learning_rate": 9.396577380952381e-06, "loss": 34.2486, "step": 13422 }, { "epoch": 319.5970149253731, "grad_norm": 27.182098388671875, "learning_rate": 9.395833333333334e-06, "loss": 33.7982, "step": 13423 }, { "epoch": 319.6208955223881, "grad_norm": 22.803674697875977, "learning_rate": 9.395089285714287e-06, "loss": 32.5016, "step": 13424 }, { "epoch": 319.644776119403, "grad_norm": 35.993038177490234, "learning_rate": 9.39434523809524e-06, "loss": 32.6825, "step": 13425 }, { "epoch": 319.6686567164179, "grad_norm": 31.017728805541992, "learning_rate": 9.393601190476192e-06, "loss": 32.9535, "step": 13426 }, { "epoch": 319.6925373134328, "grad_norm": 27.446929931640625, "learning_rate": 9.392857142857143e-06, "loss": 31.9826, "step": 13427 }, { "epoch": 319.7164179104478, "grad_norm": 28.538211822509766, "learning_rate": 9.392113095238096e-06, "loss": 33.4122, "step": 13428 }, { "epoch": 319.7402985074627, "grad_norm": 30.983957290649414, "learning_rate": 9.391369047619049e-06, "loss": 33.2667, "step": 13429 }, { "epoch": 319.7641791044776, "grad_norm": 26.290386199951172, "learning_rate": 9.390625000000001e-06, "loss": 34.2779, "step": 13430 }, { "epoch": 319.78805970149256, "grad_norm": 32.49992370605469, "learning_rate": 9.389880952380954e-06, "loss": 34.1709, "step": 13431 }, { "epoch": 319.81194029850747, "grad_norm": 29.13446044921875, "learning_rate": 9.389136904761905e-06, "loss": 33.7185, "step": 13432 }, { "epoch": 319.8358208955224, "grad_norm": 28.562192916870117, "learning_rate": 9.388392857142858e-06, "loss": 33.248, "step": 13433 }, { "epoch": 319.85970149253734, "grad_norm": 28.523258209228516, "learning_rate": 9.38764880952381e-06, "loss": 34.4778, "step": 13434 }, { "epoch": 319.88358208955225, "grad_norm": 29.057281494140625, "learning_rate": 9.386904761904763e-06, "loss": 34.3028, "step": 13435 }, { "epoch": 319.90746268656716, "grad_norm": 25.81745147705078, "learning_rate": 9.386160714285714e-06, "loss": 32.741, "step": 13436 }, { "epoch": 319.93134328358207, "grad_norm": 30.437976837158203, "learning_rate": 9.385416666666668e-06, "loss": 33.759, "step": 13437 }, { "epoch": 319.95522388059703, "grad_norm": 28.955860137939453, "learning_rate": 9.38467261904762e-06, "loss": 33.6976, "step": 13438 }, { "epoch": 319.97910447761194, "grad_norm": 29.05436897277832, "learning_rate": 9.383928571428572e-06, "loss": 33.157, "step": 13439 }, { "epoch": 320.0, "grad_norm": 22.26370620727539, "learning_rate": 9.383184523809525e-06, "loss": 29.4054, "step": 13440 }, { "epoch": 320.0, "step": 13440, "total_flos": 6.60699028639923e+17, "train_loss": 2.113268469345002, "train_runtime": 25654.5719, "train_samples_per_second": 66.758, "train_steps_per_second": 0.524 }, { "epoch": 320.0238805970149, "grad_norm": 28.20069122314453, "learning_rate": 1e-05, "loss": 34.3924, "step": 13441 }, { "epoch": 320.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999299719887955e-06, "loss": 41.6766, "step": 13442 }, { "epoch": 320.0716417910448, "grad_norm": Infinity, "learning_rate": 9.999299719887955e-06, "loss": 40.1499, "step": 13443 }, { "epoch": 320.0955223880597, "grad_norm": 457.32623291015625, "learning_rate": 9.999299719887955e-06, "loss": 41.1706, "step": 13444 }, { "epoch": 320.1194029850746, "grad_norm": 280.8936462402344, "learning_rate": 9.998599439775911e-06, "loss": 38.602, "step": 13445 }, { "epoch": 320.14328358208957, "grad_norm": 97.15399932861328, "learning_rate": 9.997899159663866e-06, "loss": 35.4426, "step": 13446 }, { "epoch": 320.1671641791045, "grad_norm": 90.87565612792969, "learning_rate": 9.997198879551822e-06, "loss": 35.2902, "step": 13447 }, { "epoch": 320.1910447761194, "grad_norm": 83.8344497680664, "learning_rate": 9.996498599439777e-06, "loss": 34.5836, "step": 13448 }, { "epoch": 320.21492537313435, "grad_norm": 49.05875778198242, "learning_rate": 9.995798319327733e-06, "loss": 34.2659, "step": 13449 }, { "epoch": 320.23880597014926, "grad_norm": 40.812782287597656, "learning_rate": 9.995098039215687e-06, "loss": 34.6545, "step": 13450 }, { "epoch": 320.26268656716417, "grad_norm": 36.72706604003906, "learning_rate": 9.994397759103642e-06, "loss": 33.7638, "step": 13451 }, { "epoch": 320.28656716417913, "grad_norm": 26.809864044189453, "learning_rate": 9.993697478991598e-06, "loss": 34.318, "step": 13452 }, { "epoch": 320.31044776119404, "grad_norm": 24.38344383239746, "learning_rate": 9.992997198879552e-06, "loss": 34.3693, "step": 13453 }, { "epoch": 320.33432835820895, "grad_norm": 23.269834518432617, "learning_rate": 9.992296918767508e-06, "loss": 33.8004, "step": 13454 }, { "epoch": 320.35820895522386, "grad_norm": 25.902122497558594, "learning_rate": 9.991596638655463e-06, "loss": 33.1348, "step": 13455 }, { "epoch": 320.3820895522388, "grad_norm": 24.044321060180664, "learning_rate": 9.990896358543417e-06, "loss": 33.7546, "step": 13456 }, { "epoch": 320.40597014925373, "grad_norm": 21.03065299987793, "learning_rate": 9.990196078431374e-06, "loss": 36.0693, "step": 13457 }, { "epoch": 320.42985074626864, "grad_norm": 18.3806209564209, "learning_rate": 9.989495798319328e-06, "loss": 33.4097, "step": 13458 }, { "epoch": 320.4537313432836, "grad_norm": 21.226511001586914, "learning_rate": 9.988795518207284e-06, "loss": 34.5919, "step": 13459 }, { "epoch": 320.4776119402985, "grad_norm": 19.58074188232422, "learning_rate": 9.988095238095239e-06, "loss": 34.0975, "step": 13460 }, { "epoch": 320.5014925373134, "grad_norm": 20.49414825439453, "learning_rate": 9.987394957983195e-06, "loss": 32.4821, "step": 13461 }, { "epoch": 320.52537313432833, "grad_norm": 15.913312911987305, "learning_rate": 9.98669467787115e-06, "loss": 33.5827, "step": 13462 }, { "epoch": 320.5492537313433, "grad_norm": 19.110130310058594, "learning_rate": 9.985994397759104e-06, "loss": 32.8113, "step": 13463 }, { "epoch": 320.5731343283582, "grad_norm": 17.153520584106445, "learning_rate": 9.98529411764706e-06, "loss": 33.1583, "step": 13464 }, { "epoch": 320.5970149253731, "grad_norm": 21.01837158203125, "learning_rate": 9.984593837535014e-06, "loss": 34.1792, "step": 13465 }, { "epoch": 320.6208955223881, "grad_norm": 19.879121780395508, "learning_rate": 9.98389355742297e-06, "loss": 32.8184, "step": 13466 }, { "epoch": 320.644776119403, "grad_norm": 18.8907470703125, "learning_rate": 9.983193277310925e-06, "loss": 34.2799, "step": 13467 }, { "epoch": 320.6686567164179, "grad_norm": 15.665071487426758, "learning_rate": 9.982492997198881e-06, "loss": 32.427, "step": 13468 }, { "epoch": 320.6925373134328, "grad_norm": 21.888671875, "learning_rate": 9.981792717086836e-06, "loss": 33.1517, "step": 13469 }, { "epoch": 320.7164179104478, "grad_norm": 24.189502716064453, "learning_rate": 9.98109243697479e-06, "loss": 33.1814, "step": 13470 }, { "epoch": 320.7402985074627, "grad_norm": 16.405902862548828, "learning_rate": 9.980392156862746e-06, "loss": 34.7406, "step": 13471 }, { "epoch": 320.7641791044776, "grad_norm": 20.98548698425293, "learning_rate": 9.9796918767507e-06, "loss": 33.1669, "step": 13472 }, { "epoch": 320.78805970149256, "grad_norm": 21.629098892211914, "learning_rate": 9.978991596638657e-06, "loss": 32.2965, "step": 13473 }, { "epoch": 320.81194029850747, "grad_norm": 15.458964347839355, "learning_rate": 9.978291316526611e-06, "loss": 33.3416, "step": 13474 }, { "epoch": 320.8358208955224, "grad_norm": 17.867895126342773, "learning_rate": 9.977591036414566e-06, "loss": 33.2184, "step": 13475 }, { "epoch": 320.85970149253734, "grad_norm": 20.45394515991211, "learning_rate": 9.976890756302522e-06, "loss": 34.4215, "step": 13476 }, { "epoch": 320.88358208955225, "grad_norm": 17.875850677490234, "learning_rate": 9.976190476190477e-06, "loss": 33.7006, "step": 13477 }, { "epoch": 320.90746268656716, "grad_norm": 15.243967056274414, "learning_rate": 9.975490196078433e-06, "loss": 34.8511, "step": 13478 }, { "epoch": 320.93134328358207, "grad_norm": 15.511274337768555, "learning_rate": 9.974789915966387e-06, "loss": 32.6283, "step": 13479 }, { "epoch": 320.95522388059703, "grad_norm": 16.13764762878418, "learning_rate": 9.974089635854343e-06, "loss": 33.5455, "step": 13480 }, { "epoch": 320.97910447761194, "grad_norm": 17.346710205078125, "learning_rate": 9.973389355742298e-06, "loss": 33.0761, "step": 13481 }, { "epoch": 321.0, "grad_norm": 17.808698654174805, "learning_rate": 9.972689075630252e-06, "loss": 28.4847, "step": 13482 }, { "epoch": 321.0238805970149, "grad_norm": 18.072603225708008, "learning_rate": 9.971988795518209e-06, "loss": 32.941, "step": 13483 }, { "epoch": 321.0477611940299, "grad_norm": 17.086442947387695, "learning_rate": 9.971288515406163e-06, "loss": 33.6863, "step": 13484 }, { "epoch": 321.0716417910448, "grad_norm": 16.671628952026367, "learning_rate": 9.970588235294119e-06, "loss": 33.9508, "step": 13485 }, { "epoch": 321.0955223880597, "grad_norm": 18.872169494628906, "learning_rate": 9.969887955182074e-06, "loss": 33.033, "step": 13486 }, { "epoch": 321.1194029850746, "grad_norm": 15.115374565124512, "learning_rate": 9.969187675070028e-06, "loss": 34.2851, "step": 13487 }, { "epoch": 321.14328358208957, "grad_norm": 20.84256362915039, "learning_rate": 9.968487394957984e-06, "loss": 34.6891, "step": 13488 }, { "epoch": 321.1671641791045, "grad_norm": 17.528242111206055, "learning_rate": 9.967787114845939e-06, "loss": 33.3708, "step": 13489 }, { "epoch": 321.1910447761194, "grad_norm": 20.999752044677734, "learning_rate": 9.967086834733895e-06, "loss": 32.796, "step": 13490 }, { "epoch": 321.21492537313435, "grad_norm": 19.596973419189453, "learning_rate": 9.96638655462185e-06, "loss": 34.272, "step": 13491 }, { "epoch": 321.23880597014926, "grad_norm": 14.58626651763916, "learning_rate": 9.965686274509806e-06, "loss": 33.3363, "step": 13492 }, { "epoch": 321.26268656716417, "grad_norm": 17.104713439941406, "learning_rate": 9.96498599439776e-06, "loss": 33.3183, "step": 13493 }, { "epoch": 321.28656716417913, "grad_norm": 20.347379684448242, "learning_rate": 9.964285714285714e-06, "loss": 32.8743, "step": 13494 }, { "epoch": 321.31044776119404, "grad_norm": 18.913585662841797, "learning_rate": 9.96358543417367e-06, "loss": 33.2669, "step": 13495 }, { "epoch": 321.33432835820895, "grad_norm": 14.634831428527832, "learning_rate": 9.962885154061625e-06, "loss": 31.4955, "step": 13496 }, { "epoch": 321.35820895522386, "grad_norm": 27.42943572998047, "learning_rate": 9.962184873949581e-06, "loss": 33.7415, "step": 13497 }, { "epoch": 321.3820895522388, "grad_norm": 17.25787925720215, "learning_rate": 9.961484593837536e-06, "loss": 33.0441, "step": 13498 }, { "epoch": 321.40597014925373, "grad_norm": 24.47115135192871, "learning_rate": 9.960784313725492e-06, "loss": 33.32, "step": 13499 }, { "epoch": 321.42985074626864, "grad_norm": 23.04132652282715, "learning_rate": 9.960084033613446e-06, "loss": 33.6568, "step": 13500 }, { "epoch": 321.4537313432836, "grad_norm": 17.9874324798584, "learning_rate": 9.959383753501401e-06, "loss": 33.5015, "step": 13501 }, { "epoch": 321.4776119402985, "grad_norm": 23.682321548461914, "learning_rate": 9.958683473389357e-06, "loss": 33.7339, "step": 13502 }, { "epoch": 321.5014925373134, "grad_norm": 18.65334129333496, "learning_rate": 9.957983193277312e-06, "loss": 33.5823, "step": 13503 }, { "epoch": 321.52537313432833, "grad_norm": 24.34517478942871, "learning_rate": 9.957282913165268e-06, "loss": 32.9567, "step": 13504 }, { "epoch": 321.5492537313433, "grad_norm": 23.741682052612305, "learning_rate": 9.956582633053222e-06, "loss": 33.0551, "step": 13505 }, { "epoch": 321.5731343283582, "grad_norm": 18.40947723388672, "learning_rate": 9.955882352941177e-06, "loss": 34.1149, "step": 13506 }, { "epoch": 321.5970149253731, "grad_norm": 22.952302932739258, "learning_rate": 9.955182072829133e-06, "loss": 32.7585, "step": 13507 }, { "epoch": 321.6208955223881, "grad_norm": NaN, "learning_rate": 9.954481792717087e-06, "loss": 54.3415, "step": 13508 }, { "epoch": 321.644776119403, "grad_norm": 18.612133026123047, "learning_rate": 9.954481792717087e-06, "loss": 32.2534, "step": 13509 }, { "epoch": 321.6686567164179, "grad_norm": 16.69934844970703, "learning_rate": 9.953781512605043e-06, "loss": 34.2393, "step": 13510 }, { "epoch": 321.6925373134328, "grad_norm": 24.02232551574707, "learning_rate": 9.953081232492998e-06, "loss": 33.7258, "step": 13511 }, { "epoch": 321.7164179104478, "grad_norm": 21.12546730041504, "learning_rate": 9.952380952380954e-06, "loss": 33.6632, "step": 13512 }, { "epoch": 321.7402985074627, "grad_norm": 13.482281684875488, "learning_rate": 9.951680672268909e-06, "loss": 33.2125, "step": 13513 }, { "epoch": 321.7641791044776, "grad_norm": 20.659025192260742, "learning_rate": 9.950980392156863e-06, "loss": 33.9939, "step": 13514 }, { "epoch": 321.78805970149256, "grad_norm": 22.202695846557617, "learning_rate": 9.95028011204482e-06, "loss": 34.4735, "step": 13515 }, { "epoch": 321.81194029850747, "grad_norm": 14.220898628234863, "learning_rate": 9.949579831932774e-06, "loss": 33.6932, "step": 13516 }, { "epoch": 321.8358208955224, "grad_norm": 17.50078773498535, "learning_rate": 9.94887955182073e-06, "loss": 33.6944, "step": 13517 }, { "epoch": 321.85970149253734, "grad_norm": 21.947223663330078, "learning_rate": 9.948179271708684e-06, "loss": 33.8986, "step": 13518 }, { "epoch": 321.88358208955225, "grad_norm": 18.7849063873291, "learning_rate": 9.94747899159664e-06, "loss": 33.6639, "step": 13519 }, { "epoch": 321.90746268656716, "grad_norm": 15.1717529296875, "learning_rate": 9.946778711484595e-06, "loss": 33.1748, "step": 13520 }, { "epoch": 321.93134328358207, "grad_norm": 15.6008939743042, "learning_rate": 9.94607843137255e-06, "loss": 32.5078, "step": 13521 }, { "epoch": 321.95522388059703, "grad_norm": 16.304113388061523, "learning_rate": 9.945378151260506e-06, "loss": 32.8308, "step": 13522 }, { "epoch": 321.97910447761194, "grad_norm": NaN, "learning_rate": 9.94467787114846e-06, "loss": 33.8362, "step": 13523 }, { "epoch": 322.0, "grad_norm": 15.540569305419922, "learning_rate": 9.94467787114846e-06, "loss": 29.535, "step": 13524 }, { "epoch": 322.0238805970149, "grad_norm": 14.937338829040527, "learning_rate": 9.943977591036416e-06, "loss": 33.0239, "step": 13525 }, { "epoch": 322.0477611940299, "grad_norm": 22.070587158203125, "learning_rate": 9.94327731092437e-06, "loss": 34.0065, "step": 13526 }, { "epoch": 322.0716417910448, "grad_norm": 18.071996688842773, "learning_rate": 9.942577030812325e-06, "loss": 33.5174, "step": 13527 }, { "epoch": 322.0955223880597, "grad_norm": 12.858171463012695, "learning_rate": 9.941876750700281e-06, "loss": 33.5769, "step": 13528 }, { "epoch": 322.1194029850746, "grad_norm": 16.61067771911621, "learning_rate": 9.941176470588236e-06, "loss": 33.72, "step": 13529 }, { "epoch": 322.14328358208957, "grad_norm": 20.197229385375977, "learning_rate": 9.940476190476192e-06, "loss": 34.2886, "step": 13530 }, { "epoch": 322.1671641791045, "grad_norm": 16.887353897094727, "learning_rate": 9.939775910364146e-06, "loss": 34.51, "step": 13531 }, { "epoch": 322.1910447761194, "grad_norm": 16.60744857788086, "learning_rate": 9.939075630252103e-06, "loss": 33.5272, "step": 13532 }, { "epoch": 322.21492537313435, "grad_norm": 15.348031997680664, "learning_rate": 9.938375350140057e-06, "loss": 32.1391, "step": 13533 }, { "epoch": 322.23880597014926, "grad_norm": 19.63516616821289, "learning_rate": 9.937675070028012e-06, "loss": 32.9583, "step": 13534 }, { "epoch": 322.26268656716417, "grad_norm": 15.16062068939209, "learning_rate": 9.936974789915968e-06, "loss": 33.1537, "step": 13535 }, { "epoch": 322.28656716417913, "grad_norm": 17.731372833251953, "learning_rate": 9.936274509803922e-06, "loss": 33.2881, "step": 13536 }, { "epoch": 322.31044776119404, "grad_norm": 17.786243438720703, "learning_rate": 9.935574229691878e-06, "loss": 33.0445, "step": 13537 }, { "epoch": 322.33432835820895, "grad_norm": 17.036073684692383, "learning_rate": 9.934873949579833e-06, "loss": 33.6986, "step": 13538 }, { "epoch": 322.35820895522386, "grad_norm": 14.746337890625, "learning_rate": 9.934173669467789e-06, "loss": 33.6975, "step": 13539 }, { "epoch": 322.3820895522388, "grad_norm": 22.118701934814453, "learning_rate": 9.933473389355743e-06, "loss": 32.4272, "step": 13540 }, { "epoch": 322.40597014925373, "grad_norm": 14.167494773864746, "learning_rate": 9.932773109243698e-06, "loss": 32.7008, "step": 13541 }, { "epoch": 322.42985074626864, "grad_norm": 24.237863540649414, "learning_rate": 9.932072829131654e-06, "loss": 32.5664, "step": 13542 }, { "epoch": 322.4537313432836, "grad_norm": 20.92695426940918, "learning_rate": 9.931372549019609e-06, "loss": 34.5714, "step": 13543 }, { "epoch": 322.4776119402985, "grad_norm": 30.745403289794922, "learning_rate": 9.930672268907565e-06, "loss": 34.4972, "step": 13544 }, { "epoch": 322.5014925373134, "grad_norm": 19.916486740112305, "learning_rate": 9.92997198879552e-06, "loss": 34.0381, "step": 13545 }, { "epoch": 322.52537313432833, "grad_norm": 36.06111145019531, "learning_rate": 9.929271708683474e-06, "loss": 33.7978, "step": 13546 }, { "epoch": 322.5492537313433, "grad_norm": 29.741985321044922, "learning_rate": 9.92857142857143e-06, "loss": 34.1062, "step": 13547 }, { "epoch": 322.5731343283582, "grad_norm": 29.04802131652832, "learning_rate": 9.927871148459384e-06, "loss": 32.8579, "step": 13548 }, { "epoch": 322.5970149253731, "grad_norm": 24.02836036682129, "learning_rate": 9.92717086834734e-06, "loss": 33.5527, "step": 13549 }, { "epoch": 322.6208955223881, "grad_norm": 28.645822525024414, "learning_rate": 9.926470588235295e-06, "loss": 32.353, "step": 13550 }, { "epoch": 322.644776119403, "grad_norm": 24.66539192199707, "learning_rate": 9.925770308123251e-06, "loss": 34.2256, "step": 13551 }, { "epoch": 322.6686567164179, "grad_norm": 29.890790939331055, "learning_rate": 9.925070028011206e-06, "loss": 34.1128, "step": 13552 }, { "epoch": 322.6925373134328, "grad_norm": 25.194040298461914, "learning_rate": 9.92436974789916e-06, "loss": 33.6929, "step": 13553 }, { "epoch": 322.7164179104478, "grad_norm": 24.205354690551758, "learning_rate": 9.923669467787116e-06, "loss": 33.0597, "step": 13554 }, { "epoch": 322.7402985074627, "grad_norm": 27.894758224487305, "learning_rate": 9.92296918767507e-06, "loss": 32.5365, "step": 13555 }, { "epoch": 322.7641791044776, "grad_norm": 18.561086654663086, "learning_rate": 9.922268907563027e-06, "loss": 34.658, "step": 13556 }, { "epoch": 322.78805970149256, "grad_norm": 32.595176696777344, "learning_rate": 9.921568627450981e-06, "loss": 33.1968, "step": 13557 }, { "epoch": 322.81194029850747, "grad_norm": 24.86639404296875, "learning_rate": 9.920868347338937e-06, "loss": 33.1497, "step": 13558 }, { "epoch": 322.8358208955224, "grad_norm": 34.50840377807617, "learning_rate": 9.920168067226892e-06, "loss": 33.9656, "step": 13559 }, { "epoch": 322.85970149253734, "grad_norm": 28.974563598632812, "learning_rate": 9.919467787114846e-06, "loss": 34.0148, "step": 13560 }, { "epoch": 322.88358208955225, "grad_norm": 33.857723236083984, "learning_rate": 9.918767507002803e-06, "loss": 32.444, "step": 13561 }, { "epoch": 322.90746268656716, "grad_norm": 24.41487693786621, "learning_rate": 9.918067226890757e-06, "loss": 32.2329, "step": 13562 }, { "epoch": 322.93134328358207, "grad_norm": 34.2807731628418, "learning_rate": 9.917366946778713e-06, "loss": 32.5119, "step": 13563 }, { "epoch": 322.95522388059703, "grad_norm": 29.484289169311523, "learning_rate": 9.916666666666668e-06, "loss": 33.5622, "step": 13564 }, { "epoch": 322.97910447761194, "grad_norm": 34.17534637451172, "learning_rate": 9.915966386554622e-06, "loss": 33.6399, "step": 13565 }, { "epoch": 323.0, "grad_norm": 25.70732879638672, "learning_rate": 9.915266106442578e-06, "loss": 28.7622, "step": 13566 }, { "epoch": 323.0238805970149, "grad_norm": 37.89349365234375, "learning_rate": 9.914565826330533e-06, "loss": 32.6216, "step": 13567 }, { "epoch": 323.0477611940299, "grad_norm": 31.649921417236328, "learning_rate": 9.913865546218489e-06, "loss": 33.972, "step": 13568 }, { "epoch": 323.0716417910448, "grad_norm": 29.75125503540039, "learning_rate": 9.913165266106443e-06, "loss": 34.0283, "step": 13569 }, { "epoch": 323.0955223880597, "grad_norm": 28.45866584777832, "learning_rate": 9.9124649859944e-06, "loss": 34.4295, "step": 13570 }, { "epoch": 323.1194029850746, "grad_norm": 32.22605895996094, "learning_rate": 9.911764705882354e-06, "loss": 33.6811, "step": 13571 }, { "epoch": 323.14328358208957, "grad_norm": 23.550739288330078, "learning_rate": 9.911064425770309e-06, "loss": 33.5812, "step": 13572 }, { "epoch": 323.1671641791045, "grad_norm": 36.9298210144043, "learning_rate": 9.910364145658265e-06, "loss": 32.711, "step": 13573 }, { "epoch": 323.1910447761194, "grad_norm": 30.044294357299805, "learning_rate": 9.90966386554622e-06, "loss": 33.1097, "step": 13574 }, { "epoch": 323.21492537313435, "grad_norm": 30.1539249420166, "learning_rate": 9.908963585434175e-06, "loss": 33.0006, "step": 13575 }, { "epoch": 323.23880597014926, "grad_norm": 29.418554306030273, "learning_rate": 9.90826330532213e-06, "loss": 34.5263, "step": 13576 }, { "epoch": 323.26268656716417, "grad_norm": 29.885913848876953, "learning_rate": 9.907563025210084e-06, "loss": 32.6311, "step": 13577 }, { "epoch": 323.28656716417913, "grad_norm": 27.49765968322754, "learning_rate": 9.90686274509804e-06, "loss": 32.1066, "step": 13578 }, { "epoch": 323.31044776119404, "grad_norm": 34.50300979614258, "learning_rate": 9.906162464985995e-06, "loss": 34.8589, "step": 13579 }, { "epoch": 323.33432835820895, "grad_norm": 30.008867263793945, "learning_rate": 9.905462184873951e-06, "loss": 34.358, "step": 13580 }, { "epoch": 323.35820895522386, "grad_norm": 25.91068458557129, "learning_rate": 9.904761904761906e-06, "loss": 32.0865, "step": 13581 }, { "epoch": 323.3820895522388, "grad_norm": 27.69209098815918, "learning_rate": 9.904061624649862e-06, "loss": 33.5425, "step": 13582 }, { "epoch": 323.40597014925373, "grad_norm": 31.449119567871094, "learning_rate": 9.903361344537816e-06, "loss": 32.9621, "step": 13583 }, { "epoch": 323.42985074626864, "grad_norm": 27.970502853393555, "learning_rate": 9.90266106442577e-06, "loss": 33.5539, "step": 13584 }, { "epoch": 323.4537313432836, "grad_norm": 34.021053314208984, "learning_rate": 9.901960784313727e-06, "loss": 34.3935, "step": 13585 }, { "epoch": 323.4776119402985, "grad_norm": 31.655317306518555, "learning_rate": 9.901260504201681e-06, "loss": 33.3564, "step": 13586 }, { "epoch": 323.5014925373134, "grad_norm": 31.00661849975586, "learning_rate": 9.900560224089638e-06, "loss": 34.4517, "step": 13587 }, { "epoch": 323.52537313432833, "grad_norm": 27.06855010986328, "learning_rate": 9.899859943977592e-06, "loss": 34.3873, "step": 13588 }, { "epoch": 323.5492537313433, "grad_norm": 28.78382682800293, "learning_rate": 9.899159663865548e-06, "loss": 34.0071, "step": 13589 }, { "epoch": 323.5731343283582, "grad_norm": 23.918489456176758, "learning_rate": 9.898459383753503e-06, "loss": 32.5166, "step": 13590 }, { "epoch": 323.5970149253731, "grad_norm": 31.071475982666016, "learning_rate": 9.897759103641457e-06, "loss": 32.4659, "step": 13591 }, { "epoch": 323.6208955223881, "grad_norm": NaN, "learning_rate": 9.897058823529413e-06, "loss": 33.3437, "step": 13592 }, { "epoch": 323.644776119403, "grad_norm": 27.127063751220703, "learning_rate": 9.897058823529413e-06, "loss": 33.7336, "step": 13593 }, { "epoch": 323.6686567164179, "grad_norm": 32.65732955932617, "learning_rate": 9.896358543417368e-06, "loss": 34.4095, "step": 13594 }, { "epoch": 323.6925373134328, "grad_norm": 65.4323501586914, "learning_rate": 9.895658263305324e-06, "loss": 33.8844, "step": 13595 }, { "epoch": 323.7164179104478, "grad_norm": 17.279874801635742, "learning_rate": 9.894957983193278e-06, "loss": 32.8755, "step": 13596 }, { "epoch": 323.7402985074627, "grad_norm": 25.479598999023438, "learning_rate": 9.894257703081233e-06, "loss": 31.697, "step": 13597 }, { "epoch": 323.7641791044776, "grad_norm": 21.34996795654297, "learning_rate": 9.893557422969189e-06, "loss": 33.1568, "step": 13598 }, { "epoch": 323.78805970149256, "grad_norm": 21.393423080444336, "learning_rate": 9.892857142857143e-06, "loss": 33.6244, "step": 13599 }, { "epoch": 323.81194029850747, "grad_norm": 19.496997833251953, "learning_rate": 9.8921568627451e-06, "loss": 33.3905, "step": 13600 }, { "epoch": 323.8358208955224, "grad_norm": 21.00841522216797, "learning_rate": 9.891456582633054e-06, "loss": 34.5918, "step": 13601 }, { "epoch": 323.85970149253734, "grad_norm": 16.694263458251953, "learning_rate": 9.89075630252101e-06, "loss": 33.3754, "step": 13602 }, { "epoch": 323.88358208955225, "grad_norm": 23.65617561340332, "learning_rate": 9.890056022408965e-06, "loss": 34.0507, "step": 13603 }, { "epoch": 323.90746268656716, "grad_norm": 17.38776969909668, "learning_rate": 9.88935574229692e-06, "loss": 32.1662, "step": 13604 }, { "epoch": 323.93134328358207, "grad_norm": 20.21595001220703, "learning_rate": 9.888655462184875e-06, "loss": 32.3216, "step": 13605 }, { "epoch": 323.95522388059703, "grad_norm": 16.476106643676758, "learning_rate": 9.88795518207283e-06, "loss": 31.9163, "step": 13606 }, { "epoch": 323.97910447761194, "grad_norm": 22.86574935913086, "learning_rate": 9.887254901960786e-06, "loss": 33.1187, "step": 13607 }, { "epoch": 324.0, "grad_norm": 16.346389770507812, "learning_rate": 9.88655462184874e-06, "loss": 28.4992, "step": 13608 }, { "epoch": 324.0238805970149, "grad_norm": 19.007625579833984, "learning_rate": 9.885854341736697e-06, "loss": 32.1862, "step": 13609 }, { "epoch": 324.0477611940299, "grad_norm": 19.5588321685791, "learning_rate": 9.885154061624651e-06, "loss": 33.376, "step": 13610 }, { "epoch": 324.0716417910448, "grad_norm": 18.324626922607422, "learning_rate": 9.884453781512606e-06, "loss": 34.0184, "step": 13611 }, { "epoch": 324.0955223880597, "grad_norm": 19.581687927246094, "learning_rate": 9.883753501400562e-06, "loss": 33.5424, "step": 13612 }, { "epoch": 324.1194029850746, "grad_norm": 17.476701736450195, "learning_rate": 9.883053221288516e-06, "loss": 34.3911, "step": 13613 }, { "epoch": 324.14328358208957, "grad_norm": 17.81161880493164, "learning_rate": 9.882352941176472e-06, "loss": 34.0169, "step": 13614 }, { "epoch": 324.1671641791045, "grad_norm": 17.989206314086914, "learning_rate": 9.881652661064427e-06, "loss": 33.5374, "step": 13615 }, { "epoch": 324.1910447761194, "grad_norm": 19.036617279052734, "learning_rate": 9.880952380952381e-06, "loss": 34.3941, "step": 13616 }, { "epoch": 324.21492537313435, "grad_norm": 16.212793350219727, "learning_rate": 9.880252100840338e-06, "loss": 33.5002, "step": 13617 }, { "epoch": 324.23880597014926, "grad_norm": 20.590904235839844, "learning_rate": 9.879551820728292e-06, "loss": 32.86, "step": 13618 }, { "epoch": 324.26268656716417, "grad_norm": 17.457944869995117, "learning_rate": 9.878851540616248e-06, "loss": 32.5841, "step": 13619 }, { "epoch": 324.28656716417913, "grad_norm": 18.962217330932617, "learning_rate": 9.878151260504203e-06, "loss": 33.2863, "step": 13620 }, { "epoch": 324.31044776119404, "grad_norm": 18.682661056518555, "learning_rate": 9.877450980392159e-06, "loss": 32.7218, "step": 13621 }, { "epoch": 324.33432835820895, "grad_norm": 17.554895401000977, "learning_rate": 9.876750700280113e-06, "loss": 33.2864, "step": 13622 }, { "epoch": 324.35820895522386, "grad_norm": 18.783573150634766, "learning_rate": 9.876050420168068e-06, "loss": 32.5933, "step": 13623 }, { "epoch": 324.3820895522388, "grad_norm": 23.039701461791992, "learning_rate": 9.875350140056024e-06, "loss": 33.9732, "step": 13624 }, { "epoch": 324.40597014925373, "grad_norm": 17.68995475769043, "learning_rate": 9.874649859943978e-06, "loss": 34.6603, "step": 13625 }, { "epoch": 324.42985074626864, "grad_norm": 14.835780143737793, "learning_rate": 9.873949579831935e-06, "loss": 32.5128, "step": 13626 }, { "epoch": 324.4537313432836, "grad_norm": 16.030521392822266, "learning_rate": 9.873249299719889e-06, "loss": 33.105, "step": 13627 }, { "epoch": 324.4776119402985, "grad_norm": 17.529977798461914, "learning_rate": 9.872549019607845e-06, "loss": 33.6703, "step": 13628 }, { "epoch": 324.5014925373134, "grad_norm": 16.216899871826172, "learning_rate": 9.8718487394958e-06, "loss": 33.3219, "step": 13629 }, { "epoch": 324.52537313432833, "grad_norm": 18.147930145263672, "learning_rate": 9.871148459383754e-06, "loss": 33.5631, "step": 13630 }, { "epoch": 324.5492537313433, "grad_norm": 15.612064361572266, "learning_rate": 9.87044817927171e-06, "loss": 33.8981, "step": 13631 }, { "epoch": 324.5731343283582, "grad_norm": 18.85101890563965, "learning_rate": 9.869747899159665e-06, "loss": 33.0225, "step": 13632 }, { "epoch": 324.5970149253731, "grad_norm": 18.226970672607422, "learning_rate": 9.869047619047621e-06, "loss": 34.2184, "step": 13633 }, { "epoch": 324.6208955223881, "grad_norm": 15.451777458190918, "learning_rate": 9.868347338935575e-06, "loss": 33.7866, "step": 13634 }, { "epoch": 324.644776119403, "grad_norm": 16.593460083007812, "learning_rate": 9.86764705882353e-06, "loss": 33.2074, "step": 13635 }, { "epoch": 324.6686567164179, "grad_norm": 21.80547523498535, "learning_rate": 9.866946778711486e-06, "loss": 33.3635, "step": 13636 }, { "epoch": 324.6925373134328, "grad_norm": 16.522993087768555, "learning_rate": 9.86624649859944e-06, "loss": 32.4248, "step": 13637 }, { "epoch": 324.7164179104478, "grad_norm": 19.181533813476562, "learning_rate": 9.865546218487397e-06, "loss": 32.4989, "step": 13638 }, { "epoch": 324.7402985074627, "grad_norm": 15.311877250671387, "learning_rate": 9.864845938375351e-06, "loss": 33.9752, "step": 13639 }, { "epoch": 324.7641791044776, "grad_norm": 16.906938552856445, "learning_rate": 9.864145658263307e-06, "loss": 32.8355, "step": 13640 }, { "epoch": 324.78805970149256, "grad_norm": 19.86159324645996, "learning_rate": 9.863445378151262e-06, "loss": 33.4738, "step": 13641 }, { "epoch": 324.81194029850747, "grad_norm": 16.217754364013672, "learning_rate": 9.862745098039216e-06, "loss": 33.3106, "step": 13642 }, { "epoch": 324.8358208955224, "grad_norm": 18.017282485961914, "learning_rate": 9.862044817927172e-06, "loss": 33.6728, "step": 13643 }, { "epoch": 324.85970149253734, "grad_norm": 19.675174713134766, "learning_rate": 9.861344537815127e-06, "loss": 33.3716, "step": 13644 }, { "epoch": 324.88358208955225, "grad_norm": 16.68932342529297, "learning_rate": 9.860644257703083e-06, "loss": 32.9598, "step": 13645 }, { "epoch": 324.90746268656716, "grad_norm": 19.709884643554688, "learning_rate": 9.859943977591038e-06, "loss": 33.3069, "step": 13646 }, { "epoch": 324.93134328358207, "grad_norm": 16.139236450195312, "learning_rate": 9.859243697478994e-06, "loss": 32.6742, "step": 13647 }, { "epoch": 324.95522388059703, "grad_norm": 19.281776428222656, "learning_rate": 9.858543417366948e-06, "loss": 33.8049, "step": 13648 }, { "epoch": 324.97910447761194, "grad_norm": 20.00028419494629, "learning_rate": 9.857843137254903e-06, "loss": 33.6957, "step": 13649 }, { "epoch": 325.0, "grad_norm": 15.398335456848145, "learning_rate": 9.857142857142859e-06, "loss": 28.2842, "step": 13650 }, { "epoch": 325.0238805970149, "grad_norm": 16.986392974853516, "learning_rate": 9.856442577030813e-06, "loss": 32.5149, "step": 13651 }, { "epoch": 325.0477611940299, "grad_norm": 18.45128631591797, "learning_rate": 9.85574229691877e-06, "loss": 33.4055, "step": 13652 }, { "epoch": 325.0716417910448, "grad_norm": 18.596572875976562, "learning_rate": 9.855042016806724e-06, "loss": 33.3133, "step": 13653 }, { "epoch": 325.0955223880597, "grad_norm": 15.175846099853516, "learning_rate": 9.854341736694678e-06, "loss": 34.0683, "step": 13654 }, { "epoch": 325.1194029850746, "grad_norm": NaN, "learning_rate": 9.853641456582635e-06, "loss": 53.2419, "step": 13655 }, { "epoch": 325.14328358208957, "grad_norm": 18.705032348632812, "learning_rate": 9.853641456582635e-06, "loss": 34.0698, "step": 13656 }, { "epoch": 325.1671641791045, "grad_norm": 15.651199340820312, "learning_rate": 9.852941176470589e-06, "loss": 32.5187, "step": 13657 }, { "epoch": 325.1910447761194, "grad_norm": 22.214773178100586, "learning_rate": 9.852240896358545e-06, "loss": 32.1164, "step": 13658 }, { "epoch": 325.21492537313435, "grad_norm": 17.206459045410156, "learning_rate": 9.8515406162465e-06, "loss": 33.3022, "step": 13659 }, { "epoch": 325.23880597014926, "grad_norm": 22.133628845214844, "learning_rate": 9.850840336134456e-06, "loss": 34.2905, "step": 13660 }, { "epoch": 325.26268656716417, "grad_norm": 21.9619140625, "learning_rate": 9.85014005602241e-06, "loss": 32.8755, "step": 13661 }, { "epoch": 325.28656716417913, "grad_norm": 17.542688369750977, "learning_rate": 9.849439775910365e-06, "loss": 33.2138, "step": 13662 }, { "epoch": 325.31044776119404, "grad_norm": 17.905460357666016, "learning_rate": 9.848739495798321e-06, "loss": 33.2268, "step": 13663 }, { "epoch": 325.33432835820895, "grad_norm": 22.583412170410156, "learning_rate": 9.848039215686275e-06, "loss": 33.402, "step": 13664 }, { "epoch": 325.35820895522386, "grad_norm": 16.1328067779541, "learning_rate": 9.847338935574232e-06, "loss": 33.3237, "step": 13665 }, { "epoch": 325.3820895522388, "grad_norm": 18.228809356689453, "learning_rate": 9.846638655462186e-06, "loss": 33.5428, "step": 13666 }, { "epoch": 325.40597014925373, "grad_norm": 31.189430236816406, "learning_rate": 9.84593837535014e-06, "loss": 32.7037, "step": 13667 }, { "epoch": 325.42985074626864, "grad_norm": 17.707508087158203, "learning_rate": 9.845238095238097e-06, "loss": 32.8267, "step": 13668 }, { "epoch": 325.4537313432836, "grad_norm": 41.8672981262207, "learning_rate": 9.844537815126051e-06, "loss": 34.5395, "step": 13669 }, { "epoch": 325.4776119402985, "grad_norm": 36.355098724365234, "learning_rate": 9.843837535014007e-06, "loss": 32.3588, "step": 13670 }, { "epoch": 325.5014925373134, "grad_norm": 33.80672073364258, "learning_rate": 9.843137254901962e-06, "loss": 34.3203, "step": 13671 }, { "epoch": 325.52537313432833, "grad_norm": 33.45317459106445, "learning_rate": 9.842436974789916e-06, "loss": 34.4399, "step": 13672 }, { "epoch": 325.5492537313433, "grad_norm": 30.496381759643555, "learning_rate": 9.84173669467787e-06, "loss": 32.9872, "step": 13673 }, { "epoch": 325.5731343283582, "grad_norm": 26.961637496948242, "learning_rate": 9.841036414565827e-06, "loss": 32.7022, "step": 13674 }, { "epoch": 325.5970149253731, "grad_norm": 38.1742057800293, "learning_rate": 9.840336134453781e-06, "loss": 33.1485, "step": 13675 }, { "epoch": 325.6208955223881, "grad_norm": 28.846521377563477, "learning_rate": 9.839635854341738e-06, "loss": 33.2062, "step": 13676 }, { "epoch": 325.644776119403, "grad_norm": 31.336265563964844, "learning_rate": 9.838935574229692e-06, "loss": 33.4373, "step": 13677 }, { "epoch": 325.6686567164179, "grad_norm": 29.466768264770508, "learning_rate": 9.838235294117647e-06, "loss": 33.34, "step": 13678 }, { "epoch": 325.6925373134328, "grad_norm": 29.28076171875, "learning_rate": 9.837535014005603e-06, "loss": 32.5852, "step": 13679 }, { "epoch": 325.7164179104478, "grad_norm": 25.039804458618164, "learning_rate": 9.836834733893557e-06, "loss": 32.6094, "step": 13680 }, { "epoch": 325.7402985074627, "grad_norm": 34.17839050292969, "learning_rate": 9.836134453781513e-06, "loss": 32.9673, "step": 13681 }, { "epoch": 325.7641791044776, "grad_norm": 32.4091682434082, "learning_rate": 9.835434173669468e-06, "loss": 34.6231, "step": 13682 }, { "epoch": 325.78805970149256, "grad_norm": 33.738441467285156, "learning_rate": 9.834733893557424e-06, "loss": 33.7207, "step": 13683 }, { "epoch": 325.81194029850747, "grad_norm": 32.711727142333984, "learning_rate": 9.834033613445378e-06, "loss": 33.501, "step": 13684 }, { "epoch": 325.8358208955224, "grad_norm": 28.681665420532227, "learning_rate": 9.833333333333333e-06, "loss": 31.8158, "step": 13685 }, { "epoch": 325.85970149253734, "grad_norm": 26.327085494995117, "learning_rate": 9.832633053221289e-06, "loss": 33.5188, "step": 13686 }, { "epoch": 325.88358208955225, "grad_norm": 32.601863861083984, "learning_rate": 9.831932773109244e-06, "loss": 33.1276, "step": 13687 }, { "epoch": 325.90746268656716, "grad_norm": 27.080595016479492, "learning_rate": 9.8312324929972e-06, "loss": 33.992, "step": 13688 }, { "epoch": 325.93134328358207, "grad_norm": 29.46283531188965, "learning_rate": 9.830532212885154e-06, "loss": 33.9901, "step": 13689 }, { "epoch": 325.95522388059703, "grad_norm": 30.93303680419922, "learning_rate": 9.82983193277311e-06, "loss": 33.834, "step": 13690 }, { "epoch": 325.97910447761194, "grad_norm": 30.247838973999023, "learning_rate": 9.829131652661065e-06, "loss": 33.9371, "step": 13691 }, { "epoch": 326.0, "grad_norm": 23.760398864746094, "learning_rate": 9.82843137254902e-06, "loss": 28.7255, "step": 13692 }, { "epoch": 326.0238805970149, "grad_norm": 33.12372589111328, "learning_rate": 9.827731092436975e-06, "loss": 33.5896, "step": 13693 }, { "epoch": 326.0477611940299, "grad_norm": 26.778270721435547, "learning_rate": 9.82703081232493e-06, "loss": 32.5469, "step": 13694 }, { "epoch": 326.0716417910448, "grad_norm": 33.43372344970703, "learning_rate": 9.826330532212886e-06, "loss": 33.4109, "step": 13695 }, { "epoch": 326.0955223880597, "grad_norm": 31.532108306884766, "learning_rate": 9.82563025210084e-06, "loss": 32.6422, "step": 13696 }, { "epoch": 326.1194029850746, "grad_norm": 27.20742416381836, "learning_rate": 9.824929971988795e-06, "loss": 32.4127, "step": 13697 }, { "epoch": 326.14328358208957, "grad_norm": 26.293964385986328, "learning_rate": 9.824229691876751e-06, "loss": 34.7321, "step": 13698 }, { "epoch": 326.1671641791045, "grad_norm": 29.879390716552734, "learning_rate": 9.823529411764706e-06, "loss": 33.0367, "step": 13699 }, { "epoch": 326.1910447761194, "grad_norm": 23.536033630371094, "learning_rate": 9.822829131652662e-06, "loss": 32.8617, "step": 13700 }, { "epoch": 326.21492537313435, "grad_norm": 33.06008529663086, "learning_rate": 9.822128851540616e-06, "loss": 33.4983, "step": 13701 }, { "epoch": 326.23880597014926, "grad_norm": 30.79986000061035, "learning_rate": 9.821428571428573e-06, "loss": 32.9738, "step": 13702 }, { "epoch": 326.26268656716417, "grad_norm": 27.89885139465332, "learning_rate": 9.820728291316527e-06, "loss": 33.0293, "step": 13703 }, { "epoch": 326.28656716417913, "grad_norm": 24.00870132446289, "learning_rate": 9.820028011204481e-06, "loss": 32.3513, "step": 13704 }, { "epoch": 326.31044776119404, "grad_norm": 30.02005386352539, "learning_rate": 9.819327731092438e-06, "loss": 33.1095, "step": 13705 }, { "epoch": 326.33432835820895, "grad_norm": 21.62329864501953, "learning_rate": 9.818627450980392e-06, "loss": 32.3877, "step": 13706 }, { "epoch": 326.35820895522386, "grad_norm": 37.7509651184082, "learning_rate": 9.817927170868348e-06, "loss": 33.7486, "step": 13707 }, { "epoch": 326.3820895522388, "grad_norm": 33.87962341308594, "learning_rate": 9.817226890756303e-06, "loss": 32.0312, "step": 13708 }, { "epoch": 326.40597014925373, "grad_norm": 29.85454559326172, "learning_rate": 9.816526610644259e-06, "loss": 34.6575, "step": 13709 }, { "epoch": 326.42985074626864, "grad_norm": 27.53980255126953, "learning_rate": 9.815826330532213e-06, "loss": 34.0853, "step": 13710 }, { "epoch": 326.4537313432836, "grad_norm": 28.751605987548828, "learning_rate": 9.815126050420168e-06, "loss": 35.1167, "step": 13711 }, { "epoch": 326.4776119402985, "grad_norm": 23.75326919555664, "learning_rate": 9.814425770308124e-06, "loss": 33.5894, "step": 13712 }, { "epoch": 326.5014925373134, "grad_norm": 34.8567008972168, "learning_rate": 9.813725490196078e-06, "loss": 32.5829, "step": 13713 }, { "epoch": 326.52537313432833, "grad_norm": 28.78684425354004, "learning_rate": 9.813025210084035e-06, "loss": 33.82, "step": 13714 }, { "epoch": 326.5492537313433, "grad_norm": 30.194087982177734, "learning_rate": 9.812324929971989e-06, "loss": 34.3857, "step": 13715 }, { "epoch": 326.5731343283582, "grad_norm": 29.297618865966797, "learning_rate": 9.811624649859944e-06, "loss": 33.5312, "step": 13716 }, { "epoch": 326.5970149253731, "grad_norm": 30.788827896118164, "learning_rate": 9.8109243697479e-06, "loss": 32.5731, "step": 13717 }, { "epoch": 326.6208955223881, "grad_norm": 25.011629104614258, "learning_rate": 9.810224089635854e-06, "loss": 31.8591, "step": 13718 }, { "epoch": 326.644776119403, "grad_norm": 33.271392822265625, "learning_rate": 9.80952380952381e-06, "loss": 33.0879, "step": 13719 }, { "epoch": 326.6686567164179, "grad_norm": 30.057992935180664, "learning_rate": 9.808823529411765e-06, "loss": 34.2234, "step": 13720 }, { "epoch": 326.6925373134328, "grad_norm": 31.202980041503906, "learning_rate": 9.808123249299721e-06, "loss": 32.5695, "step": 13721 }, { "epoch": 326.7164179104478, "grad_norm": 27.695194244384766, "learning_rate": 9.807422969187676e-06, "loss": 32.8393, "step": 13722 }, { "epoch": 326.7402985074627, "grad_norm": 26.714426040649414, "learning_rate": 9.80672268907563e-06, "loss": 33.0164, "step": 13723 }, { "epoch": 326.7641791044776, "grad_norm": 26.310474395751953, "learning_rate": 9.806022408963586e-06, "loss": 33.0049, "step": 13724 }, { "epoch": 326.78805970149256, "grad_norm": 27.75501823425293, "learning_rate": 9.80532212885154e-06, "loss": 33.4657, "step": 13725 }, { "epoch": 326.81194029850747, "grad_norm": 29.981639862060547, "learning_rate": 9.804621848739497e-06, "loss": 33.5849, "step": 13726 }, { "epoch": 326.8358208955224, "grad_norm": 30.551393508911133, "learning_rate": 9.803921568627451e-06, "loss": 34.0015, "step": 13727 }, { "epoch": 326.85970149253734, "grad_norm": 29.319795608520508, "learning_rate": 9.803221288515406e-06, "loss": 33.4938, "step": 13728 }, { "epoch": 326.88358208955225, "grad_norm": 27.391109466552734, "learning_rate": 9.802521008403362e-06, "loss": 32.328, "step": 13729 }, { "epoch": 326.90746268656716, "grad_norm": 26.53662109375, "learning_rate": 9.801820728291316e-06, "loss": 34.162, "step": 13730 }, { "epoch": 326.93134328358207, "grad_norm": 29.478504180908203, "learning_rate": 9.801120448179273e-06, "loss": 33.469, "step": 13731 }, { "epoch": 326.95522388059703, "grad_norm": 25.39511489868164, "learning_rate": 9.800420168067227e-06, "loss": 34.1298, "step": 13732 }, { "epoch": 326.97910447761194, "grad_norm": 29.726009368896484, "learning_rate": 9.799719887955183e-06, "loss": 31.8594, "step": 13733 }, { "epoch": 327.0, "grad_norm": 24.417423248291016, "learning_rate": 9.799019607843138e-06, "loss": 29.1477, "step": 13734 }, { "epoch": 327.0238805970149, "grad_norm": 29.06763458251953, "learning_rate": 9.798319327731092e-06, "loss": 33.1099, "step": 13735 }, { "epoch": 327.0477611940299, "grad_norm": 24.732807159423828, "learning_rate": 9.797619047619048e-06, "loss": 32.7671, "step": 13736 }, { "epoch": 327.0716417910448, "grad_norm": 33.24147033691406, "learning_rate": 9.796918767507003e-06, "loss": 33.5277, "step": 13737 }, { "epoch": 327.0955223880597, "grad_norm": 29.07042694091797, "learning_rate": 9.796218487394959e-06, "loss": 32.2064, "step": 13738 }, { "epoch": 327.1194029850746, "grad_norm": 26.222522735595703, "learning_rate": 9.795518207282913e-06, "loss": 32.2448, "step": 13739 }, { "epoch": 327.14328358208957, "grad_norm": 21.09087371826172, "learning_rate": 9.79481792717087e-06, "loss": 32.451, "step": 13740 }, { "epoch": 327.1671641791045, "grad_norm": 31.495807647705078, "learning_rate": 9.794117647058824e-06, "loss": 33.4598, "step": 13741 }, { "epoch": 327.1910447761194, "grad_norm": 24.461978912353516, "learning_rate": 9.793417366946778e-06, "loss": 33.8964, "step": 13742 }, { "epoch": 327.21492537313435, "grad_norm": 32.850738525390625, "learning_rate": 9.792717086834735e-06, "loss": 33.3842, "step": 13743 }, { "epoch": 327.23880597014926, "grad_norm": 30.899707794189453, "learning_rate": 9.792016806722689e-06, "loss": 34.2587, "step": 13744 }, { "epoch": 327.26268656716417, "grad_norm": 26.87644386291504, "learning_rate": 9.791316526610645e-06, "loss": 33.047, "step": 13745 }, { "epoch": 327.28656716417913, "grad_norm": 29.510034561157227, "learning_rate": 9.7906162464986e-06, "loss": 33.3014, "step": 13746 }, { "epoch": 327.31044776119404, "grad_norm": 27.24442481994629, "learning_rate": 9.789915966386554e-06, "loss": 33.0478, "step": 13747 }, { "epoch": 327.33432835820895, "grad_norm": 24.131513595581055, "learning_rate": 9.78921568627451e-06, "loss": 32.6325, "step": 13748 }, { "epoch": 327.35820895522386, "grad_norm": 30.028263092041016, "learning_rate": 9.788515406162465e-06, "loss": 33.5486, "step": 13749 }, { "epoch": 327.3820895522388, "grad_norm": 23.574739456176758, "learning_rate": 9.787815126050421e-06, "loss": 32.6552, "step": 13750 }, { "epoch": 327.40597014925373, "grad_norm": 32.62236022949219, "learning_rate": 9.787114845938376e-06, "loss": 34.3096, "step": 13751 }, { "epoch": 327.42985074626864, "grad_norm": 27.095157623291016, "learning_rate": 9.786414565826332e-06, "loss": 32.5015, "step": 13752 }, { "epoch": 327.4537313432836, "grad_norm": 30.924848556518555, "learning_rate": 9.785714285714286e-06, "loss": 34.0637, "step": 13753 }, { "epoch": 327.4776119402985, "grad_norm": 28.348159790039062, "learning_rate": 9.78501400560224e-06, "loss": 33.0623, "step": 13754 }, { "epoch": 327.5014925373134, "grad_norm": 27.94837188720703, "learning_rate": 9.784313725490197e-06, "loss": 31.8369, "step": 13755 }, { "epoch": 327.52537313432833, "grad_norm": 26.77708625793457, "learning_rate": 9.783613445378151e-06, "loss": 34.8802, "step": 13756 }, { "epoch": 327.5492537313433, "grad_norm": 29.679018020629883, "learning_rate": 9.782913165266107e-06, "loss": 32.9828, "step": 13757 }, { "epoch": 327.5731343283582, "grad_norm": 25.49074363708496, "learning_rate": 9.782212885154062e-06, "loss": 32.5008, "step": 13758 }, { "epoch": 327.5970149253731, "grad_norm": 28.3179988861084, "learning_rate": 9.781512605042018e-06, "loss": 34.165, "step": 13759 }, { "epoch": 327.6208955223881, "grad_norm": 25.559968948364258, "learning_rate": 9.780812324929973e-06, "loss": 32.8886, "step": 13760 }, { "epoch": 327.644776119403, "grad_norm": NaN, "learning_rate": 9.780112044817927e-06, "loss": 39.2756, "step": 13761 }, { "epoch": 327.6686567164179, "grad_norm": 27.406524658203125, "learning_rate": 9.780112044817927e-06, "loss": 32.9339, "step": 13762 }, { "epoch": 327.6925373134328, "grad_norm": 21.1428165435791, "learning_rate": 9.779411764705883e-06, "loss": 33.5437, "step": 13763 }, { "epoch": 327.7164179104478, "grad_norm": 26.37807273864746, "learning_rate": 9.778711484593838e-06, "loss": 33.3502, "step": 13764 }, { "epoch": 327.7402985074627, "grad_norm": 20.410871505737305, "learning_rate": 9.778011204481794e-06, "loss": 32.9569, "step": 13765 }, { "epoch": 327.7641791044776, "grad_norm": 33.719017028808594, "learning_rate": 9.777310924369748e-06, "loss": 33.8595, "step": 13766 }, { "epoch": 327.78805970149256, "grad_norm": 25.57400894165039, "learning_rate": 9.776610644257703e-06, "loss": 33.9709, "step": 13767 }, { "epoch": 327.81194029850747, "grad_norm": 30.30806541442871, "learning_rate": 9.775910364145659e-06, "loss": 34.6261, "step": 13768 }, { "epoch": 327.8358208955224, "grad_norm": 26.39168930053711, "learning_rate": 9.775210084033613e-06, "loss": 31.7733, "step": 13769 }, { "epoch": 327.85970149253734, "grad_norm": 27.448068618774414, "learning_rate": 9.77450980392157e-06, "loss": 34.3574, "step": 13770 }, { "epoch": 327.88358208955225, "grad_norm": 24.3183650970459, "learning_rate": 9.773809523809524e-06, "loss": 33.3622, "step": 13771 }, { "epoch": 327.90746268656716, "grad_norm": 24.722354888916016, "learning_rate": 9.77310924369748e-06, "loss": 33.3757, "step": 13772 }, { "epoch": 327.93134328358207, "grad_norm": 21.480276107788086, "learning_rate": 9.772408963585435e-06, "loss": 33.0721, "step": 13773 }, { "epoch": 327.95522388059703, "grad_norm": 24.6737003326416, "learning_rate": 9.77170868347339e-06, "loss": 33.1352, "step": 13774 }, { "epoch": 327.97910447761194, "grad_norm": 19.086183547973633, "learning_rate": 9.771008403361345e-06, "loss": 33.8907, "step": 13775 }, { "epoch": 328.0, "grad_norm": 21.551876068115234, "learning_rate": 9.7703081232493e-06, "loss": 29.3541, "step": 13776 }, { "epoch": 328.0238805970149, "grad_norm": 20.208740234375, "learning_rate": 9.769607843137256e-06, "loss": 34.1627, "step": 13777 }, { "epoch": 328.0477611940299, "grad_norm": 22.711566925048828, "learning_rate": 9.76890756302521e-06, "loss": 33.5005, "step": 13778 }, { "epoch": 328.0716417910448, "grad_norm": 16.841520309448242, "learning_rate": 9.768207282913167e-06, "loss": 33.1527, "step": 13779 }, { "epoch": 328.0955223880597, "grad_norm": 24.088666915893555, "learning_rate": 9.767507002801121e-06, "loss": 32.2791, "step": 13780 }, { "epoch": 328.1194029850746, "grad_norm": 19.65779685974121, "learning_rate": 9.766806722689076e-06, "loss": 32.6904, "step": 13781 }, { "epoch": 328.14328358208957, "grad_norm": 22.842262268066406, "learning_rate": 9.766106442577032e-06, "loss": 33.4977, "step": 13782 }, { "epoch": 328.1671641791045, "grad_norm": 20.877944946289062, "learning_rate": 9.765406162464986e-06, "loss": 33.1758, "step": 13783 }, { "epoch": 328.1910447761194, "grad_norm": 21.674705505371094, "learning_rate": 9.764705882352942e-06, "loss": 33.8479, "step": 13784 }, { "epoch": 328.21492537313435, "grad_norm": 21.891651153564453, "learning_rate": 9.764005602240897e-06, "loss": 33.2256, "step": 13785 }, { "epoch": 328.23880597014926, "grad_norm": 18.32332992553711, "learning_rate": 9.763305322128851e-06, "loss": 33.4584, "step": 13786 }, { "epoch": 328.26268656716417, "grad_norm": 24.69319725036621, "learning_rate": 9.762605042016807e-06, "loss": 33.13, "step": 13787 }, { "epoch": 328.28656716417913, "grad_norm": 17.938190460205078, "learning_rate": 9.761904761904762e-06, "loss": 32.9584, "step": 13788 }, { "epoch": 328.31044776119404, "grad_norm": 19.446102142333984, "learning_rate": 9.761204481792718e-06, "loss": 33.2534, "step": 13789 }, { "epoch": 328.33432835820895, "grad_norm": 21.508390426635742, "learning_rate": 9.760504201680673e-06, "loss": 33.1961, "step": 13790 }, { "epoch": 328.35820895522386, "grad_norm": 16.572961807250977, "learning_rate": 9.759803921568629e-06, "loss": 34.3795, "step": 13791 }, { "epoch": 328.3820895522388, "grad_norm": 18.777446746826172, "learning_rate": 9.759103641456583e-06, "loss": 33.2926, "step": 13792 }, { "epoch": 328.40597014925373, "grad_norm": 20.581424713134766, "learning_rate": 9.758403361344538e-06, "loss": 33.4043, "step": 13793 }, { "epoch": 328.42985074626864, "grad_norm": 17.72150421142578, "learning_rate": 9.757703081232494e-06, "loss": 32.8018, "step": 13794 }, { "epoch": 328.4537313432836, "grad_norm": 16.94236946105957, "learning_rate": 9.757002801120448e-06, "loss": 32.8077, "step": 13795 }, { "epoch": 328.4776119402985, "grad_norm": 14.381454467773438, "learning_rate": 9.756302521008404e-06, "loss": 33.0152, "step": 13796 }, { "epoch": 328.5014925373134, "grad_norm": 17.40692138671875, "learning_rate": 9.755602240896359e-06, "loss": 33.7176, "step": 13797 }, { "epoch": 328.52537313432833, "grad_norm": 16.10710334777832, "learning_rate": 9.754901960784315e-06, "loss": 32.749, "step": 13798 }, { "epoch": 328.5492537313433, "grad_norm": 18.579635620117188, "learning_rate": 9.75420168067227e-06, "loss": 34.2422, "step": 13799 }, { "epoch": 328.5731343283582, "grad_norm": 20.709930419921875, "learning_rate": 9.753501400560224e-06, "loss": 33.2791, "step": 13800 }, { "epoch": 328.5970149253731, "grad_norm": 15.478633880615234, "learning_rate": 9.75280112044818e-06, "loss": 32.2788, "step": 13801 }, { "epoch": 328.6208955223881, "grad_norm": 25.690916061401367, "learning_rate": 9.752100840336135e-06, "loss": 33.4642, "step": 13802 }, { "epoch": 328.644776119403, "grad_norm": 17.838212966918945, "learning_rate": 9.751400560224091e-06, "loss": 33.502, "step": 13803 }, { "epoch": 328.6686567164179, "grad_norm": 16.468379974365234, "learning_rate": 9.750700280112045e-06, "loss": 32.5036, "step": 13804 }, { "epoch": 328.6925373134328, "grad_norm": 21.25994873046875, "learning_rate": 9.75e-06, "loss": 31.7945, "step": 13805 }, { "epoch": 328.7164179104478, "grad_norm": 15.796037673950195, "learning_rate": 9.749299719887956e-06, "loss": 33.8214, "step": 13806 }, { "epoch": 328.7402985074627, "grad_norm": 25.20906639099121, "learning_rate": 9.74859943977591e-06, "loss": 33.6889, "step": 13807 }, { "epoch": 328.7641791044776, "grad_norm": 20.97492218017578, "learning_rate": 9.747899159663867e-06, "loss": 33.1668, "step": 13808 }, { "epoch": 328.78805970149256, "grad_norm": 24.81602668762207, "learning_rate": 9.747198879551821e-06, "loss": 31.7059, "step": 13809 }, { "epoch": 328.81194029850747, "grad_norm": 22.154918670654297, "learning_rate": 9.746498599439777e-06, "loss": 33.4172, "step": 13810 }, { "epoch": 328.8358208955224, "grad_norm": 21.796457290649414, "learning_rate": 9.745798319327732e-06, "loss": 33.4108, "step": 13811 }, { "epoch": 328.85970149253734, "grad_norm": 23.29953384399414, "learning_rate": 9.745098039215686e-06, "loss": 33.3716, "step": 13812 }, { "epoch": 328.88358208955225, "grad_norm": 18.363006591796875, "learning_rate": 9.744397759103642e-06, "loss": 32.8874, "step": 13813 }, { "epoch": 328.90746268656716, "grad_norm": 21.968074798583984, "learning_rate": 9.743697478991597e-06, "loss": 33.6003, "step": 13814 }, { "epoch": 328.93134328358207, "grad_norm": 23.111059188842773, "learning_rate": 9.742997198879553e-06, "loss": 34.0608, "step": 13815 }, { "epoch": 328.95522388059703, "grad_norm": 16.447967529296875, "learning_rate": 9.742296918767507e-06, "loss": 34.2898, "step": 13816 }, { "epoch": 328.97910447761194, "grad_norm": 26.502540588378906, "learning_rate": 9.741596638655462e-06, "loss": 32.8774, "step": 13817 }, { "epoch": 329.0, "grad_norm": 15.590022087097168, "learning_rate": 9.740896358543418e-06, "loss": 29.0381, "step": 13818 }, { "epoch": 329.0238805970149, "grad_norm": 26.835067749023438, "learning_rate": 9.740196078431373e-06, "loss": 35.393, "step": 13819 }, { "epoch": 329.0477611940299, "grad_norm": 20.022567749023438, "learning_rate": 9.739495798319329e-06, "loss": 31.9097, "step": 13820 }, { "epoch": 329.0716417910448, "grad_norm": 24.32552146911621, "learning_rate": 9.738795518207283e-06, "loss": 33.6466, "step": 13821 }, { "epoch": 329.0955223880597, "grad_norm": 21.088720321655273, "learning_rate": 9.73809523809524e-06, "loss": 34.5795, "step": 13822 }, { "epoch": 329.1194029850746, "grad_norm": 20.32341194152832, "learning_rate": 9.737394957983194e-06, "loss": 33.4106, "step": 13823 }, { "epoch": 329.14328358208957, "grad_norm": 21.68365478515625, "learning_rate": 9.736694677871148e-06, "loss": 33.1667, "step": 13824 }, { "epoch": 329.1671641791045, "grad_norm": 21.892330169677734, "learning_rate": 9.735994397759105e-06, "loss": 32.7454, "step": 13825 }, { "epoch": 329.1910447761194, "grad_norm": 16.508142471313477, "learning_rate": 9.735294117647059e-06, "loss": 33.4753, "step": 13826 }, { "epoch": 329.21492537313435, "grad_norm": 24.337032318115234, "learning_rate": 9.734593837535015e-06, "loss": 32.7196, "step": 13827 }, { "epoch": 329.23880597014926, "grad_norm": 18.94352912902832, "learning_rate": 9.73389355742297e-06, "loss": 33.6667, "step": 13828 }, { "epoch": 329.26268656716417, "grad_norm": 19.301166534423828, "learning_rate": 9.733193277310926e-06, "loss": 32.2252, "step": 13829 }, { "epoch": 329.28656716417913, "grad_norm": 17.05006217956543, "learning_rate": 9.73249299719888e-06, "loss": 31.9598, "step": 13830 }, { "epoch": 329.31044776119404, "grad_norm": 20.985246658325195, "learning_rate": 9.731792717086835e-06, "loss": 33.0741, "step": 13831 }, { "epoch": 329.33432835820895, "grad_norm": 16.112356185913086, "learning_rate": 9.731092436974791e-06, "loss": 33.7621, "step": 13832 }, { "epoch": 329.35820895522386, "grad_norm": 21.292627334594727, "learning_rate": 9.730392156862745e-06, "loss": 33.2529, "step": 13833 }, { "epoch": 329.3820895522388, "grad_norm": 17.180091857910156, "learning_rate": 9.729691876750702e-06, "loss": 33.0345, "step": 13834 }, { "epoch": 329.40597014925373, "grad_norm": 23.38367462158203, "learning_rate": 9.728991596638656e-06, "loss": 32.5008, "step": 13835 }, { "epoch": 329.42985074626864, "grad_norm": 21.075260162353516, "learning_rate": 9.72829131652661e-06, "loss": 34.2434, "step": 13836 }, { "epoch": 329.4537313432836, "grad_norm": 19.69710350036621, "learning_rate": 9.727591036414567e-06, "loss": 32.2472, "step": 13837 }, { "epoch": 329.4776119402985, "grad_norm": 20.674158096313477, "learning_rate": 9.726890756302521e-06, "loss": 33.478, "step": 13838 }, { "epoch": 329.5014925373134, "grad_norm": 18.335329055786133, "learning_rate": 9.726190476190477e-06, "loss": 33.369, "step": 13839 }, { "epoch": 329.52537313432833, "grad_norm": 19.51376724243164, "learning_rate": 9.725490196078432e-06, "loss": 32.905, "step": 13840 }, { "epoch": 329.5492537313433, "grad_norm": 15.209076881408691, "learning_rate": 9.724789915966388e-06, "loss": 33.9427, "step": 13841 }, { "epoch": 329.5731343283582, "grad_norm": 15.64625072479248, "learning_rate": 9.724089635854342e-06, "loss": 32.1097, "step": 13842 }, { "epoch": 329.5970149253731, "grad_norm": 16.06794548034668, "learning_rate": 9.723389355742297e-06, "loss": 33.6461, "step": 13843 }, { "epoch": 329.6208955223881, "grad_norm": 19.47937774658203, "learning_rate": 9.722689075630253e-06, "loss": 33.8203, "step": 13844 }, { "epoch": 329.644776119403, "grad_norm": 17.933704376220703, "learning_rate": 9.721988795518208e-06, "loss": 32.5787, "step": 13845 }, { "epoch": 329.6686567164179, "grad_norm": 16.346975326538086, "learning_rate": 9.721288515406164e-06, "loss": 32.7298, "step": 13846 }, { "epoch": 329.6925373134328, "grad_norm": 16.253942489624023, "learning_rate": 9.720588235294118e-06, "loss": 34.1091, "step": 13847 }, { "epoch": 329.7164179104478, "grad_norm": 14.6922607421875, "learning_rate": 9.719887955182074e-06, "loss": 33.5265, "step": 13848 }, { "epoch": 329.7402985074627, "grad_norm": 17.18062973022461, "learning_rate": 9.719187675070029e-06, "loss": 32.9379, "step": 13849 }, { "epoch": 329.7641791044776, "grad_norm": 16.036989212036133, "learning_rate": 9.718487394957983e-06, "loss": 33.8745, "step": 13850 }, { "epoch": 329.78805970149256, "grad_norm": 17.42502784729004, "learning_rate": 9.71778711484594e-06, "loss": 33.2884, "step": 13851 }, { "epoch": 329.81194029850747, "grad_norm": 15.50560474395752, "learning_rate": 9.717086834733894e-06, "loss": 33.3494, "step": 13852 }, { "epoch": 329.8358208955224, "grad_norm": 17.172216415405273, "learning_rate": 9.71638655462185e-06, "loss": 32.7325, "step": 13853 }, { "epoch": 329.85970149253734, "grad_norm": NaN, "learning_rate": 9.715686274509805e-06, "loss": 50.0166, "step": 13854 }, { "epoch": 329.88358208955225, "grad_norm": 17.617584228515625, "learning_rate": 9.715686274509805e-06, "loss": 33.122, "step": 13855 }, { "epoch": 329.90746268656716, "grad_norm": 14.241527557373047, "learning_rate": 9.714985994397759e-06, "loss": 33.2442, "step": 13856 }, { "epoch": 329.93134328358207, "grad_norm": 14.549175262451172, "learning_rate": 9.714285714285715e-06, "loss": 33.4723, "step": 13857 }, { "epoch": 329.95522388059703, "grad_norm": 19.67365837097168, "learning_rate": 9.71358543417367e-06, "loss": 31.7926, "step": 13858 }, { "epoch": 329.97910447761194, "grad_norm": 19.864776611328125, "learning_rate": 9.712885154061626e-06, "loss": 33.3995, "step": 13859 }, { "epoch": 330.0, "grad_norm": 14.34056282043457, "learning_rate": 9.71218487394958e-06, "loss": 28.802, "step": 13860 }, { "epoch": 330.0238805970149, "grad_norm": 14.028547286987305, "learning_rate": 9.711484593837536e-06, "loss": 31.7811, "step": 13861 }, { "epoch": 330.0477611940299, "grad_norm": 15.838154792785645, "learning_rate": 9.710784313725491e-06, "loss": 33.3897, "step": 13862 }, { "epoch": 330.0716417910448, "grad_norm": 15.39886474609375, "learning_rate": 9.710084033613445e-06, "loss": 33.2612, "step": 13863 }, { "epoch": 330.0955223880597, "grad_norm": 15.418465614318848, "learning_rate": 9.709383753501402e-06, "loss": 33.7369, "step": 13864 }, { "epoch": 330.1194029850746, "grad_norm": 16.513729095458984, "learning_rate": 9.708683473389356e-06, "loss": 32.0597, "step": 13865 }, { "epoch": 330.14328358208957, "grad_norm": 16.598331451416016, "learning_rate": 9.707983193277312e-06, "loss": 33.886, "step": 13866 }, { "epoch": 330.1671641791045, "grad_norm": 19.797420501708984, "learning_rate": 9.707282913165267e-06, "loss": 32.8522, "step": 13867 }, { "epoch": 330.1910447761194, "grad_norm": 20.554298400878906, "learning_rate": 9.706582633053223e-06, "loss": 33.5831, "step": 13868 }, { "epoch": 330.21492537313435, "grad_norm": 17.538660049438477, "learning_rate": 9.705882352941177e-06, "loss": 33.5932, "step": 13869 }, { "epoch": 330.23880597014926, "grad_norm": 20.958953857421875, "learning_rate": 9.705182072829132e-06, "loss": 33.0203, "step": 13870 }, { "epoch": 330.26268656716417, "grad_norm": 22.544586181640625, "learning_rate": 9.704481792717088e-06, "loss": 33.6431, "step": 13871 }, { "epoch": 330.28656716417913, "grad_norm": 14.928828239440918, "learning_rate": 9.703781512605042e-06, "loss": 32.5074, "step": 13872 }, { "epoch": 330.31044776119404, "grad_norm": 28.557313919067383, "learning_rate": 9.703081232492999e-06, "loss": 33.2598, "step": 13873 }, { "epoch": 330.33432835820895, "grad_norm": 17.667194366455078, "learning_rate": 9.702380952380953e-06, "loss": 33.1412, "step": 13874 }, { "epoch": 330.35820895522386, "grad_norm": 26.1396541595459, "learning_rate": 9.701680672268908e-06, "loss": 33.7127, "step": 13875 }, { "epoch": 330.3820895522388, "grad_norm": 22.766267776489258, "learning_rate": 9.700980392156864e-06, "loss": 32.7905, "step": 13876 }, { "epoch": 330.40597014925373, "grad_norm": 16.63962745666504, "learning_rate": 9.700280112044818e-06, "loss": 33.6493, "step": 13877 }, { "epoch": 330.42985074626864, "grad_norm": 26.941375732421875, "learning_rate": 9.699579831932774e-06, "loss": 33.0094, "step": 13878 }, { "epoch": 330.4537313432836, "grad_norm": 20.222993850708008, "learning_rate": 9.698879551820729e-06, "loss": 32.7451, "step": 13879 }, { "epoch": 330.4776119402985, "grad_norm": 21.765905380249023, "learning_rate": 9.698179271708685e-06, "loss": 33.3798, "step": 13880 }, { "epoch": 330.5014925373134, "grad_norm": 22.75497817993164, "learning_rate": 9.69747899159664e-06, "loss": 33.6727, "step": 13881 }, { "epoch": 330.52537313432833, "grad_norm": 16.854711532592773, "learning_rate": 9.696778711484594e-06, "loss": 33.3058, "step": 13882 }, { "epoch": 330.5492537313433, "grad_norm": 24.363628387451172, "learning_rate": 9.69607843137255e-06, "loss": 33.4192, "step": 13883 }, { "epoch": 330.5731343283582, "grad_norm": 20.040849685668945, "learning_rate": 9.695378151260505e-06, "loss": 33.0446, "step": 13884 }, { "epoch": 330.5970149253731, "grad_norm": 17.442806243896484, "learning_rate": 9.69467787114846e-06, "loss": 32.7335, "step": 13885 }, { "epoch": 330.6208955223881, "grad_norm": 19.68790626525879, "learning_rate": 9.693977591036415e-06, "loss": 31.7422, "step": 13886 }, { "epoch": 330.644776119403, "grad_norm": 20.931894302368164, "learning_rate": 9.693277310924371e-06, "loss": 33.383, "step": 13887 }, { "epoch": 330.6686567164179, "grad_norm": 18.293874740600586, "learning_rate": 9.692577030812326e-06, "loss": 33.3228, "step": 13888 }, { "epoch": 330.6925373134328, "grad_norm": 23.76629066467285, "learning_rate": 9.69187675070028e-06, "loss": 33.1281, "step": 13889 }, { "epoch": 330.7164179104478, "grad_norm": 22.490915298461914, "learning_rate": 9.691176470588236e-06, "loss": 34.066, "step": 13890 }, { "epoch": 330.7402985074627, "grad_norm": 17.901865005493164, "learning_rate": 9.690476190476191e-06, "loss": 32.9509, "step": 13891 }, { "epoch": 330.7641791044776, "grad_norm": 27.096599578857422, "learning_rate": 9.689775910364147e-06, "loss": 32.5744, "step": 13892 }, { "epoch": 330.78805970149256, "grad_norm": 18.732515335083008, "learning_rate": 9.689075630252102e-06, "loss": 32.6709, "step": 13893 }, { "epoch": 330.81194029850747, "grad_norm": 30.85783576965332, "learning_rate": 9.688375350140056e-06, "loss": 32.5919, "step": 13894 }, { "epoch": 330.8358208955224, "grad_norm": 18.67921257019043, "learning_rate": 9.687675070028012e-06, "loss": 32.8729, "step": 13895 }, { "epoch": 330.85970149253734, "grad_norm": 32.25803756713867, "learning_rate": 9.686974789915967e-06, "loss": 33.9697, "step": 13896 }, { "epoch": 330.88358208955225, "grad_norm": 16.962650299072266, "learning_rate": 9.686274509803923e-06, "loss": 33.5899, "step": 13897 }, { "epoch": 330.90746268656716, "grad_norm": 38.310264587402344, "learning_rate": 9.685574229691877e-06, "loss": 34.1434, "step": 13898 }, { "epoch": 330.93134328358207, "grad_norm": 25.830568313598633, "learning_rate": 9.684873949579834e-06, "loss": 33.5231, "step": 13899 }, { "epoch": 330.95522388059703, "grad_norm": 32.52248001098633, "learning_rate": 9.684173669467788e-06, "loss": 32.9083, "step": 13900 }, { "epoch": 330.97910447761194, "grad_norm": 28.310823440551758, "learning_rate": 9.683473389355742e-06, "loss": 32.5874, "step": 13901 }, { "epoch": 331.0, "grad_norm": 28.601337432861328, "learning_rate": 9.682773109243699e-06, "loss": 29.7366, "step": 13902 }, { "epoch": 331.0238805970149, "grad_norm": 30.404438018798828, "learning_rate": 9.682072829131653e-06, "loss": 34.2454, "step": 13903 }, { "epoch": 331.0477611940299, "grad_norm": 28.96394157409668, "learning_rate": 9.68137254901961e-06, "loss": 32.3148, "step": 13904 }, { "epoch": 331.0716417910448, "grad_norm": 25.191787719726562, "learning_rate": 9.680672268907564e-06, "loss": 33.5151, "step": 13905 }, { "epoch": 331.0955223880597, "grad_norm": 31.659870147705078, "learning_rate": 9.679971988795518e-06, "loss": 33.4593, "step": 13906 }, { "epoch": 331.1194029850746, "grad_norm": 23.066295623779297, "learning_rate": 9.679271708683474e-06, "loss": 33.3316, "step": 13907 }, { "epoch": 331.14328358208957, "grad_norm": 26.33380699157715, "learning_rate": 9.678571428571429e-06, "loss": 33.7243, "step": 13908 }, { "epoch": 331.1671641791045, "grad_norm": 23.34626579284668, "learning_rate": 9.677871148459385e-06, "loss": 34.1913, "step": 13909 }, { "epoch": 331.1910447761194, "grad_norm": 28.661367416381836, "learning_rate": 9.67717086834734e-06, "loss": 32.7433, "step": 13910 }, { "epoch": 331.21492537313435, "grad_norm": 24.23579216003418, "learning_rate": 9.676470588235296e-06, "loss": 33.5343, "step": 13911 }, { "epoch": 331.23880597014926, "grad_norm": 25.383020401000977, "learning_rate": 9.67577030812325e-06, "loss": 33.8556, "step": 13912 }, { "epoch": 331.26268656716417, "grad_norm": 26.202468872070312, "learning_rate": 9.675070028011205e-06, "loss": 33.7905, "step": 13913 }, { "epoch": 331.28656716417913, "grad_norm": 17.759017944335938, "learning_rate": 9.67436974789916e-06, "loss": 32.7533, "step": 13914 }, { "epoch": 331.31044776119404, "grad_norm": 27.195541381835938, "learning_rate": 9.673669467787115e-06, "loss": 33.0935, "step": 13915 }, { "epoch": 331.33432835820895, "grad_norm": 20.963619232177734, "learning_rate": 9.672969187675071e-06, "loss": 33.7381, "step": 13916 }, { "epoch": 331.35820895522386, "grad_norm": 28.524673461914062, "learning_rate": 9.672268907563026e-06, "loss": 34.3043, "step": 13917 }, { "epoch": 331.3820895522388, "grad_norm": 23.942951202392578, "learning_rate": 9.671568627450982e-06, "loss": 33.1677, "step": 13918 }, { "epoch": 331.40597014925373, "grad_norm": 20.868167877197266, "learning_rate": 9.670868347338937e-06, "loss": 31.3524, "step": 13919 }, { "epoch": 331.42985074626864, "grad_norm": 22.58417510986328, "learning_rate": 9.670168067226891e-06, "loss": 32.643, "step": 13920 }, { "epoch": 331.4537313432836, "grad_norm": 17.47084617614746, "learning_rate": 9.669467787114847e-06, "loss": 34.4185, "step": 13921 }, { "epoch": 331.4776119402985, "grad_norm": 25.291521072387695, "learning_rate": 9.668767507002802e-06, "loss": 32.9547, "step": 13922 }, { "epoch": 331.5014925373134, "grad_norm": 18.16826820373535, "learning_rate": 9.668067226890758e-06, "loss": 32.4785, "step": 13923 }, { "epoch": 331.52537313432833, "grad_norm": 22.666799545288086, "learning_rate": 9.667366946778712e-06, "loss": 33.1056, "step": 13924 }, { "epoch": 331.5492537313433, "grad_norm": 23.67050552368164, "learning_rate": 9.666666666666667e-06, "loss": 33.1822, "step": 13925 }, { "epoch": 331.5731343283582, "grad_norm": 15.939326286315918, "learning_rate": 9.665966386554623e-06, "loss": 33.1894, "step": 13926 }, { "epoch": 331.5970149253731, "grad_norm": 21.62961769104004, "learning_rate": 9.665266106442577e-06, "loss": 33.0778, "step": 13927 }, { "epoch": 331.6208955223881, "grad_norm": 18.050323486328125, "learning_rate": 9.664565826330534e-06, "loss": 31.8663, "step": 13928 }, { "epoch": 331.644776119403, "grad_norm": 20.63920783996582, "learning_rate": 9.663865546218488e-06, "loss": 32.4513, "step": 13929 }, { "epoch": 331.6686567164179, "grad_norm": 22.199052810668945, "learning_rate": 9.663165266106444e-06, "loss": 32.0772, "step": 13930 }, { "epoch": 331.6925373134328, "grad_norm": 15.793177604675293, "learning_rate": 9.662464985994399e-06, "loss": 33.7744, "step": 13931 }, { "epoch": 331.7164179104478, "grad_norm": 20.595054626464844, "learning_rate": 9.661764705882353e-06, "loss": 32.2003, "step": 13932 }, { "epoch": 331.7402985074627, "grad_norm": 17.664241790771484, "learning_rate": 9.66106442577031e-06, "loss": 33.8355, "step": 13933 }, { "epoch": 331.7641791044776, "grad_norm": 22.56214141845703, "learning_rate": 9.660364145658264e-06, "loss": 33.6375, "step": 13934 }, { "epoch": 331.78805970149256, "grad_norm": 16.93956756591797, "learning_rate": 9.65966386554622e-06, "loss": 32.1945, "step": 13935 }, { "epoch": 331.81194029850747, "grad_norm": 16.997602462768555, "learning_rate": 9.658963585434174e-06, "loss": 34.3467, "step": 13936 }, { "epoch": 331.8358208955224, "grad_norm": 17.921266555786133, "learning_rate": 9.65826330532213e-06, "loss": 33.2735, "step": 13937 }, { "epoch": 331.85970149253734, "grad_norm": 14.891024589538574, "learning_rate": 9.657563025210085e-06, "loss": 31.8917, "step": 13938 }, { "epoch": 331.88358208955225, "grad_norm": 20.693897247314453, "learning_rate": 9.65686274509804e-06, "loss": 32.9931, "step": 13939 }, { "epoch": 331.90746268656716, "grad_norm": 18.60137176513672, "learning_rate": 9.656162464985996e-06, "loss": 34.6641, "step": 13940 }, { "epoch": 331.93134328358207, "grad_norm": 20.24396514892578, "learning_rate": 9.65546218487395e-06, "loss": 33.1107, "step": 13941 }, { "epoch": 331.95522388059703, "grad_norm": 16.75682258605957, "learning_rate": 9.654761904761906e-06, "loss": 31.5462, "step": 13942 }, { "epoch": 331.97910447761194, "grad_norm": 15.7743501663208, "learning_rate": 9.65406162464986e-06, "loss": 33.2106, "step": 13943 }, { "epoch": 332.0, "grad_norm": 16.924671173095703, "learning_rate": 9.653361344537815e-06, "loss": 27.6228, "step": 13944 }, { "epoch": 332.0238805970149, "grad_norm": 16.688642501831055, "learning_rate": 9.652661064425771e-06, "loss": 33.0291, "step": 13945 }, { "epoch": 332.0477611940299, "grad_norm": 19.363203048706055, "learning_rate": 9.651960784313726e-06, "loss": 33.4778, "step": 13946 }, { "epoch": 332.0716417910448, "grad_norm": 17.70576286315918, "learning_rate": 9.651260504201682e-06, "loss": 33.9613, "step": 13947 }, { "epoch": 332.0955223880597, "grad_norm": 21.67820930480957, "learning_rate": 9.650560224089637e-06, "loss": 33.6494, "step": 13948 }, { "epoch": 332.1194029850746, "grad_norm": 24.726451873779297, "learning_rate": 9.649859943977593e-06, "loss": 32.8248, "step": 13949 }, { "epoch": 332.14328358208957, "grad_norm": 15.790563583374023, "learning_rate": 9.649159663865547e-06, "loss": 32.1063, "step": 13950 }, { "epoch": 332.1671641791045, "grad_norm": 19.399120330810547, "learning_rate": 9.648459383753502e-06, "loss": 32.1274, "step": 13951 }, { "epoch": 332.1910447761194, "grad_norm": 24.268129348754883, "learning_rate": 9.647759103641458e-06, "loss": 32.1004, "step": 13952 }, { "epoch": 332.21492537313435, "grad_norm": 16.836997985839844, "learning_rate": 9.647058823529412e-06, "loss": 32.8896, "step": 13953 }, { "epoch": 332.23880597014926, "grad_norm": 21.344093322753906, "learning_rate": 9.646358543417368e-06, "loss": 32.7662, "step": 13954 }, { "epoch": 332.26268656716417, "grad_norm": 16.73006820678711, "learning_rate": 9.645658263305323e-06, "loss": 33.0612, "step": 13955 }, { "epoch": 332.28656716417913, "grad_norm": 19.402740478515625, "learning_rate": 9.644957983193279e-06, "loss": 32.4261, "step": 13956 }, { "epoch": 332.31044776119404, "grad_norm": 17.733530044555664, "learning_rate": 9.644257703081234e-06, "loss": 33.215, "step": 13957 }, { "epoch": 332.33432835820895, "grad_norm": 16.590065002441406, "learning_rate": 9.643557422969188e-06, "loss": 33.7284, "step": 13958 }, { "epoch": 332.35820895522386, "grad_norm": 20.320560455322266, "learning_rate": 9.642857142857144e-06, "loss": 33.1951, "step": 13959 }, { "epoch": 332.3820895522388, "grad_norm": 15.218728065490723, "learning_rate": 9.642156862745099e-06, "loss": 33.8555, "step": 13960 }, { "epoch": 332.40597014925373, "grad_norm": 20.751352310180664, "learning_rate": 9.641456582633055e-06, "loss": 33.3031, "step": 13961 }, { "epoch": 332.42985074626864, "grad_norm": 16.567758560180664, "learning_rate": 9.64075630252101e-06, "loss": 33.3654, "step": 13962 }, { "epoch": 332.4537313432836, "grad_norm": 18.319110870361328, "learning_rate": 9.640056022408964e-06, "loss": 32.4031, "step": 13963 }, { "epoch": 332.4776119402985, "grad_norm": 15.850713729858398, "learning_rate": 9.63935574229692e-06, "loss": 33.133, "step": 13964 }, { "epoch": 332.5014925373134, "grad_norm": 19.366064071655273, "learning_rate": 9.638655462184874e-06, "loss": 33.4396, "step": 13965 }, { "epoch": 332.52537313432833, "grad_norm": 15.888068199157715, "learning_rate": 9.63795518207283e-06, "loss": 33.0262, "step": 13966 }, { "epoch": 332.5492537313433, "grad_norm": 18.64305305480957, "learning_rate": 9.637254901960785e-06, "loss": 32.2376, "step": 13967 }, { "epoch": 332.5731343283582, "grad_norm": 17.76449203491211, "learning_rate": 9.636554621848741e-06, "loss": 33.7193, "step": 13968 }, { "epoch": 332.5970149253731, "grad_norm": 16.376487731933594, "learning_rate": 9.635854341736696e-06, "loss": 33.5743, "step": 13969 }, { "epoch": 332.6208955223881, "grad_norm": 15.979970932006836, "learning_rate": 9.63515406162465e-06, "loss": 33.1573, "step": 13970 }, { "epoch": 332.644776119403, "grad_norm": 19.043180465698242, "learning_rate": 9.634453781512606e-06, "loss": 32.1751, "step": 13971 }, { "epoch": 332.6686567164179, "grad_norm": 20.143051147460938, "learning_rate": 9.63375350140056e-06, "loss": 32.593, "step": 13972 }, { "epoch": 332.6925373134328, "grad_norm": 18.239852905273438, "learning_rate": 9.633053221288517e-06, "loss": 33.3211, "step": 13973 }, { "epoch": 332.7164179104478, "grad_norm": 14.168808937072754, "learning_rate": 9.632352941176471e-06, "loss": 32.7175, "step": 13974 }, { "epoch": 332.7402985074627, "grad_norm": 14.109582901000977, "learning_rate": 9.631652661064426e-06, "loss": 33.0436, "step": 13975 }, { "epoch": 332.7641791044776, "grad_norm": 16.472972869873047, "learning_rate": 9.630952380952382e-06, "loss": 33.6842, "step": 13976 }, { "epoch": 332.78805970149256, "grad_norm": 18.264995574951172, "learning_rate": 9.630252100840337e-06, "loss": 33.4841, "step": 13977 }, { "epoch": 332.81194029850747, "grad_norm": 15.346653938293457, "learning_rate": 9.629551820728293e-06, "loss": 33.1951, "step": 13978 }, { "epoch": 332.8358208955224, "grad_norm": 16.865934371948242, "learning_rate": 9.628851540616247e-06, "loss": 33.9217, "step": 13979 }, { "epoch": 332.85970149253734, "grad_norm": 18.869901657104492, "learning_rate": 9.628151260504203e-06, "loss": 32.4363, "step": 13980 }, { "epoch": 332.88358208955225, "grad_norm": 23.717571258544922, "learning_rate": 9.627450980392158e-06, "loss": 33.8844, "step": 13981 }, { "epoch": 332.90746268656716, "grad_norm": 15.471491813659668, "learning_rate": 9.626750700280112e-06, "loss": 33.0996, "step": 13982 }, { "epoch": 332.93134328358207, "grad_norm": 16.660005569458008, "learning_rate": 9.626050420168068e-06, "loss": 33.1315, "step": 13983 }, { "epoch": 332.95522388059703, "grad_norm": 17.334712982177734, "learning_rate": 9.625350140056023e-06, "loss": 32.7679, "step": 13984 }, { "epoch": 332.97910447761194, "grad_norm": 22.827878952026367, "learning_rate": 9.624649859943979e-06, "loss": 33.2822, "step": 13985 }, { "epoch": 333.0, "grad_norm": 14.875100135803223, "learning_rate": 9.623949579831934e-06, "loss": 29.5673, "step": 13986 }, { "epoch": 333.0238805970149, "grad_norm": 22.145021438598633, "learning_rate": 9.62324929971989e-06, "loss": 31.9636, "step": 13987 }, { "epoch": 333.0477611940299, "grad_norm": 19.351001739501953, "learning_rate": 9.622549019607844e-06, "loss": 32.8588, "step": 13988 }, { "epoch": 333.0716417910448, "grad_norm": 18.321104049682617, "learning_rate": 9.621848739495799e-06, "loss": 33.3639, "step": 13989 }, { "epoch": 333.0955223880597, "grad_norm": 20.317712783813477, "learning_rate": 9.621148459383755e-06, "loss": 32.5452, "step": 13990 }, { "epoch": 333.1194029850746, "grad_norm": 23.717632293701172, "learning_rate": 9.62044817927171e-06, "loss": 33.1989, "step": 13991 }, { "epoch": 333.14328358208957, "grad_norm": 15.063462257385254, "learning_rate": 9.619747899159665e-06, "loss": 32.3719, "step": 13992 }, { "epoch": 333.1671641791045, "grad_norm": 18.103557586669922, "learning_rate": 9.61904761904762e-06, "loss": 33.6221, "step": 13993 }, { "epoch": 333.1910447761194, "grad_norm": 19.14454460144043, "learning_rate": 9.618347338935574e-06, "loss": 33.2016, "step": 13994 }, { "epoch": 333.21492537313435, "grad_norm": 18.08968734741211, "learning_rate": 9.61764705882353e-06, "loss": 33.3974, "step": 13995 }, { "epoch": 333.23880597014926, "grad_norm": 15.61563777923584, "learning_rate": 9.616946778711485e-06, "loss": 32.0759, "step": 13996 }, { "epoch": 333.26268656716417, "grad_norm": 16.365129470825195, "learning_rate": 9.616246498599441e-06, "loss": 33.7363, "step": 13997 }, { "epoch": 333.28656716417913, "grad_norm": 16.383771896362305, "learning_rate": 9.615546218487396e-06, "loss": 33.9902, "step": 13998 }, { "epoch": 333.31044776119404, "grad_norm": 18.144445419311523, "learning_rate": 9.614845938375352e-06, "loss": 32.1076, "step": 13999 }, { "epoch": 333.33432835820895, "grad_norm": 15.468334197998047, "learning_rate": 9.614145658263306e-06, "loss": 32.1578, "step": 14000 }, { "epoch": 333.35820895522386, "grad_norm": 16.751747131347656, "learning_rate": 9.61344537815126e-06, "loss": 33.2815, "step": 14001 }, { "epoch": 333.3820895522388, "grad_norm": 19.470293045043945, "learning_rate": 9.612745098039217e-06, "loss": 33.7582, "step": 14002 }, { "epoch": 333.40597014925373, "grad_norm": 16.38135528564453, "learning_rate": 9.612044817927171e-06, "loss": 33.5953, "step": 14003 }, { "epoch": 333.42985074626864, "grad_norm": 20.763050079345703, "learning_rate": 9.611344537815128e-06, "loss": 32.5819, "step": 14004 }, { "epoch": 333.4537313432836, "grad_norm": 16.893936157226562, "learning_rate": 9.610644257703082e-06, "loss": 33.2231, "step": 14005 }, { "epoch": 333.4776119402985, "grad_norm": 15.477121353149414, "learning_rate": 9.609943977591038e-06, "loss": 32.3317, "step": 14006 }, { "epoch": 333.5014925373134, "grad_norm": 16.337400436401367, "learning_rate": 9.609243697478993e-06, "loss": 33.1102, "step": 14007 }, { "epoch": 333.52537313432833, "grad_norm": 19.2496337890625, "learning_rate": 9.608543417366947e-06, "loss": 33.1967, "step": 14008 }, { "epoch": 333.5492537313433, "grad_norm": 18.249853134155273, "learning_rate": 9.607843137254903e-06, "loss": 33.4722, "step": 14009 }, { "epoch": 333.5731343283582, "grad_norm": 16.634292602539062, "learning_rate": 9.607142857142858e-06, "loss": 33.5561, "step": 14010 }, { "epoch": 333.5970149253731, "grad_norm": 18.253963470458984, "learning_rate": 9.606442577030814e-06, "loss": 34.2951, "step": 14011 }, { "epoch": 333.6208955223881, "grad_norm": 17.239595413208008, "learning_rate": 9.605742296918768e-06, "loss": 32.8808, "step": 14012 }, { "epoch": 333.644776119403, "grad_norm": 24.83218002319336, "learning_rate": 9.605042016806723e-06, "loss": 33.9055, "step": 14013 }, { "epoch": 333.6686567164179, "grad_norm": 17.564966201782227, "learning_rate": 9.604341736694679e-06, "loss": 32.2637, "step": 14014 }, { "epoch": 333.6925373134328, "grad_norm": 15.301651954650879, "learning_rate": 9.603641456582634e-06, "loss": 34.7816, "step": 14015 }, { "epoch": 333.7164179104478, "grad_norm": 14.405645370483398, "learning_rate": 9.60294117647059e-06, "loss": 32.6637, "step": 14016 }, { "epoch": 333.7402985074627, "grad_norm": 19.288917541503906, "learning_rate": 9.602240896358544e-06, "loss": 33.4437, "step": 14017 }, { "epoch": 333.7641791044776, "grad_norm": 21.326412200927734, "learning_rate": 9.6015406162465e-06, "loss": 32.9978, "step": 14018 }, { "epoch": 333.78805970149256, "grad_norm": 14.843213081359863, "learning_rate": 9.600840336134455e-06, "loss": 33.1911, "step": 14019 }, { "epoch": 333.81194029850747, "grad_norm": 15.07947826385498, "learning_rate": 9.60014005602241e-06, "loss": 33.2798, "step": 14020 }, { "epoch": 333.8358208955224, "grad_norm": 19.959087371826172, "learning_rate": 9.599439775910366e-06, "loss": 33.4415, "step": 14021 }, { "epoch": 333.85970149253734, "grad_norm": 13.796445846557617, "learning_rate": 9.59873949579832e-06, "loss": 32.0115, "step": 14022 }, { "epoch": 333.88358208955225, "grad_norm": 21.80230712890625, "learning_rate": 9.598039215686276e-06, "loss": 33.4, "step": 14023 }, { "epoch": 333.90746268656716, "grad_norm": 17.446523666381836, "learning_rate": 9.59733893557423e-06, "loss": 32.1369, "step": 14024 }, { "epoch": 333.93134328358207, "grad_norm": 18.63462257385254, "learning_rate": 9.596638655462187e-06, "loss": 32.3353, "step": 14025 }, { "epoch": 333.95522388059703, "grad_norm": 17.132659912109375, "learning_rate": 9.595938375350141e-06, "loss": 32.9065, "step": 14026 }, { "epoch": 333.97910447761194, "grad_norm": 15.178614616394043, "learning_rate": 9.595238095238096e-06, "loss": 32.0331, "step": 14027 }, { "epoch": 334.0, "grad_norm": 17.505582809448242, "learning_rate": 9.594537815126052e-06, "loss": 29.6946, "step": 14028 }, { "epoch": 334.0238805970149, "grad_norm": 15.85994815826416, "learning_rate": 9.593837535014006e-06, "loss": 32.9535, "step": 14029 }, { "epoch": 334.0477611940299, "grad_norm": 19.577545166015625, "learning_rate": 9.593137254901963e-06, "loss": 34.2717, "step": 14030 }, { "epoch": 334.0716417910448, "grad_norm": 16.205463409423828, "learning_rate": 9.592436974789917e-06, "loss": 32.1737, "step": 14031 }, { "epoch": 334.0955223880597, "grad_norm": 19.973770141601562, "learning_rate": 9.591736694677871e-06, "loss": 32.687, "step": 14032 }, { "epoch": 334.1194029850746, "grad_norm": 19.85550308227539, "learning_rate": 9.591036414565828e-06, "loss": 32.7678, "step": 14033 }, { "epoch": 334.14328358208957, "grad_norm": 16.340126037597656, "learning_rate": 9.590336134453782e-06, "loss": 33.6692, "step": 14034 }, { "epoch": 334.1671641791045, "grad_norm": 19.330669403076172, "learning_rate": 9.589635854341738e-06, "loss": 32.5186, "step": 14035 }, { "epoch": 334.1910447761194, "grad_norm": 21.676372528076172, "learning_rate": 9.588935574229693e-06, "loss": 32.613, "step": 14036 }, { "epoch": 334.21492537313435, "grad_norm": 17.480918884277344, "learning_rate": 9.588235294117649e-06, "loss": 32.9289, "step": 14037 }, { "epoch": 334.23880597014926, "grad_norm": 20.410520553588867, "learning_rate": 9.587535014005603e-06, "loss": 33.6284, "step": 14038 }, { "epoch": 334.26268656716417, "grad_norm": 26.22980499267578, "learning_rate": 9.586834733893558e-06, "loss": 33.986, "step": 14039 }, { "epoch": 334.28656716417913, "grad_norm": 17.150678634643555, "learning_rate": 9.586134453781514e-06, "loss": 33.1126, "step": 14040 }, { "epoch": 334.31044776119404, "grad_norm": 16.347063064575195, "learning_rate": 9.585434173669469e-06, "loss": 32.9667, "step": 14041 }, { "epoch": 334.33432835820895, "grad_norm": 18.706422805786133, "learning_rate": 9.584733893557425e-06, "loss": 33.6479, "step": 14042 }, { "epoch": 334.35820895522386, "grad_norm": 22.569150924682617, "learning_rate": 9.584033613445379e-06, "loss": 32.4412, "step": 14043 }, { "epoch": 334.3820895522388, "grad_norm": 16.099870681762695, "learning_rate": 9.583333333333335e-06, "loss": 32.0928, "step": 14044 }, { "epoch": 334.40597014925373, "grad_norm": NaN, "learning_rate": 9.58263305322129e-06, "loss": 52.6876, "step": 14045 }, { "epoch": 334.42985074626864, "grad_norm": 16.205289840698242, "learning_rate": 9.58263305322129e-06, "loss": 32.7782, "step": 14046 }, { "epoch": 334.4537313432836, "grad_norm": 15.66600513458252, "learning_rate": 9.581932773109244e-06, "loss": 34.0963, "step": 14047 }, { "epoch": 334.4776119402985, "grad_norm": 16.28512191772461, "learning_rate": 9.5812324929972e-06, "loss": 33.2025, "step": 14048 }, { "epoch": 334.5014925373134, "grad_norm": 18.70054817199707, "learning_rate": 9.580532212885155e-06, "loss": 32.3075, "step": 14049 }, { "epoch": 334.52537313432833, "grad_norm": 18.112051010131836, "learning_rate": 9.579831932773111e-06, "loss": 32.886, "step": 14050 }, { "epoch": 334.5492537313433, "grad_norm": 18.727407455444336, "learning_rate": 9.579131652661066e-06, "loss": 32.1961, "step": 14051 }, { "epoch": 334.5731343283582, "grad_norm": 15.974007606506348, "learning_rate": 9.57843137254902e-06, "loss": 33.9628, "step": 14052 }, { "epoch": 334.5970149253731, "grad_norm": 18.82468605041504, "learning_rate": 9.577731092436976e-06, "loss": 34.2077, "step": 14053 }, { "epoch": 334.6208955223881, "grad_norm": 16.585046768188477, "learning_rate": 9.57703081232493e-06, "loss": 32.2484, "step": 14054 }, { "epoch": 334.644776119403, "grad_norm": 21.884016036987305, "learning_rate": 9.576330532212887e-06, "loss": 32.4839, "step": 14055 }, { "epoch": 334.6686567164179, "grad_norm": 22.25446891784668, "learning_rate": 9.575630252100841e-06, "loss": 32.5081, "step": 14056 }, { "epoch": 334.6925373134328, "grad_norm": 18.503116607666016, "learning_rate": 9.574929971988797e-06, "loss": 33.2844, "step": 14057 }, { "epoch": 334.7164179104478, "grad_norm": 17.866487503051758, "learning_rate": 9.574229691876752e-06, "loss": 33.05, "step": 14058 }, { "epoch": 334.7402985074627, "grad_norm": 17.552989959716797, "learning_rate": 9.573529411764706e-06, "loss": 32.8999, "step": 14059 }, { "epoch": 334.7641791044776, "grad_norm": 18.457199096679688, "learning_rate": 9.572829131652663e-06, "loss": 33.4919, "step": 14060 }, { "epoch": 334.78805970149256, "grad_norm": 16.289291381835938, "learning_rate": 9.572128851540617e-06, "loss": 33.1962, "step": 14061 }, { "epoch": 334.81194029850747, "grad_norm": 17.95917320251465, "learning_rate": 9.571428571428573e-06, "loss": 33.0722, "step": 14062 }, { "epoch": 334.8358208955224, "grad_norm": 17.195363998413086, "learning_rate": 9.570728291316528e-06, "loss": 32.9456, "step": 14063 }, { "epoch": 334.85970149253734, "grad_norm": 19.218435287475586, "learning_rate": 9.570028011204482e-06, "loss": 33.409, "step": 14064 }, { "epoch": 334.88358208955225, "grad_norm": 20.15607261657715, "learning_rate": 9.569327731092438e-06, "loss": 31.6368, "step": 14065 }, { "epoch": 334.90746268656716, "grad_norm": 20.235383987426758, "learning_rate": 9.568627450980393e-06, "loss": 33.4735, "step": 14066 }, { "epoch": 334.93134328358207, "grad_norm": 15.10827350616455, "learning_rate": 9.567927170868349e-06, "loss": 32.3908, "step": 14067 }, { "epoch": 334.95522388059703, "grad_norm": 14.904987335205078, "learning_rate": 9.567226890756303e-06, "loss": 33.6434, "step": 14068 }, { "epoch": 334.97910447761194, "grad_norm": 16.30361557006836, "learning_rate": 9.56652661064426e-06, "loss": 33.929, "step": 14069 }, { "epoch": 335.0, "grad_norm": 14.410323143005371, "learning_rate": 9.565826330532214e-06, "loss": 28.253, "step": 14070 }, { "epoch": 335.0238805970149, "grad_norm": 16.226764678955078, "learning_rate": 9.565126050420169e-06, "loss": 32.8985, "step": 14071 }, { "epoch": 335.0477611940299, "grad_norm": 19.064477920532227, "learning_rate": 9.564425770308125e-06, "loss": 33.6443, "step": 14072 }, { "epoch": 335.0716417910448, "grad_norm": 20.508211135864258, "learning_rate": 9.56372549019608e-06, "loss": 31.2859, "step": 14073 }, { "epoch": 335.0955223880597, "grad_norm": 16.088598251342773, "learning_rate": 9.563025210084035e-06, "loss": 32.9429, "step": 14074 }, { "epoch": 335.1194029850746, "grad_norm": 17.129329681396484, "learning_rate": 9.56232492997199e-06, "loss": 32.0086, "step": 14075 }, { "epoch": 335.14328358208957, "grad_norm": 22.49455451965332, "learning_rate": 9.561624649859946e-06, "loss": 32.0695, "step": 14076 }, { "epoch": 335.1671641791045, "grad_norm": 22.862733840942383, "learning_rate": 9.5609243697479e-06, "loss": 32.9638, "step": 14077 }, { "epoch": 335.1910447761194, "grad_norm": 14.46081256866455, "learning_rate": 9.560224089635855e-06, "loss": 33.0671, "step": 14078 }, { "epoch": 335.21492537313435, "grad_norm": 22.365453720092773, "learning_rate": 9.559523809523811e-06, "loss": 33.8696, "step": 14079 }, { "epoch": 335.23880597014926, "grad_norm": 18.069684982299805, "learning_rate": 9.558823529411766e-06, "loss": 34.1228, "step": 14080 }, { "epoch": 335.26268656716417, "grad_norm": 19.54884147644043, "learning_rate": 9.558123249299722e-06, "loss": 32.6555, "step": 14081 }, { "epoch": 335.28656716417913, "grad_norm": 18.429580688476562, "learning_rate": 9.557422969187676e-06, "loss": 32.3539, "step": 14082 }, { "epoch": 335.31044776119404, "grad_norm": 20.241392135620117, "learning_rate": 9.55672268907563e-06, "loss": 33.1865, "step": 14083 }, { "epoch": 335.33432835820895, "grad_norm": 16.518409729003906, "learning_rate": 9.556022408963587e-06, "loss": 33.8972, "step": 14084 }, { "epoch": 335.35820895522386, "grad_norm": 18.1431884765625, "learning_rate": 9.555322128851541e-06, "loss": 33.9726, "step": 14085 }, { "epoch": 335.3820895522388, "grad_norm": 16.836904525756836, "learning_rate": 9.554621848739497e-06, "loss": 34.3594, "step": 14086 }, { "epoch": 335.40597014925373, "grad_norm": 23.994831085205078, "learning_rate": 9.553921568627452e-06, "loss": 32.9725, "step": 14087 }, { "epoch": 335.42985074626864, "grad_norm": 19.052099227905273, "learning_rate": 9.553221288515408e-06, "loss": 32.6177, "step": 14088 }, { "epoch": 335.4537313432836, "grad_norm": 21.062023162841797, "learning_rate": 9.552521008403363e-06, "loss": 34.386, "step": 14089 }, { "epoch": 335.4776119402985, "grad_norm": 20.814306259155273, "learning_rate": 9.551820728291317e-06, "loss": 32.3291, "step": 14090 }, { "epoch": 335.5014925373134, "grad_norm": 18.172229766845703, "learning_rate": 9.551120448179273e-06, "loss": 34.0514, "step": 14091 }, { "epoch": 335.52537313432833, "grad_norm": 15.15807056427002, "learning_rate": 9.550420168067228e-06, "loss": 32.2528, "step": 14092 }, { "epoch": 335.5492537313433, "grad_norm": 23.364456176757812, "learning_rate": 9.549719887955184e-06, "loss": 32.6772, "step": 14093 }, { "epoch": 335.5731343283582, "grad_norm": 20.49326515197754, "learning_rate": 9.549019607843138e-06, "loss": 32.9553, "step": 14094 }, { "epoch": 335.5970149253731, "grad_norm": 18.637468338012695, "learning_rate": 9.548319327731095e-06, "loss": 33.2998, "step": 14095 }, { "epoch": 335.6208955223881, "grad_norm": 16.121950149536133, "learning_rate": 9.547619047619049e-06, "loss": 32.7043, "step": 14096 }, { "epoch": 335.644776119403, "grad_norm": 15.5372314453125, "learning_rate": 9.546918767507003e-06, "loss": 33.3733, "step": 14097 }, { "epoch": 335.6686567164179, "grad_norm": 22.66282844543457, "learning_rate": 9.54621848739496e-06, "loss": 32.5467, "step": 14098 }, { "epoch": 335.6925373134328, "grad_norm": 17.79058265686035, "learning_rate": 9.545518207282914e-06, "loss": 31.6198, "step": 14099 }, { "epoch": 335.7164179104478, "grad_norm": 16.973054885864258, "learning_rate": 9.54481792717087e-06, "loss": 33.6689, "step": 14100 }, { "epoch": 335.7402985074627, "grad_norm": 17.641132354736328, "learning_rate": 9.544117647058825e-06, "loss": 32.5873, "step": 14101 }, { "epoch": 335.7641791044776, "grad_norm": 19.078208923339844, "learning_rate": 9.54341736694678e-06, "loss": 32.9321, "step": 14102 }, { "epoch": 335.78805970149256, "grad_norm": 16.99062156677246, "learning_rate": 9.542717086834735e-06, "loss": 33.0054, "step": 14103 }, { "epoch": 335.81194029850747, "grad_norm": 15.178858757019043, "learning_rate": 9.54201680672269e-06, "loss": 32.666, "step": 14104 }, { "epoch": 335.8358208955224, "grad_norm": 25.96713638305664, "learning_rate": 9.541316526610646e-06, "loss": 33.3611, "step": 14105 }, { "epoch": 335.85970149253734, "grad_norm": 19.31339454650879, "learning_rate": 9.5406162464986e-06, "loss": 34.1691, "step": 14106 }, { "epoch": 335.88358208955225, "grad_norm": 14.538851737976074, "learning_rate": 9.539915966386557e-06, "loss": 32.2317, "step": 14107 }, { "epoch": 335.90746268656716, "grad_norm": 20.270048141479492, "learning_rate": 9.539215686274511e-06, "loss": 31.934, "step": 14108 }, { "epoch": 335.93134328358207, "grad_norm": 15.543272972106934, "learning_rate": 9.538515406162466e-06, "loss": 32.8392, "step": 14109 }, { "epoch": 335.95522388059703, "grad_norm": 16.131620407104492, "learning_rate": 9.537815126050422e-06, "loss": 32.7216, "step": 14110 }, { "epoch": 335.97910447761194, "grad_norm": 17.379873275756836, "learning_rate": 9.537114845938376e-06, "loss": 33.9147, "step": 14111 }, { "epoch": 336.0, "grad_norm": 17.886241912841797, "learning_rate": 9.536414565826332e-06, "loss": 28.57, "step": 14112 }, { "epoch": 336.0238805970149, "grad_norm": 16.74209213256836, "learning_rate": 9.535714285714287e-06, "loss": 33.6038, "step": 14113 }, { "epoch": 336.0477611940299, "grad_norm": 18.454927444458008, "learning_rate": 9.535014005602243e-06, "loss": 33.8839, "step": 14114 }, { "epoch": 336.0716417910448, "grad_norm": 17.335752487182617, "learning_rate": 9.534313725490198e-06, "loss": 32.6463, "step": 14115 }, { "epoch": 336.0955223880597, "grad_norm": 14.936349868774414, "learning_rate": 9.533613445378152e-06, "loss": 32.6998, "step": 14116 }, { "epoch": 336.1194029850746, "grad_norm": 20.028823852539062, "learning_rate": 9.532913165266108e-06, "loss": 32.6714, "step": 14117 }, { "epoch": 336.14328358208957, "grad_norm": 19.465471267700195, "learning_rate": 9.532212885154063e-06, "loss": 32.4848, "step": 14118 }, { "epoch": 336.1671641791045, "grad_norm": 15.841813087463379, "learning_rate": 9.531512605042019e-06, "loss": 33.5726, "step": 14119 }, { "epoch": 336.1910447761194, "grad_norm": 20.18979835510254, "learning_rate": 9.530812324929972e-06, "loss": 33.4828, "step": 14120 }, { "epoch": 336.21492537313435, "grad_norm": 19.321622848510742, "learning_rate": 9.530112044817928e-06, "loss": 33.9114, "step": 14121 }, { "epoch": 336.23880597014926, "grad_norm": 20.468835830688477, "learning_rate": 9.529411764705882e-06, "loss": 34.7023, "step": 14122 }, { "epoch": 336.26268656716417, "grad_norm": 19.255521774291992, "learning_rate": 9.528711484593838e-06, "loss": 32.7568, "step": 14123 }, { "epoch": 336.28656716417913, "grad_norm": 15.941425323486328, "learning_rate": 9.528011204481793e-06, "loss": 32.7218, "step": 14124 }, { "epoch": 336.31044776119404, "grad_norm": 15.645880699157715, "learning_rate": 9.527310924369747e-06, "loss": 32.4347, "step": 14125 }, { "epoch": 336.33432835820895, "grad_norm": 18.97342872619629, "learning_rate": 9.526610644257703e-06, "loss": 32.9201, "step": 14126 }, { "epoch": 336.35820895522386, "grad_norm": 17.18836212158203, "learning_rate": 9.525910364145658e-06, "loss": 33.3319, "step": 14127 }, { "epoch": 336.3820895522388, "grad_norm": 21.085561752319336, "learning_rate": 9.525210084033614e-06, "loss": 33.0258, "step": 14128 }, { "epoch": 336.40597014925373, "grad_norm": 23.493581771850586, "learning_rate": 9.524509803921569e-06, "loss": 32.7748, "step": 14129 }, { "epoch": 336.42985074626864, "grad_norm": 23.530038833618164, "learning_rate": 9.523809523809525e-06, "loss": 32.6311, "step": 14130 }, { "epoch": 336.4537313432836, "grad_norm": 15.708246231079102, "learning_rate": 9.52310924369748e-06, "loss": 32.34, "step": 14131 }, { "epoch": 336.4776119402985, "grad_norm": 16.440576553344727, "learning_rate": 9.522408963585434e-06, "loss": 30.9526, "step": 14132 }, { "epoch": 336.5014925373134, "grad_norm": 19.99802017211914, "learning_rate": 9.52170868347339e-06, "loss": 34.0244, "step": 14133 }, { "epoch": 336.52537313432833, "grad_norm": 14.879878997802734, "learning_rate": 9.521008403361344e-06, "loss": 32.7524, "step": 14134 }, { "epoch": 336.5492537313433, "grad_norm": 18.270427703857422, "learning_rate": 9.5203081232493e-06, "loss": 32.9277, "step": 14135 }, { "epoch": 336.5731343283582, "grad_norm": 15.599655151367188, "learning_rate": 9.519607843137255e-06, "loss": 33.0719, "step": 14136 }, { "epoch": 336.5970149253731, "grad_norm": 20.563583374023438, "learning_rate": 9.518907563025211e-06, "loss": 32.2879, "step": 14137 }, { "epoch": 336.6208955223881, "grad_norm": 19.32246208190918, "learning_rate": 9.518207282913166e-06, "loss": 33.6501, "step": 14138 }, { "epoch": 336.644776119403, "grad_norm": 17.838897705078125, "learning_rate": 9.51750700280112e-06, "loss": 32.6958, "step": 14139 }, { "epoch": 336.6686567164179, "grad_norm": 17.472997665405273, "learning_rate": 9.516806722689076e-06, "loss": 31.9068, "step": 14140 }, { "epoch": 336.6925373134328, "grad_norm": 17.576868057250977, "learning_rate": 9.51610644257703e-06, "loss": 33.8977, "step": 14141 }, { "epoch": 336.7164179104478, "grad_norm": 17.706974029541016, "learning_rate": 9.515406162464987e-06, "loss": 32.6955, "step": 14142 }, { "epoch": 336.7402985074627, "grad_norm": 19.88766098022461, "learning_rate": 9.514705882352941e-06, "loss": 31.2888, "step": 14143 }, { "epoch": 336.7641791044776, "grad_norm": 18.691804885864258, "learning_rate": 9.514005602240896e-06, "loss": 32.2483, "step": 14144 }, { "epoch": 336.78805970149256, "grad_norm": 14.847087860107422, "learning_rate": 9.513305322128852e-06, "loss": 34.7386, "step": 14145 }, { "epoch": 336.81194029850747, "grad_norm": 16.232799530029297, "learning_rate": 9.512605042016806e-06, "loss": 31.5283, "step": 14146 }, { "epoch": 336.8358208955224, "grad_norm": 17.496410369873047, "learning_rate": 9.511904761904763e-06, "loss": 32.8851, "step": 14147 }, { "epoch": 336.85970149253734, "grad_norm": 16.615825653076172, "learning_rate": 9.511204481792717e-06, "loss": 32.3683, "step": 14148 }, { "epoch": 336.88358208955225, "grad_norm": NaN, "learning_rate": 9.510504201680673e-06, "loss": 59.2316, "step": 14149 }, { "epoch": 336.90746268656716, "grad_norm": 15.206809043884277, "learning_rate": 9.510504201680673e-06, "loss": 32.6558, "step": 14150 }, { "epoch": 336.93134328358207, "grad_norm": 18.026573181152344, "learning_rate": 9.509803921568628e-06, "loss": 34.1254, "step": 14151 }, { "epoch": 336.95522388059703, "grad_norm": 20.462858200073242, "learning_rate": 9.509103641456582e-06, "loss": 32.6371, "step": 14152 }, { "epoch": 336.97910447761194, "grad_norm": 18.992738723754883, "learning_rate": 9.508403361344538e-06, "loss": 33.2755, "step": 14153 }, { "epoch": 337.0, "grad_norm": 14.200139999389648, "learning_rate": 9.507703081232493e-06, "loss": 29.2591, "step": 14154 }, { "epoch": 337.0238805970149, "grad_norm": 14.875282287597656, "learning_rate": 9.507002801120449e-06, "loss": 32.4425, "step": 14155 }, { "epoch": 337.0477611940299, "grad_norm": 17.062175750732422, "learning_rate": 9.506302521008403e-06, "loss": 32.4821, "step": 14156 }, { "epoch": 337.0716417910448, "grad_norm": 19.384296417236328, "learning_rate": 9.50560224089636e-06, "loss": 32.3341, "step": 14157 }, { "epoch": 337.0955223880597, "grad_norm": 18.337078094482422, "learning_rate": 9.504901960784314e-06, "loss": 33.4632, "step": 14158 }, { "epoch": 337.1194029850746, "grad_norm": 17.43202781677246, "learning_rate": 9.504201680672269e-06, "loss": 33.616, "step": 14159 }, { "epoch": 337.14328358208957, "grad_norm": 15.656824111938477, "learning_rate": 9.503501400560225e-06, "loss": 32.8101, "step": 14160 }, { "epoch": 337.1671641791045, "grad_norm": 15.518818855285645, "learning_rate": 9.50280112044818e-06, "loss": 32.4492, "step": 14161 }, { "epoch": 337.1910447761194, "grad_norm": 16.95665168762207, "learning_rate": 9.502100840336135e-06, "loss": 32.6265, "step": 14162 }, { "epoch": 337.21492537313435, "grad_norm": 20.434589385986328, "learning_rate": 9.50140056022409e-06, "loss": 33.026, "step": 14163 }, { "epoch": 337.23880597014926, "grad_norm": 16.341533660888672, "learning_rate": 9.500700280112044e-06, "loss": 32.0338, "step": 14164 }, { "epoch": 337.26268656716417, "grad_norm": 17.249235153198242, "learning_rate": 9.5e-06, "loss": 32.2044, "step": 14165 }, { "epoch": 337.28656716417913, "grad_norm": 24.686254501342773, "learning_rate": 9.499299719887955e-06, "loss": 32.7926, "step": 14166 }, { "epoch": 337.31044776119404, "grad_norm": 21.519001007080078, "learning_rate": 9.498599439775911e-06, "loss": 32.7667, "step": 14167 }, { "epoch": 337.33432835820895, "grad_norm": 18.206024169921875, "learning_rate": 9.497899159663866e-06, "loss": 33.5748, "step": 14168 }, { "epoch": 337.35820895522386, "grad_norm": 18.19744110107422, "learning_rate": 9.497198879551822e-06, "loss": 32.5232, "step": 14169 }, { "epoch": 337.3820895522388, "grad_norm": 27.09054183959961, "learning_rate": 9.496498599439776e-06, "loss": 33.1358, "step": 14170 }, { "epoch": 337.40597014925373, "grad_norm": 14.919368743896484, "learning_rate": 9.49579831932773e-06, "loss": 33.5639, "step": 14171 }, { "epoch": 337.42985074626864, "grad_norm": 24.887348175048828, "learning_rate": 9.495098039215687e-06, "loss": 32.4732, "step": 14172 }, { "epoch": 337.4537313432836, "grad_norm": 21.802900314331055, "learning_rate": 9.494397759103641e-06, "loss": 32.8481, "step": 14173 }, { "epoch": 337.4776119402985, "grad_norm": 17.48831558227539, "learning_rate": 9.493697478991598e-06, "loss": 32.9001, "step": 14174 }, { "epoch": 337.5014925373134, "grad_norm": 29.25938606262207, "learning_rate": 9.492997198879552e-06, "loss": 32.6143, "step": 14175 }, { "epoch": 337.52537313432833, "grad_norm": 18.84782600402832, "learning_rate": 9.492296918767508e-06, "loss": 32.6812, "step": 14176 }, { "epoch": 337.5492537313433, "grad_norm": 26.44182586669922, "learning_rate": 9.491596638655463e-06, "loss": 32.6849, "step": 14177 }, { "epoch": 337.5731343283582, "grad_norm": 19.890064239501953, "learning_rate": 9.490896358543417e-06, "loss": 32.6251, "step": 14178 }, { "epoch": 337.5970149253731, "grad_norm": 24.663236618041992, "learning_rate": 9.490196078431373e-06, "loss": 33.1442, "step": 14179 }, { "epoch": 337.6208955223881, "grad_norm": 21.09729766845703, "learning_rate": 9.489495798319328e-06, "loss": 32.1798, "step": 14180 }, { "epoch": 337.644776119403, "grad_norm": 24.468088150024414, "learning_rate": 9.488795518207284e-06, "loss": 33.5802, "step": 14181 }, { "epoch": 337.6686567164179, "grad_norm": 19.231128692626953, "learning_rate": 9.488095238095238e-06, "loss": 32.989, "step": 14182 }, { "epoch": 337.6925373134328, "grad_norm": 28.940576553344727, "learning_rate": 9.487394957983193e-06, "loss": 34.0419, "step": 14183 }, { "epoch": 337.7164179104478, "grad_norm": 21.28771209716797, "learning_rate": 9.486694677871149e-06, "loss": 33.7646, "step": 14184 }, { "epoch": 337.7402985074627, "grad_norm": 31.944252014160156, "learning_rate": 9.485994397759104e-06, "loss": 33.3507, "step": 14185 }, { "epoch": 337.7641791044776, "grad_norm": 28.118261337280273, "learning_rate": 9.48529411764706e-06, "loss": 34.0282, "step": 14186 }, { "epoch": 337.78805970149256, "grad_norm": 27.06432342529297, "learning_rate": 9.484593837535014e-06, "loss": 31.9742, "step": 14187 }, { "epoch": 337.81194029850747, "grad_norm": 24.204050064086914, "learning_rate": 9.48389355742297e-06, "loss": 33.7301, "step": 14188 }, { "epoch": 337.8358208955224, "grad_norm": 22.309860229492188, "learning_rate": 9.483193277310925e-06, "loss": 34.0992, "step": 14189 }, { "epoch": 337.85970149253734, "grad_norm": 27.0042724609375, "learning_rate": 9.48249299719888e-06, "loss": 32.8996, "step": 14190 }, { "epoch": 337.88358208955225, "grad_norm": 20.163787841796875, "learning_rate": 9.481792717086835e-06, "loss": 31.9279, "step": 14191 }, { "epoch": 337.90746268656716, "grad_norm": 26.834156036376953, "learning_rate": 9.48109243697479e-06, "loss": 33.403, "step": 14192 }, { "epoch": 337.93134328358207, "grad_norm": 19.65085792541504, "learning_rate": 9.480392156862746e-06, "loss": 32.299, "step": 14193 }, { "epoch": 337.95522388059703, "grad_norm": 24.564346313476562, "learning_rate": 9.4796918767507e-06, "loss": 32.4174, "step": 14194 }, { "epoch": 337.97910447761194, "grad_norm": 25.409894943237305, "learning_rate": 9.478991596638657e-06, "loss": 32.9881, "step": 14195 }, { "epoch": 338.0, "grad_norm": 15.833084106445312, "learning_rate": 9.478291316526611e-06, "loss": 29.332, "step": 14196 }, { "epoch": 338.0238805970149, "grad_norm": 26.590150833129883, "learning_rate": 9.477591036414566e-06, "loss": 33.5099, "step": 14197 }, { "epoch": 338.0477611940299, "grad_norm": 20.627695083618164, "learning_rate": 9.476890756302522e-06, "loss": 33.9682, "step": 14198 }, { "epoch": 338.0716417910448, "grad_norm": 22.874095916748047, "learning_rate": 9.476190476190476e-06, "loss": 32.7461, "step": 14199 }, { "epoch": 338.0955223880597, "grad_norm": 24.205812454223633, "learning_rate": 9.475490196078432e-06, "loss": 32.2976, "step": 14200 }, { "epoch": 338.1194029850746, "grad_norm": 19.102245330810547, "learning_rate": 9.474789915966387e-06, "loss": 33.6175, "step": 14201 }, { "epoch": 338.14328358208957, "grad_norm": 24.30849266052246, "learning_rate": 9.474089635854341e-06, "loss": 32.922, "step": 14202 }, { "epoch": 338.1671641791045, "grad_norm": 21.702083587646484, "learning_rate": 9.473389355742298e-06, "loss": 32.5274, "step": 14203 }, { "epoch": 338.1910447761194, "grad_norm": 18.384666442871094, "learning_rate": 9.472689075630252e-06, "loss": 33.8106, "step": 14204 }, { "epoch": 338.21492537313435, "grad_norm": 26.47401237487793, "learning_rate": 9.471988795518208e-06, "loss": 31.6716, "step": 14205 }, { "epoch": 338.23880597014926, "grad_norm": 21.23504638671875, "learning_rate": 9.471288515406163e-06, "loss": 32.9033, "step": 14206 }, { "epoch": 338.26268656716417, "grad_norm": 19.06757354736328, "learning_rate": 9.470588235294119e-06, "loss": 32.307, "step": 14207 }, { "epoch": 338.28656716417913, "grad_norm": 28.329666137695312, "learning_rate": 9.469887955182073e-06, "loss": 32.2881, "step": 14208 }, { "epoch": 338.31044776119404, "grad_norm": 15.802681922912598, "learning_rate": 9.469187675070028e-06, "loss": 31.7809, "step": 14209 }, { "epoch": 338.33432835820895, "grad_norm": 27.86570167541504, "learning_rate": 9.468487394957984e-06, "loss": 31.7268, "step": 14210 }, { "epoch": 338.35820895522386, "grad_norm": 19.854049682617188, "learning_rate": 9.467787114845938e-06, "loss": 32.9964, "step": 14211 }, { "epoch": 338.3820895522388, "grad_norm": 21.1588134765625, "learning_rate": 9.467086834733895e-06, "loss": 31.9979, "step": 14212 }, { "epoch": 338.40597014925373, "grad_norm": 28.05729103088379, "learning_rate": 9.466386554621849e-06, "loss": 33.3669, "step": 14213 }, { "epoch": 338.42985074626864, "grad_norm": 17.63733673095703, "learning_rate": 9.465686274509804e-06, "loss": 32.252, "step": 14214 }, { "epoch": 338.4537313432836, "grad_norm": 35.62979507446289, "learning_rate": 9.46498599439776e-06, "loss": 34.5511, "step": 14215 }, { "epoch": 338.4776119402985, "grad_norm": 21.031435012817383, "learning_rate": 9.464285714285714e-06, "loss": 32.5291, "step": 14216 }, { "epoch": 338.5014925373134, "grad_norm": 34.751930236816406, "learning_rate": 9.46358543417367e-06, "loss": 33.8158, "step": 14217 }, { "epoch": 338.52537313432833, "grad_norm": 22.33123779296875, "learning_rate": 9.462885154061625e-06, "loss": 32.7993, "step": 14218 }, { "epoch": 338.5492537313433, "grad_norm": 40.59553527832031, "learning_rate": 9.462184873949581e-06, "loss": 32.3558, "step": 14219 }, { "epoch": 338.5731343283582, "grad_norm": 30.890233993530273, "learning_rate": 9.461484593837535e-06, "loss": 32.699, "step": 14220 }, { "epoch": 338.5970149253731, "grad_norm": 40.78409194946289, "learning_rate": 9.46078431372549e-06, "loss": 32.6561, "step": 14221 }, { "epoch": 338.6208955223881, "grad_norm": 33.0892333984375, "learning_rate": 9.460084033613446e-06, "loss": 31.7217, "step": 14222 }, { "epoch": 338.644776119403, "grad_norm": 32.41324996948242, "learning_rate": 9.4593837535014e-06, "loss": 34.3416, "step": 14223 }, { "epoch": 338.6686567164179, "grad_norm": 32.27386474609375, "learning_rate": 9.458683473389357e-06, "loss": 33.208, "step": 14224 }, { "epoch": 338.6925373134328, "grad_norm": 32.213863372802734, "learning_rate": 9.457983193277311e-06, "loss": 33.9612, "step": 14225 }, { "epoch": 338.7164179104478, "grad_norm": 25.6570987701416, "learning_rate": 9.457282913165267e-06, "loss": 33.7285, "step": 14226 }, { "epoch": 338.7402985074627, "grad_norm": NaN, "learning_rate": 9.456582633053222e-06, "loss": 56.5918, "step": 14227 }, { "epoch": 338.7641791044776, "grad_norm": 31.062545776367188, "learning_rate": 9.456582633053222e-06, "loss": 32.0069, "step": 14228 }, { "epoch": 338.78805970149256, "grad_norm": 24.51154327392578, "learning_rate": 9.455882352941176e-06, "loss": 32.0931, "step": 14229 }, { "epoch": 338.81194029850747, "grad_norm": 39.734127044677734, "learning_rate": 9.455182072829132e-06, "loss": 33.1966, "step": 14230 }, { "epoch": 338.8358208955224, "grad_norm": 33.052085876464844, "learning_rate": 9.454481792717087e-06, "loss": 32.4541, "step": 14231 }, { "epoch": 338.85970149253734, "grad_norm": 37.13149642944336, "learning_rate": 9.453781512605043e-06, "loss": 33.1593, "step": 14232 }, { "epoch": 338.88358208955225, "grad_norm": 35.28886413574219, "learning_rate": 9.453081232492998e-06, "loss": 34.4264, "step": 14233 }, { "epoch": 338.90746268656716, "grad_norm": 26.909751892089844, "learning_rate": 9.452380952380952e-06, "loss": 33.0516, "step": 14234 }, { "epoch": 338.93134328358207, "grad_norm": 28.23269271850586, "learning_rate": 9.451680672268908e-06, "loss": 33.2262, "step": 14235 }, { "epoch": 338.95522388059703, "grad_norm": NaN, "learning_rate": 9.450980392156863e-06, "loss": 55.4614, "step": 14236 }, { "epoch": 338.97910447761194, "grad_norm": 31.025379180908203, "learning_rate": 9.450980392156863e-06, "loss": 32.6424, "step": 14237 }, { "epoch": 339.0, "grad_norm": 22.70488929748535, "learning_rate": 9.450280112044819e-06, "loss": 29.9982, "step": 14238 }, { "epoch": 339.0238805970149, "grad_norm": 36.64794158935547, "learning_rate": 9.449579831932773e-06, "loss": 32.7851, "step": 14239 }, { "epoch": 339.0477611940299, "grad_norm": 33.03408432006836, "learning_rate": 9.44887955182073e-06, "loss": 34.3438, "step": 14240 }, { "epoch": 339.0716417910448, "grad_norm": 32.48908996582031, "learning_rate": 9.448179271708684e-06, "loss": 32.5063, "step": 14241 }, { "epoch": 339.0955223880597, "grad_norm": 28.791791915893555, "learning_rate": 9.447478991596638e-06, "loss": 32.1059, "step": 14242 }, { "epoch": 339.1194029850746, "grad_norm": 34.80320358276367, "learning_rate": 9.446778711484595e-06, "loss": 33.997, "step": 14243 }, { "epoch": 339.14328358208957, "grad_norm": 29.57621955871582, "learning_rate": 9.446078431372549e-06, "loss": 31.8533, "step": 14244 }, { "epoch": 339.1671641791045, "grad_norm": 35.61138916015625, "learning_rate": 9.445378151260505e-06, "loss": 33.6139, "step": 14245 }, { "epoch": 339.1910447761194, "grad_norm": 34.38545608520508, "learning_rate": 9.44467787114846e-06, "loss": 33.6592, "step": 14246 }, { "epoch": 339.21492537313435, "grad_norm": 26.435632705688477, "learning_rate": 9.443977591036416e-06, "loss": 32.1824, "step": 14247 }, { "epoch": 339.23880597014926, "grad_norm": 27.746368408203125, "learning_rate": 9.44327731092437e-06, "loss": 32.9943, "step": 14248 }, { "epoch": 339.26268656716417, "grad_norm": 33.04074478149414, "learning_rate": 9.442577030812325e-06, "loss": 33.0518, "step": 14249 }, { "epoch": 339.28656716417913, "grad_norm": 23.841264724731445, "learning_rate": 9.441876750700281e-06, "loss": 32.3814, "step": 14250 }, { "epoch": 339.31044776119404, "grad_norm": 39.37582778930664, "learning_rate": 9.441176470588235e-06, "loss": 33.3083, "step": 14251 }, { "epoch": 339.33432835820895, "grad_norm": 34.970680236816406, "learning_rate": 9.440476190476192e-06, "loss": 31.2412, "step": 14252 }, { "epoch": 339.35820895522386, "grad_norm": 30.473628997802734, "learning_rate": 9.439775910364146e-06, "loss": 33.0853, "step": 14253 }, { "epoch": 339.3820895522388, "grad_norm": 31.16111946105957, "learning_rate": 9.4390756302521e-06, "loss": 32.9067, "step": 14254 }, { "epoch": 339.40597014925373, "grad_norm": 30.015321731567383, "learning_rate": 9.438375350140057e-06, "loss": 33.0965, "step": 14255 }, { "epoch": 339.42985074626864, "grad_norm": 24.88060188293457, "learning_rate": 9.437675070028011e-06, "loss": 33.2003, "step": 14256 }, { "epoch": 339.4537313432836, "grad_norm": 36.02987289428711, "learning_rate": 9.436974789915967e-06, "loss": 33.1847, "step": 14257 }, { "epoch": 339.4776119402985, "grad_norm": 29.7424259185791, "learning_rate": 9.436274509803922e-06, "loss": 32.0017, "step": 14258 }, { "epoch": 339.5014925373134, "grad_norm": 33.275089263916016, "learning_rate": 9.435574229691878e-06, "loss": 32.6896, "step": 14259 }, { "epoch": 339.52537313432833, "grad_norm": 28.949687957763672, "learning_rate": 9.434873949579833e-06, "loss": 31.5255, "step": 14260 }, { "epoch": 339.5492537313433, "grad_norm": 31.03632926940918, "learning_rate": 9.434173669467787e-06, "loss": 31.9781, "step": 14261 }, { "epoch": 339.5731343283582, "grad_norm": 28.23992919921875, "learning_rate": 9.433473389355743e-06, "loss": 31.3936, "step": 14262 }, { "epoch": 339.5970149253731, "grad_norm": 29.979907989501953, "learning_rate": 9.432773109243698e-06, "loss": 33.0441, "step": 14263 }, { "epoch": 339.6208955223881, "grad_norm": 24.1731014251709, "learning_rate": 9.432072829131654e-06, "loss": 33.505, "step": 14264 }, { "epoch": 339.644776119403, "grad_norm": 31.158857345581055, "learning_rate": 9.431372549019608e-06, "loss": 32.4075, "step": 14265 }, { "epoch": 339.6686567164179, "grad_norm": 24.861671447753906, "learning_rate": 9.430672268907564e-06, "loss": 32.0919, "step": 14266 }, { "epoch": 339.6925373134328, "grad_norm": 36.93232727050781, "learning_rate": 9.429971988795519e-06, "loss": 33.1194, "step": 14267 }, { "epoch": 339.7164179104478, "grad_norm": 34.79704284667969, "learning_rate": 9.429271708683473e-06, "loss": 33.9816, "step": 14268 }, { "epoch": 339.7402985074627, "grad_norm": 24.96257209777832, "learning_rate": 9.42857142857143e-06, "loss": 32.5782, "step": 14269 }, { "epoch": 339.7641791044776, "grad_norm": 26.007413864135742, "learning_rate": 9.427871148459384e-06, "loss": 32.541, "step": 14270 }, { "epoch": 339.78805970149256, "grad_norm": 29.220415115356445, "learning_rate": 9.42717086834734e-06, "loss": 31.3541, "step": 14271 }, { "epoch": 339.81194029850747, "grad_norm": 21.893226623535156, "learning_rate": 9.426470588235295e-06, "loss": 33.4779, "step": 14272 }, { "epoch": 339.8358208955224, "grad_norm": 38.02252197265625, "learning_rate": 9.425770308123249e-06, "loss": 33.7362, "step": 14273 }, { "epoch": 339.85970149253734, "grad_norm": 31.56594467163086, "learning_rate": 9.425070028011205e-06, "loss": 33.641, "step": 14274 }, { "epoch": 339.88358208955225, "grad_norm": 28.384130477905273, "learning_rate": 9.42436974789916e-06, "loss": 32.6683, "step": 14275 }, { "epoch": 339.90746268656716, "grad_norm": 26.01172637939453, "learning_rate": 9.423669467787116e-06, "loss": 33.7216, "step": 14276 }, { "epoch": 339.93134328358207, "grad_norm": 29.72218894958496, "learning_rate": 9.42296918767507e-06, "loss": 32.7691, "step": 14277 }, { "epoch": 339.95522388059703, "grad_norm": 25.04932403564453, "learning_rate": 9.422268907563027e-06, "loss": 33.1087, "step": 14278 }, { "epoch": 339.97910447761194, "grad_norm": 35.899986267089844, "learning_rate": 9.421568627450981e-06, "loss": 33.5594, "step": 14279 }, { "epoch": 340.0, "grad_norm": 26.640682220458984, "learning_rate": 9.420868347338936e-06, "loss": 29.6241, "step": 14280 }, { "epoch": 340.0, "step": 14280, "total_flos": 7.019919021570625e+17, "train_loss": 1.9593938636512649, "train_runtime": 25678.8464, "train_samples_per_second": 70.863, "train_steps_per_second": 0.556 }, { "epoch": 340.0238805970149, "grad_norm": 27.959510803222656, "learning_rate": 1e-05, "loss": 32.6037, "step": 14281 }, { "epoch": 340.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999338624338626e-06, "loss": 40.8788, "step": 14282 }, { "epoch": 340.0716417910448, "grad_norm": Infinity, "learning_rate": 9.999338624338626e-06, "loss": 39.7832, "step": 14283 }, { "epoch": 340.0955223880597, "grad_norm": 477.1483459472656, "learning_rate": 9.999338624338626e-06, "loss": 40.5832, "step": 14284 }, { "epoch": 340.1194029850746, "grad_norm": 299.5267028808594, "learning_rate": 9.99867724867725e-06, "loss": 38.0195, "step": 14285 }, { "epoch": 340.14328358208957, "grad_norm": 93.81217956542969, "learning_rate": 9.998015873015874e-06, "loss": 35.5388, "step": 14286 }, { "epoch": 340.1671641791045, "grad_norm": 118.56687927246094, "learning_rate": 9.997354497354498e-06, "loss": 35.5971, "step": 14287 }, { "epoch": 340.1910447761194, "grad_norm": 92.6313705444336, "learning_rate": 9.996693121693123e-06, "loss": 33.6818, "step": 14288 }, { "epoch": 340.21492537313435, "grad_norm": 62.361122131347656, "learning_rate": 9.996031746031746e-06, "loss": 32.9934, "step": 14289 }, { "epoch": 340.23880597014926, "grad_norm": 43.54608917236328, "learning_rate": 9.995370370370371e-06, "loss": 34.6618, "step": 14290 }, { "epoch": 340.26268656716417, "grad_norm": 38.393516540527344, "learning_rate": 9.994708994708996e-06, "loss": 32.9076, "step": 14291 }, { "epoch": 340.28656716417913, "grad_norm": 31.553220748901367, "learning_rate": 9.99404761904762e-06, "loss": 32.6114, "step": 14292 }, { "epoch": 340.31044776119404, "grad_norm": 30.89167022705078, "learning_rate": 9.993386243386244e-06, "loss": 34.1163, "step": 14293 }, { "epoch": 340.33432835820895, "grad_norm": 27.52607536315918, "learning_rate": 9.992724867724869e-06, "loss": 32.6266, "step": 14294 }, { "epoch": 340.35820895522386, "grad_norm": 26.616046905517578, "learning_rate": 9.992063492063493e-06, "loss": 33.7885, "step": 14295 }, { "epoch": 340.3820895522388, "grad_norm": 26.06792640686035, "learning_rate": 9.991402116402116e-06, "loss": 31.6442, "step": 14296 }, { "epoch": 340.40597014925373, "grad_norm": 20.864423751831055, "learning_rate": 9.990740740740741e-06, "loss": 33.6534, "step": 14297 }, { "epoch": 340.42985074626864, "grad_norm": 18.44462013244629, "learning_rate": 9.990079365079366e-06, "loss": 32.3902, "step": 14298 }, { "epoch": 340.4537313432836, "grad_norm": 22.97857666015625, "learning_rate": 9.989417989417989e-06, "loss": 33.2743, "step": 14299 }, { "epoch": 340.4776119402985, "grad_norm": 24.830507278442383, "learning_rate": 9.988756613756616e-06, "loss": 33.1315, "step": 14300 }, { "epoch": 340.5014925373134, "grad_norm": 18.54153060913086, "learning_rate": 9.988095238095239e-06, "loss": 33.2882, "step": 14301 }, { "epoch": 340.52537313432833, "grad_norm": 24.46211051940918, "learning_rate": 9.987433862433864e-06, "loss": 33.7494, "step": 14302 }, { "epoch": 340.5492537313433, "grad_norm": 19.64615821838379, "learning_rate": 9.986772486772488e-06, "loss": 33.2569, "step": 14303 }, { "epoch": 340.5731343283582, "grad_norm": 20.825637817382812, "learning_rate": 9.986111111111111e-06, "loss": 33.0892, "step": 14304 }, { "epoch": 340.5970149253731, "grad_norm": 15.84910774230957, "learning_rate": 9.985449735449736e-06, "loss": 33.0573, "step": 14305 }, { "epoch": 340.6208955223881, "grad_norm": 21.941417694091797, "learning_rate": 9.984788359788361e-06, "loss": 33.7433, "step": 14306 }, { "epoch": 340.644776119403, "grad_norm": 18.16843605041504, "learning_rate": 9.984126984126986e-06, "loss": 32.7713, "step": 14307 }, { "epoch": 340.6686567164179, "grad_norm": 19.012653350830078, "learning_rate": 9.983465608465609e-06, "loss": 33.2743, "step": 14308 }, { "epoch": 340.6925373134328, "grad_norm": 17.871700286865234, "learning_rate": 9.982804232804234e-06, "loss": 33.5691, "step": 14309 }, { "epoch": 340.7164179104478, "grad_norm": 17.754318237304688, "learning_rate": 9.982142857142858e-06, "loss": 32.1907, "step": 14310 }, { "epoch": 340.7402985074627, "grad_norm": 17.912324905395508, "learning_rate": 9.981481481481482e-06, "loss": 33.3217, "step": 14311 }, { "epoch": 340.7641791044776, "grad_norm": 17.187820434570312, "learning_rate": 9.980820105820106e-06, "loss": 32.7716, "step": 14312 }, { "epoch": 340.78805970149256, "grad_norm": 21.306100845336914, "learning_rate": 9.980158730158731e-06, "loss": 32.4835, "step": 14313 }, { "epoch": 340.81194029850747, "grad_norm": 19.078615188598633, "learning_rate": 9.979497354497354e-06, "loss": 33.5441, "step": 14314 }, { "epoch": 340.8358208955224, "grad_norm": 17.367889404296875, "learning_rate": 9.97883597883598e-06, "loss": 32.7492, "step": 14315 }, { "epoch": 340.85970149253734, "grad_norm": 18.947738647460938, "learning_rate": 9.978174603174604e-06, "loss": 33.1542, "step": 14316 }, { "epoch": 340.88358208955225, "grad_norm": 22.891326904296875, "learning_rate": 9.977513227513229e-06, "loss": 32.932, "step": 14317 }, { "epoch": 340.90746268656716, "grad_norm": 15.350235939025879, "learning_rate": 9.976851851851853e-06, "loss": 32.6713, "step": 14318 }, { "epoch": 340.93134328358207, "grad_norm": 26.475204467773438, "learning_rate": 9.976190476190477e-06, "loss": 33.5343, "step": 14319 }, { "epoch": 340.95522388059703, "grad_norm": 21.695802688598633, "learning_rate": 9.975529100529101e-06, "loss": 32.3089, "step": 14320 }, { "epoch": 340.97910447761194, "grad_norm": 20.613195419311523, "learning_rate": 9.974867724867726e-06, "loss": 33.2205, "step": 14321 }, { "epoch": 341.0, "grad_norm": 14.636250495910645, "learning_rate": 9.97420634920635e-06, "loss": 28.0786, "step": 14322 }, { "epoch": 341.0238805970149, "grad_norm": 20.600358963012695, "learning_rate": 9.973544973544974e-06, "loss": 32.294, "step": 14323 }, { "epoch": 341.0477611940299, "grad_norm": 16.827571868896484, "learning_rate": 9.972883597883599e-06, "loss": 32.5052, "step": 14324 }, { "epoch": 341.0716417910448, "grad_norm": 20.91819953918457, "learning_rate": 9.972222222222224e-06, "loss": 33.0947, "step": 14325 }, { "epoch": 341.0955223880597, "grad_norm": 21.71983528137207, "learning_rate": 9.971560846560847e-06, "loss": 32.3301, "step": 14326 }, { "epoch": 341.1194029850746, "grad_norm": 17.501211166381836, "learning_rate": 9.970899470899472e-06, "loss": 33.0806, "step": 14327 }, { "epoch": 341.14328358208957, "grad_norm": 21.825477600097656, "learning_rate": 9.970238095238096e-06, "loss": 33.318, "step": 14328 }, { "epoch": 341.1671641791045, "grad_norm": 19.944629669189453, "learning_rate": 9.96957671957672e-06, "loss": 32.1097, "step": 14329 }, { "epoch": 341.1910447761194, "grad_norm": 20.726354598999023, "learning_rate": 9.968915343915346e-06, "loss": 32.8561, "step": 14330 }, { "epoch": 341.21492537313435, "grad_norm": 15.42965030670166, "learning_rate": 9.968253968253969e-06, "loss": 34.1319, "step": 14331 }, { "epoch": 341.23880597014926, "grad_norm": 19.265260696411133, "learning_rate": 9.967592592592594e-06, "loss": 33.0066, "step": 14332 }, { "epoch": 341.26268656716417, "grad_norm": 15.428890228271484, "learning_rate": 9.966931216931219e-06, "loss": 30.7677, "step": 14333 }, { "epoch": 341.28656716417913, "grad_norm": 17.121864318847656, "learning_rate": 9.966269841269842e-06, "loss": 32.5364, "step": 14334 }, { "epoch": 341.31044776119404, "grad_norm": 17.536283493041992, "learning_rate": 9.965608465608467e-06, "loss": 34.0252, "step": 14335 }, { "epoch": 341.33432835820895, "grad_norm": 18.656221389770508, "learning_rate": 9.964947089947091e-06, "loss": 33.532, "step": 14336 }, { "epoch": 341.35820895522386, "grad_norm": 18.07448387145996, "learning_rate": 9.964285714285714e-06, "loss": 32.296, "step": 14337 }, { "epoch": 341.3820895522388, "grad_norm": 15.17371654510498, "learning_rate": 9.96362433862434e-06, "loss": 33.5718, "step": 14338 }, { "epoch": 341.40597014925373, "grad_norm": 21.652860641479492, "learning_rate": 9.962962962962964e-06, "loss": 33.834, "step": 14339 }, { "epoch": 341.42985074626864, "grad_norm": 20.939512252807617, "learning_rate": 9.962301587301589e-06, "loss": 31.4863, "step": 14340 }, { "epoch": 341.4537313432836, "grad_norm": 16.739261627197266, "learning_rate": 9.961640211640212e-06, "loss": 31.4979, "step": 14341 }, { "epoch": 341.4776119402985, "grad_norm": 15.421792984008789, "learning_rate": 9.960978835978837e-06, "loss": 33.6466, "step": 14342 }, { "epoch": 341.5014925373134, "grad_norm": 17.82432746887207, "learning_rate": 9.960317460317462e-06, "loss": 33.2464, "step": 14343 }, { "epoch": 341.52537313432833, "grad_norm": 20.29499053955078, "learning_rate": 9.959656084656085e-06, "loss": 32.4114, "step": 14344 }, { "epoch": 341.5492537313433, "grad_norm": 16.78494644165039, "learning_rate": 9.958994708994711e-06, "loss": 32.0153, "step": 14345 }, { "epoch": 341.5731343283582, "grad_norm": 17.644960403442383, "learning_rate": 9.958333333333334e-06, "loss": 32.7187, "step": 14346 }, { "epoch": 341.5970149253731, "grad_norm": 14.848749160766602, "learning_rate": 9.957671957671959e-06, "loss": 33.4641, "step": 14347 }, { "epoch": 341.6208955223881, "grad_norm": 16.474485397338867, "learning_rate": 9.957010582010584e-06, "loss": 33.8312, "step": 14348 }, { "epoch": 341.644776119403, "grad_norm": 18.354982376098633, "learning_rate": 9.956349206349207e-06, "loss": 33.069, "step": 14349 }, { "epoch": 341.6686567164179, "grad_norm": 15.309164047241211, "learning_rate": 9.955687830687832e-06, "loss": 31.9663, "step": 14350 }, { "epoch": 341.6925373134328, "grad_norm": 16.006591796875, "learning_rate": 9.955026455026457e-06, "loss": 32.8982, "step": 14351 }, { "epoch": 341.7164179104478, "grad_norm": 16.061914443969727, "learning_rate": 9.95436507936508e-06, "loss": 32.4113, "step": 14352 }, { "epoch": 341.7402985074627, "grad_norm": 22.423648834228516, "learning_rate": 9.953703703703704e-06, "loss": 33.2202, "step": 14353 }, { "epoch": 341.7641791044776, "grad_norm": 15.490160942077637, "learning_rate": 9.95304232804233e-06, "loss": 33.1065, "step": 14354 }, { "epoch": 341.78805970149256, "grad_norm": 18.584867477416992, "learning_rate": 9.952380952380954e-06, "loss": 34.0665, "step": 14355 }, { "epoch": 341.81194029850747, "grad_norm": 16.091171264648438, "learning_rate": 9.951719576719577e-06, "loss": 33.7751, "step": 14356 }, { "epoch": 341.8358208955224, "grad_norm": 19.1419734954834, "learning_rate": 9.951058201058202e-06, "loss": 32.8372, "step": 14357 }, { "epoch": 341.85970149253734, "grad_norm": 16.991975784301758, "learning_rate": 9.950396825396827e-06, "loss": 32.4311, "step": 14358 }, { "epoch": 341.88358208955225, "grad_norm": 21.18120765686035, "learning_rate": 9.94973544973545e-06, "loss": 33.2953, "step": 14359 }, { "epoch": 341.90746268656716, "grad_norm": 19.203752517700195, "learning_rate": 9.949074074074075e-06, "loss": 32.4372, "step": 14360 }, { "epoch": 341.93134328358207, "grad_norm": 19.6485595703125, "learning_rate": 9.9484126984127e-06, "loss": 32.4662, "step": 14361 }, { "epoch": 341.95522388059703, "grad_norm": 15.468148231506348, "learning_rate": 9.947751322751323e-06, "loss": 32.9375, "step": 14362 }, { "epoch": 341.97910447761194, "grad_norm": 15.480298042297363, "learning_rate": 9.947089947089947e-06, "loss": 32.8921, "step": 14363 }, { "epoch": 342.0, "grad_norm": 13.073140144348145, "learning_rate": 9.946428571428572e-06, "loss": 27.3888, "step": 14364 }, { "epoch": 342.0238805970149, "grad_norm": 21.75962257385254, "learning_rate": 9.945767195767197e-06, "loss": 33.6421, "step": 14365 }, { "epoch": 342.0477611940299, "grad_norm": 18.32163429260254, "learning_rate": 9.94510582010582e-06, "loss": 32.984, "step": 14366 }, { "epoch": 342.0716417910448, "grad_norm": 13.932046890258789, "learning_rate": 9.944444444444445e-06, "loss": 32.6151, "step": 14367 }, { "epoch": 342.0955223880597, "grad_norm": 26.55956268310547, "learning_rate": 9.94378306878307e-06, "loss": 32.9738, "step": 14368 }, { "epoch": 342.1194029850746, "grad_norm": 22.966833114624023, "learning_rate": 9.943121693121693e-06, "loss": 32.3167, "step": 14369 }, { "epoch": 342.14328358208957, "grad_norm": 16.46741485595703, "learning_rate": 9.94246031746032e-06, "loss": 32.2897, "step": 14370 }, { "epoch": 342.1671641791045, "grad_norm": 22.425783157348633, "learning_rate": 9.941798941798942e-06, "loss": 32.5772, "step": 14371 }, { "epoch": 342.1910447761194, "grad_norm": 18.857101440429688, "learning_rate": 9.941137566137567e-06, "loss": 33.0011, "step": 14372 }, { "epoch": 342.21492537313435, "grad_norm": 19.67682456970215, "learning_rate": 9.940476190476192e-06, "loss": 32.5695, "step": 14373 }, { "epoch": 342.23880597014926, "grad_norm": 20.178865432739258, "learning_rate": 9.939814814814815e-06, "loss": 33.4321, "step": 14374 }, { "epoch": 342.26268656716417, "grad_norm": 18.824743270874023, "learning_rate": 9.93915343915344e-06, "loss": 32.723, "step": 14375 }, { "epoch": 342.28656716417913, "grad_norm": 18.322490692138672, "learning_rate": 9.938492063492065e-06, "loss": 31.9804, "step": 14376 }, { "epoch": 342.31044776119404, "grad_norm": 14.578727722167969, "learning_rate": 9.937830687830688e-06, "loss": 32.5729, "step": 14377 }, { "epoch": 342.33432835820895, "grad_norm": 24.338285446166992, "learning_rate": 9.937169312169313e-06, "loss": 33.423, "step": 14378 }, { "epoch": 342.35820895522386, "grad_norm": 19.33673858642578, "learning_rate": 9.936507936507937e-06, "loss": 33.3762, "step": 14379 }, { "epoch": 342.3820895522388, "grad_norm": 18.73155975341797, "learning_rate": 9.935846560846562e-06, "loss": 32.4231, "step": 14380 }, { "epoch": 342.40597014925373, "grad_norm": 22.124692916870117, "learning_rate": 9.935185185185185e-06, "loss": 34.2178, "step": 14381 }, { "epoch": 342.42985074626864, "grad_norm": 14.40739631652832, "learning_rate": 9.93452380952381e-06, "loss": 34.4726, "step": 14382 }, { "epoch": 342.4537313432836, "grad_norm": 24.34845733642578, "learning_rate": 9.933862433862435e-06, "loss": 33.1978, "step": 14383 }, { "epoch": 342.4776119402985, "grad_norm": 21.977155685424805, "learning_rate": 9.933201058201058e-06, "loss": 33.5581, "step": 14384 }, { "epoch": 342.5014925373134, "grad_norm": 20.774227142333984, "learning_rate": 9.932539682539684e-06, "loss": 33.1747, "step": 14385 }, { "epoch": 342.52537313432833, "grad_norm": 18.0212345123291, "learning_rate": 9.931878306878308e-06, "loss": 33.0015, "step": 14386 }, { "epoch": 342.5492537313433, "grad_norm": 23.497987747192383, "learning_rate": 9.931216931216932e-06, "loss": 33.0074, "step": 14387 }, { "epoch": 342.5731343283582, "grad_norm": 21.486970901489258, "learning_rate": 9.930555555555557e-06, "loss": 33.8315, "step": 14388 }, { "epoch": 342.5970149253731, "grad_norm": 16.688438415527344, "learning_rate": 9.92989417989418e-06, "loss": 33.0534, "step": 14389 }, { "epoch": 342.6208955223881, "grad_norm": 20.908672332763672, "learning_rate": 9.929232804232805e-06, "loss": 32.9924, "step": 14390 }, { "epoch": 342.644776119403, "grad_norm": 18.626937866210938, "learning_rate": 9.92857142857143e-06, "loss": 33.0165, "step": 14391 }, { "epoch": 342.6686567164179, "grad_norm": 19.471647262573242, "learning_rate": 9.927910052910053e-06, "loss": 32.2877, "step": 14392 }, { "epoch": 342.6925373134328, "grad_norm": 18.289623260498047, "learning_rate": 9.927248677248678e-06, "loss": 30.9186, "step": 14393 }, { "epoch": 342.7164179104478, "grad_norm": 15.482297897338867, "learning_rate": 9.926587301587303e-06, "loss": 32.0131, "step": 14394 }, { "epoch": 342.7402985074627, "grad_norm": 17.17262840270996, "learning_rate": 9.925925925925927e-06, "loss": 32.9729, "step": 14395 }, { "epoch": 342.7641791044776, "grad_norm": 16.10808753967285, "learning_rate": 9.92526455026455e-06, "loss": 32.9315, "step": 14396 }, { "epoch": 342.78805970149256, "grad_norm": 17.409530639648438, "learning_rate": 9.924603174603175e-06, "loss": 32.7374, "step": 14397 }, { "epoch": 342.81194029850747, "grad_norm": 15.100672721862793, "learning_rate": 9.9239417989418e-06, "loss": 31.1945, "step": 14398 }, { "epoch": 342.8358208955224, "grad_norm": 19.96903419494629, "learning_rate": 9.923280423280423e-06, "loss": 31.4145, "step": 14399 }, { "epoch": 342.85970149253734, "grad_norm": 19.02230453491211, "learning_rate": 9.922619047619048e-06, "loss": 33.2449, "step": 14400 }, { "epoch": 342.88358208955225, "grad_norm": 16.819826126098633, "learning_rate": 9.921957671957673e-06, "loss": 33.9047, "step": 14401 }, { "epoch": 342.90746268656716, "grad_norm": 14.629315376281738, "learning_rate": 9.921296296296296e-06, "loss": 32.331, "step": 14402 }, { "epoch": 342.93134328358207, "grad_norm": 15.982880592346191, "learning_rate": 9.920634920634922e-06, "loss": 32.6046, "step": 14403 }, { "epoch": 342.95522388059703, "grad_norm": 14.962193489074707, "learning_rate": 9.919973544973545e-06, "loss": 32.3762, "step": 14404 }, { "epoch": 342.97910447761194, "grad_norm": 22.48440170288086, "learning_rate": 9.91931216931217e-06, "loss": 32.403, "step": 14405 }, { "epoch": 343.0, "grad_norm": 15.533278465270996, "learning_rate": 9.918650793650795e-06, "loss": 27.591, "step": 14406 }, { "epoch": 343.0238805970149, "grad_norm": 15.413907051086426, "learning_rate": 9.917989417989418e-06, "loss": 33.8138, "step": 14407 }, { "epoch": 343.0477611940299, "grad_norm": 19.02525520324707, "learning_rate": 9.917328042328043e-06, "loss": 32.8945, "step": 14408 }, { "epoch": 343.0716417910448, "grad_norm": 16.191198348999023, "learning_rate": 9.916666666666668e-06, "loss": 32.4773, "step": 14409 }, { "epoch": 343.0955223880597, "grad_norm": 17.758012771606445, "learning_rate": 9.916005291005293e-06, "loss": 32.2462, "step": 14410 }, { "epoch": 343.1194029850746, "grad_norm": 16.209293365478516, "learning_rate": 9.915343915343916e-06, "loss": 33.1102, "step": 14411 }, { "epoch": 343.14328358208957, "grad_norm": 17.826519012451172, "learning_rate": 9.91468253968254e-06, "loss": 32.0688, "step": 14412 }, { "epoch": 343.1671641791045, "grad_norm": 14.85556697845459, "learning_rate": 9.914021164021165e-06, "loss": 31.965, "step": 14413 }, { "epoch": 343.1910447761194, "grad_norm": 17.07773780822754, "learning_rate": 9.913359788359788e-06, "loss": 32.7075, "step": 14414 }, { "epoch": 343.21492537313435, "grad_norm": 13.941993713378906, "learning_rate": 9.912698412698413e-06, "loss": 31.4292, "step": 14415 }, { "epoch": 343.23880597014926, "grad_norm": 16.280799865722656, "learning_rate": 9.912037037037038e-06, "loss": 32.8106, "step": 14416 }, { "epoch": 343.26268656716417, "grad_norm": 14.638901710510254, "learning_rate": 9.911375661375661e-06, "loss": 32.9818, "step": 14417 }, { "epoch": 343.28656716417913, "grad_norm": 16.58753204345703, "learning_rate": 9.910714285714288e-06, "loss": 32.6556, "step": 14418 }, { "epoch": 343.31044776119404, "grad_norm": 15.208233833312988, "learning_rate": 9.91005291005291e-06, "loss": 33.0974, "step": 14419 }, { "epoch": 343.33432835820895, "grad_norm": 16.727413177490234, "learning_rate": 9.909391534391535e-06, "loss": 31.9921, "step": 14420 }, { "epoch": 343.35820895522386, "grad_norm": 14.190279006958008, "learning_rate": 9.90873015873016e-06, "loss": 32.3217, "step": 14421 }, { "epoch": 343.3820895522388, "grad_norm": 17.157922744750977, "learning_rate": 9.908068783068783e-06, "loss": 32.5896, "step": 14422 }, { "epoch": 343.40597014925373, "grad_norm": 16.291887283325195, "learning_rate": 9.907407407407408e-06, "loss": 32.3837, "step": 14423 }, { "epoch": 343.42985074626864, "grad_norm": 18.021459579467773, "learning_rate": 9.906746031746033e-06, "loss": 31.1202, "step": 14424 }, { "epoch": 343.4537313432836, "grad_norm": 19.96455955505371, "learning_rate": 9.906084656084658e-06, "loss": 32.5496, "step": 14425 }, { "epoch": 343.4776119402985, "grad_norm": 18.93027687072754, "learning_rate": 9.90542328042328e-06, "loss": 33.0246, "step": 14426 }, { "epoch": 343.5014925373134, "grad_norm": 18.157634735107422, "learning_rate": 9.904761904761906e-06, "loss": 31.5134, "step": 14427 }, { "epoch": 343.52537313432833, "grad_norm": 18.85066032409668, "learning_rate": 9.90410052910053e-06, "loss": 33.0202, "step": 14428 }, { "epoch": 343.5492537313433, "grad_norm": 19.623821258544922, "learning_rate": 9.903439153439154e-06, "loss": 32.3831, "step": 14429 }, { "epoch": 343.5731343283582, "grad_norm": 18.22905731201172, "learning_rate": 9.902777777777778e-06, "loss": 33.3364, "step": 14430 }, { "epoch": 343.5970149253731, "grad_norm": 17.250316619873047, "learning_rate": 9.902116402116403e-06, "loss": 31.8466, "step": 14431 }, { "epoch": 343.6208955223881, "grad_norm": 16.9425048828125, "learning_rate": 9.901455026455026e-06, "loss": 32.9163, "step": 14432 }, { "epoch": 343.644776119403, "grad_norm": 13.741183280944824, "learning_rate": 9.900793650793653e-06, "loss": 33.4662, "step": 14433 }, { "epoch": 343.6686567164179, "grad_norm": 15.247157096862793, "learning_rate": 9.900132275132276e-06, "loss": 33.4205, "step": 14434 }, { "epoch": 343.6925373134328, "grad_norm": 13.897851943969727, "learning_rate": 9.8994708994709e-06, "loss": 33.4061, "step": 14435 }, { "epoch": 343.7164179104478, "grad_norm": 16.523433685302734, "learning_rate": 9.898809523809525e-06, "loss": 32.8969, "step": 14436 }, { "epoch": 343.7402985074627, "grad_norm": 16.474576950073242, "learning_rate": 9.898148148148148e-06, "loss": 31.8715, "step": 14437 }, { "epoch": 343.7641791044776, "grad_norm": 21.49550437927246, "learning_rate": 9.897486772486773e-06, "loss": 34.0384, "step": 14438 }, { "epoch": 343.78805970149256, "grad_norm": 21.20180320739746, "learning_rate": 9.896825396825398e-06, "loss": 34.3682, "step": 14439 }, { "epoch": 343.81194029850747, "grad_norm": 14.964547157287598, "learning_rate": 9.896164021164021e-06, "loss": 32.9994, "step": 14440 }, { "epoch": 343.8358208955224, "grad_norm": 14.992241859436035, "learning_rate": 9.895502645502646e-06, "loss": 33.961, "step": 14441 }, { "epoch": 343.85970149253734, "grad_norm": 15.811175346374512, "learning_rate": 9.89484126984127e-06, "loss": 33.093, "step": 14442 }, { "epoch": 343.88358208955225, "grad_norm": 24.445816040039062, "learning_rate": 9.894179894179896e-06, "loss": 33.3162, "step": 14443 }, { "epoch": 343.90746268656716, "grad_norm": 16.5667781829834, "learning_rate": 9.893518518518519e-06, "loss": 32.5101, "step": 14444 }, { "epoch": 343.93134328358207, "grad_norm": 18.14990997314453, "learning_rate": 9.892857142857143e-06, "loss": 32.4721, "step": 14445 }, { "epoch": 343.95522388059703, "grad_norm": 18.30742645263672, "learning_rate": 9.892195767195768e-06, "loss": 33.2712, "step": 14446 }, { "epoch": 343.97910447761194, "grad_norm": 24.58142852783203, "learning_rate": 9.891534391534391e-06, "loss": 32.5449, "step": 14447 }, { "epoch": 344.0, "grad_norm": 19.144222259521484, "learning_rate": 9.890873015873018e-06, "loss": 29.1607, "step": 14448 }, { "epoch": 344.0238805970149, "grad_norm": 17.23626708984375, "learning_rate": 9.890211640211641e-06, "loss": 33.7711, "step": 14449 }, { "epoch": 344.0477611940299, "grad_norm": 23.77984046936035, "learning_rate": 9.889550264550266e-06, "loss": 32.5528, "step": 14450 }, { "epoch": 344.0716417910448, "grad_norm": 21.655672073364258, "learning_rate": 9.88888888888889e-06, "loss": 32.9569, "step": 14451 }, { "epoch": 344.0955223880597, "grad_norm": 18.255815505981445, "learning_rate": 9.888227513227514e-06, "loss": 32.7276, "step": 14452 }, { "epoch": 344.1194029850746, "grad_norm": 17.8723201751709, "learning_rate": 9.887566137566138e-06, "loss": 32.9329, "step": 14453 }, { "epoch": 344.14328358208957, "grad_norm": 25.975536346435547, "learning_rate": 9.886904761904763e-06, "loss": 32.0884, "step": 14454 }, { "epoch": 344.1671641791045, "grad_norm": 17.716848373413086, "learning_rate": 9.886243386243386e-06, "loss": 31.6352, "step": 14455 }, { "epoch": 344.1910447761194, "grad_norm": 25.284719467163086, "learning_rate": 9.885582010582011e-06, "loss": 32.7679, "step": 14456 }, { "epoch": 344.21492537313435, "grad_norm": 19.202789306640625, "learning_rate": 9.884920634920636e-06, "loss": 32.5243, "step": 14457 }, { "epoch": 344.23880597014926, "grad_norm": 26.566465377807617, "learning_rate": 9.88425925925926e-06, "loss": 33.6448, "step": 14458 }, { "epoch": 344.26268656716417, "grad_norm": 20.592832565307617, "learning_rate": 9.883597883597884e-06, "loss": 34.2431, "step": 14459 }, { "epoch": 344.28656716417913, "grad_norm": 26.993072509765625, "learning_rate": 9.882936507936509e-06, "loss": 32.3125, "step": 14460 }, { "epoch": 344.31044776119404, "grad_norm": 24.073251724243164, "learning_rate": 9.882275132275133e-06, "loss": 32.8236, "step": 14461 }, { "epoch": 344.33432835820895, "grad_norm": 24.742605209350586, "learning_rate": 9.881613756613757e-06, "loss": 32.8045, "step": 14462 }, { "epoch": 344.35820895522386, "grad_norm": 22.220346450805664, "learning_rate": 9.880952380952381e-06, "loss": 32.2585, "step": 14463 }, { "epoch": 344.3820895522388, "grad_norm": 22.460216522216797, "learning_rate": 9.880291005291006e-06, "loss": 32.2342, "step": 14464 }, { "epoch": 344.40597014925373, "grad_norm": 19.556997299194336, "learning_rate": 9.87962962962963e-06, "loss": 31.6273, "step": 14465 }, { "epoch": 344.42985074626864, "grad_norm": 21.854801177978516, "learning_rate": 9.878968253968256e-06, "loss": 32.9147, "step": 14466 }, { "epoch": 344.4537313432836, "grad_norm": 18.510866165161133, "learning_rate": 9.878306878306879e-06, "loss": 32.0147, "step": 14467 }, { "epoch": 344.4776119402985, "grad_norm": 18.42432975769043, "learning_rate": 9.877645502645504e-06, "loss": 31.8964, "step": 14468 }, { "epoch": 344.5014925373134, "grad_norm": 18.292217254638672, "learning_rate": 9.876984126984128e-06, "loss": 33.6321, "step": 14469 }, { "epoch": 344.52537313432833, "grad_norm": 18.253793716430664, "learning_rate": 9.876322751322752e-06, "loss": 33.6365, "step": 14470 }, { "epoch": 344.5492537313433, "grad_norm": 20.801607131958008, "learning_rate": 9.875661375661376e-06, "loss": 33.5187, "step": 14471 }, { "epoch": 344.5731343283582, "grad_norm": 15.532673835754395, "learning_rate": 9.875000000000001e-06, "loss": 31.9724, "step": 14472 }, { "epoch": 344.5970149253731, "grad_norm": 22.409029006958008, "learning_rate": 9.874338624338626e-06, "loss": 32.0922, "step": 14473 }, { "epoch": 344.6208955223881, "grad_norm": 15.50053596496582, "learning_rate": 9.873677248677249e-06, "loss": 31.9287, "step": 14474 }, { "epoch": 344.644776119403, "grad_norm": 22.280168533325195, "learning_rate": 9.873015873015874e-06, "loss": 33.626, "step": 14475 }, { "epoch": 344.6686567164179, "grad_norm": 18.608139038085938, "learning_rate": 9.872354497354499e-06, "loss": 32.9671, "step": 14476 }, { "epoch": 344.6925373134328, "grad_norm": 22.933162689208984, "learning_rate": 9.871693121693122e-06, "loss": 33.3424, "step": 14477 }, { "epoch": 344.7164179104478, "grad_norm": 19.895978927612305, "learning_rate": 9.871031746031747e-06, "loss": 31.7091, "step": 14478 }, { "epoch": 344.7402985074627, "grad_norm": 19.396108627319336, "learning_rate": 9.870370370370371e-06, "loss": 32.6533, "step": 14479 }, { "epoch": 344.7641791044776, "grad_norm": 19.97950553894043, "learning_rate": 9.869708994708994e-06, "loss": 32.891, "step": 14480 }, { "epoch": 344.78805970149256, "grad_norm": 19.812124252319336, "learning_rate": 9.869047619047621e-06, "loss": 32.8538, "step": 14481 }, { "epoch": 344.81194029850747, "grad_norm": 21.236356735229492, "learning_rate": 9.868386243386244e-06, "loss": 31.6757, "step": 14482 }, { "epoch": 344.8358208955224, "grad_norm": 21.389366149902344, "learning_rate": 9.867724867724869e-06, "loss": 32.5475, "step": 14483 }, { "epoch": 344.85970149253734, "grad_norm": 19.750301361083984, "learning_rate": 9.867063492063494e-06, "loss": 33.2045, "step": 14484 }, { "epoch": 344.88358208955225, "grad_norm": 20.80890655517578, "learning_rate": 9.866402116402117e-06, "loss": 32.949, "step": 14485 }, { "epoch": 344.90746268656716, "grad_norm": 16.11481285095215, "learning_rate": 9.865740740740742e-06, "loss": 33.3379, "step": 14486 }, { "epoch": 344.93134328358207, "grad_norm": 23.29161262512207, "learning_rate": 9.865079365079366e-06, "loss": 33.3099, "step": 14487 }, { "epoch": 344.95522388059703, "grad_norm": 18.72956657409668, "learning_rate": 9.864417989417991e-06, "loss": 32.5956, "step": 14488 }, { "epoch": 344.97910447761194, "grad_norm": 16.80988883972168, "learning_rate": 9.863756613756614e-06, "loss": 33.2447, "step": 14489 }, { "epoch": 345.0, "grad_norm": 13.624998092651367, "learning_rate": 9.863095238095239e-06, "loss": 27.8146, "step": 14490 }, { "epoch": 345.0238805970149, "grad_norm": 17.24331283569336, "learning_rate": 9.862433862433864e-06, "loss": 31.8899, "step": 14491 }, { "epoch": 345.0477611940299, "grad_norm": 18.86675262451172, "learning_rate": 9.861772486772487e-06, "loss": 33.113, "step": 14492 }, { "epoch": 345.0716417910448, "grad_norm": 16.538835525512695, "learning_rate": 9.861111111111112e-06, "loss": 32.6275, "step": 14493 }, { "epoch": 345.0955223880597, "grad_norm": 18.611021041870117, "learning_rate": 9.860449735449737e-06, "loss": 33.4015, "step": 14494 }, { "epoch": 345.1194029850746, "grad_norm": 15.672499656677246, "learning_rate": 9.85978835978836e-06, "loss": 32.9165, "step": 14495 }, { "epoch": 345.14328358208957, "grad_norm": 17.5117130279541, "learning_rate": 9.859126984126986e-06, "loss": 33.469, "step": 14496 }, { "epoch": 345.1671641791045, "grad_norm": 15.414146423339844, "learning_rate": 9.85846560846561e-06, "loss": 32.4254, "step": 14497 }, { "epoch": 345.1910447761194, "grad_norm": 24.0014591217041, "learning_rate": 9.857804232804234e-06, "loss": 32.4304, "step": 14498 }, { "epoch": 345.21492537313435, "grad_norm": 17.462278366088867, "learning_rate": 9.857142857142859e-06, "loss": 33.4874, "step": 14499 }, { "epoch": 345.23880597014926, "grad_norm": 15.571789741516113, "learning_rate": 9.856481481481482e-06, "loss": 32.2926, "step": 14500 }, { "epoch": 345.26268656716417, "grad_norm": 21.77423858642578, "learning_rate": 9.855820105820107e-06, "loss": 33.1389, "step": 14501 }, { "epoch": 345.28656716417913, "grad_norm": 18.773069381713867, "learning_rate": 9.855158730158732e-06, "loss": 32.1091, "step": 14502 }, { "epoch": 345.31044776119404, "grad_norm": 16.718849182128906, "learning_rate": 9.854497354497355e-06, "loss": 33.1977, "step": 14503 }, { "epoch": 345.33432835820895, "grad_norm": 16.68386459350586, "learning_rate": 9.85383597883598e-06, "loss": 32.0855, "step": 14504 }, { "epoch": 345.35820895522386, "grad_norm": 14.361015319824219, "learning_rate": 9.853174603174604e-06, "loss": 33.9809, "step": 14505 }, { "epoch": 345.3820895522388, "grad_norm": 16.64942169189453, "learning_rate": 9.852513227513229e-06, "loss": 32.5625, "step": 14506 }, { "epoch": 345.40597014925373, "grad_norm": 18.994455337524414, "learning_rate": 9.851851851851852e-06, "loss": 31.9444, "step": 14507 }, { "epoch": 345.42985074626864, "grad_norm": 21.159523010253906, "learning_rate": 9.851190476190477e-06, "loss": 32.7126, "step": 14508 }, { "epoch": 345.4537313432836, "grad_norm": 20.6182861328125, "learning_rate": 9.850529100529102e-06, "loss": 31.1335, "step": 14509 }, { "epoch": 345.4776119402985, "grad_norm": 16.21684455871582, "learning_rate": 9.849867724867725e-06, "loss": 33.4699, "step": 14510 }, { "epoch": 345.5014925373134, "grad_norm": 19.0106258392334, "learning_rate": 9.849206349206351e-06, "loss": 32.6018, "step": 14511 }, { "epoch": 345.52537313432833, "grad_norm": 16.533052444458008, "learning_rate": 9.848544973544974e-06, "loss": 32.9582, "step": 14512 }, { "epoch": 345.5492537313433, "grad_norm": 16.521297454833984, "learning_rate": 9.8478835978836e-06, "loss": 32.0621, "step": 14513 }, { "epoch": 345.5731343283582, "grad_norm": 17.655977249145508, "learning_rate": 9.847222222222224e-06, "loss": 32.5079, "step": 14514 }, { "epoch": 345.5970149253731, "grad_norm": 17.147764205932617, "learning_rate": 9.846560846560847e-06, "loss": 32.5903, "step": 14515 }, { "epoch": 345.6208955223881, "grad_norm": 15.925622940063477, "learning_rate": 9.845899470899472e-06, "loss": 31.5798, "step": 14516 }, { "epoch": 345.644776119403, "grad_norm": 15.333617210388184, "learning_rate": 9.845238095238097e-06, "loss": 33.0217, "step": 14517 }, { "epoch": 345.6686567164179, "grad_norm": 15.204148292541504, "learning_rate": 9.84457671957672e-06, "loss": 31.8359, "step": 14518 }, { "epoch": 345.6925373134328, "grad_norm": 14.474899291992188, "learning_rate": 9.843915343915345e-06, "loss": 33.1772, "step": 14519 }, { "epoch": 345.7164179104478, "grad_norm": 18.257648468017578, "learning_rate": 9.843253968253968e-06, "loss": 32.8769, "step": 14520 }, { "epoch": 345.7402985074627, "grad_norm": 23.30188751220703, "learning_rate": 9.842592592592594e-06, "loss": 31.956, "step": 14521 }, { "epoch": 345.7641791044776, "grad_norm": 17.30249786376953, "learning_rate": 9.841931216931217e-06, "loss": 33.4029, "step": 14522 }, { "epoch": 345.78805970149256, "grad_norm": 15.513245582580566, "learning_rate": 9.841269841269842e-06, "loss": 32.5606, "step": 14523 }, { "epoch": 345.81194029850747, "grad_norm": 26.08751678466797, "learning_rate": 9.840608465608467e-06, "loss": 32.9169, "step": 14524 }, { "epoch": 345.8358208955224, "grad_norm": NaN, "learning_rate": 9.83994708994709e-06, "loss": 33.0905, "step": 14525 }, { "epoch": 345.85970149253734, "grad_norm": 20.650205612182617, "learning_rate": 9.83994708994709e-06, "loss": 33.4088, "step": 14526 }, { "epoch": 345.88358208955225, "grad_norm": 17.080671310424805, "learning_rate": 9.839285714285715e-06, "loss": 32.482, "step": 14527 }, { "epoch": 345.90746268656716, "grad_norm": 19.794126510620117, "learning_rate": 9.83862433862434e-06, "loss": 33.2448, "step": 14528 }, { "epoch": 345.93134328358207, "grad_norm": 24.577285766601562, "learning_rate": 9.837962962962964e-06, "loss": 33.5371, "step": 14529 }, { "epoch": 345.95522388059703, "grad_norm": 16.948318481445312, "learning_rate": 9.837301587301588e-06, "loss": 32.265, "step": 14530 }, { "epoch": 345.97910447761194, "grad_norm": 17.720129013061523, "learning_rate": 9.836640211640212e-06, "loss": 33.6891, "step": 14531 }, { "epoch": 346.0, "grad_norm": 18.448644638061523, "learning_rate": 9.835978835978837e-06, "loss": 27.1259, "step": 14532 }, { "epoch": 346.0238805970149, "grad_norm": 20.12361717224121, "learning_rate": 9.83531746031746e-06, "loss": 32.4664, "step": 14533 }, { "epoch": 346.0477611940299, "grad_norm": 16.14518165588379, "learning_rate": 9.834656084656085e-06, "loss": 32.8733, "step": 14534 }, { "epoch": 346.0716417910448, "grad_norm": 17.641845703125, "learning_rate": 9.83399470899471e-06, "loss": 32.8976, "step": 14535 }, { "epoch": 346.0955223880597, "grad_norm": 18.597990036010742, "learning_rate": 9.833333333333333e-06, "loss": 31.9901, "step": 14536 }, { "epoch": 346.1194029850746, "grad_norm": 21.50041961669922, "learning_rate": 9.83267195767196e-06, "loss": 33.8644, "step": 14537 }, { "epoch": 346.14328358208957, "grad_norm": 18.245433807373047, "learning_rate": 9.832010582010583e-06, "loss": 32.4087, "step": 14538 }, { "epoch": 346.1671641791045, "grad_norm": 14.784394264221191, "learning_rate": 9.831349206349207e-06, "loss": 33.6864, "step": 14539 }, { "epoch": 346.1910447761194, "grad_norm": 19.643970489501953, "learning_rate": 9.830687830687832e-06, "loss": 32.7441, "step": 14540 }, { "epoch": 346.21492537313435, "grad_norm": 21.35646629333496, "learning_rate": 9.830026455026455e-06, "loss": 33.2991, "step": 14541 }, { "epoch": 346.23880597014926, "grad_norm": 20.9975528717041, "learning_rate": 9.82936507936508e-06, "loss": 33.2202, "step": 14542 }, { "epoch": 346.26268656716417, "grad_norm": 15.574922561645508, "learning_rate": 9.828703703703705e-06, "loss": 33.7688, "step": 14543 }, { "epoch": 346.28656716417913, "grad_norm": 30.816774368286133, "learning_rate": 9.828042328042328e-06, "loss": 33.074, "step": 14544 }, { "epoch": 346.31044776119404, "grad_norm": 21.059690475463867, "learning_rate": 9.827380952380953e-06, "loss": 32.5879, "step": 14545 }, { "epoch": 346.33432835820895, "grad_norm": 21.59850311279297, "learning_rate": 9.826719576719578e-06, "loss": 31.9833, "step": 14546 }, { "epoch": 346.35820895522386, "grad_norm": 25.910024642944336, "learning_rate": 9.826058201058202e-06, "loss": 33.3459, "step": 14547 }, { "epoch": 346.3820895522388, "grad_norm": 17.163654327392578, "learning_rate": 9.825396825396825e-06, "loss": 32.7096, "step": 14548 }, { "epoch": 346.40597014925373, "grad_norm": 17.937894821166992, "learning_rate": 9.82473544973545e-06, "loss": 33.802, "step": 14549 }, { "epoch": 346.42985074626864, "grad_norm": 20.08127784729004, "learning_rate": 9.824074074074075e-06, "loss": 32.4472, "step": 14550 }, { "epoch": 346.4537313432836, "grad_norm": 16.959835052490234, "learning_rate": 9.823412698412698e-06, "loss": 31.187, "step": 14551 }, { "epoch": 346.4776119402985, "grad_norm": 17.028697967529297, "learning_rate": 9.822751322751325e-06, "loss": 32.1524, "step": 14552 }, { "epoch": 346.5014925373134, "grad_norm": 18.651256561279297, "learning_rate": 9.822089947089948e-06, "loss": 31.7408, "step": 14553 }, { "epoch": 346.52537313432833, "grad_norm": 26.050403594970703, "learning_rate": 9.821428571428573e-06, "loss": 33.4135, "step": 14554 }, { "epoch": 346.5492537313433, "grad_norm": 17.289928436279297, "learning_rate": 9.820767195767197e-06, "loss": 32.3145, "step": 14555 }, { "epoch": 346.5731343283582, "grad_norm": 17.333473205566406, "learning_rate": 9.82010582010582e-06, "loss": 33.268, "step": 14556 }, { "epoch": 346.5970149253731, "grad_norm": 24.8438720703125, "learning_rate": 9.819444444444445e-06, "loss": 31.7599, "step": 14557 }, { "epoch": 346.6208955223881, "grad_norm": 19.637678146362305, "learning_rate": 9.81878306878307e-06, "loss": 32.0226, "step": 14558 }, { "epoch": 346.644776119403, "grad_norm": 14.42353343963623, "learning_rate": 9.818121693121693e-06, "loss": 32.3305, "step": 14559 }, { "epoch": 346.6686567164179, "grad_norm": 17.578075408935547, "learning_rate": 9.817460317460318e-06, "loss": 32.8084, "step": 14560 }, { "epoch": 346.6925373134328, "grad_norm": 14.608057975769043, "learning_rate": 9.816798941798943e-06, "loss": 32.1249, "step": 14561 }, { "epoch": 346.7164179104478, "grad_norm": 20.10359001159668, "learning_rate": 9.816137566137567e-06, "loss": 33.1616, "step": 14562 }, { "epoch": 346.7402985074627, "grad_norm": 18.161693572998047, "learning_rate": 9.81547619047619e-06, "loss": 31.9219, "step": 14563 }, { "epoch": 346.7641791044776, "grad_norm": 19.661170959472656, "learning_rate": 9.814814814814815e-06, "loss": 32.9629, "step": 14564 }, { "epoch": 346.78805970149256, "grad_norm": 17.32904052734375, "learning_rate": 9.81415343915344e-06, "loss": 32.985, "step": 14565 }, { "epoch": 346.81194029850747, "grad_norm": 20.90936851501465, "learning_rate": 9.813492063492063e-06, "loss": 32.7361, "step": 14566 }, { "epoch": 346.8358208955224, "grad_norm": 21.710256576538086, "learning_rate": 9.812830687830688e-06, "loss": 32.5502, "step": 14567 }, { "epoch": 346.85970149253734, "grad_norm": 17.692102432250977, "learning_rate": 9.812169312169313e-06, "loss": 33.4243, "step": 14568 }, { "epoch": 346.88358208955225, "grad_norm": 16.292560577392578, "learning_rate": 9.811507936507938e-06, "loss": 32.7452, "step": 14569 }, { "epoch": 346.90746268656716, "grad_norm": 14.09742259979248, "learning_rate": 9.810846560846562e-06, "loss": 32.486, "step": 14570 }, { "epoch": 346.93134328358207, "grad_norm": 17.492095947265625, "learning_rate": 9.810185185185186e-06, "loss": 31.9111, "step": 14571 }, { "epoch": 346.95522388059703, "grad_norm": 16.279251098632812, "learning_rate": 9.80952380952381e-06, "loss": 31.7253, "step": 14572 }, { "epoch": 346.97910447761194, "grad_norm": 20.205720901489258, "learning_rate": 9.808862433862435e-06, "loss": 32.4854, "step": 14573 }, { "epoch": 347.0, "grad_norm": 14.61187744140625, "learning_rate": 9.808201058201058e-06, "loss": 28.3053, "step": 14574 }, { "epoch": 347.0238805970149, "grad_norm": 19.006540298461914, "learning_rate": 9.807539682539683e-06, "loss": 32.7437, "step": 14575 }, { "epoch": 347.0477611940299, "grad_norm": 22.644012451171875, "learning_rate": 9.806878306878308e-06, "loss": 32.8132, "step": 14576 }, { "epoch": 347.0716417910448, "grad_norm": 20.906108856201172, "learning_rate": 9.806216931216933e-06, "loss": 31.0339, "step": 14577 }, { "epoch": 347.0955223880597, "grad_norm": 16.691179275512695, "learning_rate": 9.805555555555556e-06, "loss": 33.0482, "step": 14578 }, { "epoch": 347.1194029850746, "grad_norm": 15.065079689025879, "learning_rate": 9.80489417989418e-06, "loss": 33.9182, "step": 14579 }, { "epoch": 347.14328358208957, "grad_norm": 22.79800033569336, "learning_rate": 9.804232804232805e-06, "loss": 31.8172, "step": 14580 }, { "epoch": 347.1671641791045, "grad_norm": 17.989540100097656, "learning_rate": 9.803571428571428e-06, "loss": 33.3956, "step": 14581 }, { "epoch": 347.1910447761194, "grad_norm": 17.740514755249023, "learning_rate": 9.802910052910053e-06, "loss": 32.1055, "step": 14582 }, { "epoch": 347.21492537313435, "grad_norm": 26.839365005493164, "learning_rate": 9.802248677248678e-06, "loss": 33.0859, "step": 14583 }, { "epoch": 347.23880597014926, "grad_norm": 18.93688201904297, "learning_rate": 9.801587301587301e-06, "loss": 32.4117, "step": 14584 }, { "epoch": 347.26268656716417, "grad_norm": 15.530482292175293, "learning_rate": 9.800925925925928e-06, "loss": 31.8495, "step": 14585 }, { "epoch": 347.28656716417913, "grad_norm": 21.48668670654297, "learning_rate": 9.80026455026455e-06, "loss": 32.381, "step": 14586 }, { "epoch": 347.31044776119404, "grad_norm": 20.40810203552246, "learning_rate": 9.799603174603176e-06, "loss": 32.6337, "step": 14587 }, { "epoch": 347.33432835820895, "grad_norm": 16.721519470214844, "learning_rate": 9.7989417989418e-06, "loss": 32.8879, "step": 14588 }, { "epoch": 347.35820895522386, "grad_norm": 16.888235092163086, "learning_rate": 9.798280423280423e-06, "loss": 32.7801, "step": 14589 }, { "epoch": 347.3820895522388, "grad_norm": 17.261720657348633, "learning_rate": 9.797619047619048e-06, "loss": 33.2261, "step": 14590 }, { "epoch": 347.40597014925373, "grad_norm": 19.447309494018555, "learning_rate": 9.796957671957673e-06, "loss": 32.6286, "step": 14591 }, { "epoch": 347.42985074626864, "grad_norm": 15.644414901733398, "learning_rate": 9.796296296296298e-06, "loss": 32.3827, "step": 14592 }, { "epoch": 347.4537313432836, "grad_norm": 16.57158660888672, "learning_rate": 9.795634920634921e-06, "loss": 33.821, "step": 14593 }, { "epoch": 347.4776119402985, "grad_norm": 14.660304069519043, "learning_rate": 9.794973544973546e-06, "loss": 32.3864, "step": 14594 }, { "epoch": 347.5014925373134, "grad_norm": 16.841506958007812, "learning_rate": 9.79431216931217e-06, "loss": 32.6457, "step": 14595 }, { "epoch": 347.52537313432833, "grad_norm": 18.000137329101562, "learning_rate": 9.793650793650794e-06, "loss": 32.2096, "step": 14596 }, { "epoch": 347.5492537313433, "grad_norm": 19.90860939025879, "learning_rate": 9.792989417989418e-06, "loss": 33.2074, "step": 14597 }, { "epoch": 347.5731343283582, "grad_norm": 14.733869552612305, "learning_rate": 9.792328042328043e-06, "loss": 33.3162, "step": 14598 }, { "epoch": 347.5970149253731, "grad_norm": 16.11147689819336, "learning_rate": 9.791666666666666e-06, "loss": 33.028, "step": 14599 }, { "epoch": 347.6208955223881, "grad_norm": 16.726499557495117, "learning_rate": 9.791005291005293e-06, "loss": 31.7532, "step": 14600 }, { "epoch": 347.644776119403, "grad_norm": 15.86723804473877, "learning_rate": 9.790343915343916e-06, "loss": 31.5954, "step": 14601 }, { "epoch": 347.6686567164179, "grad_norm": 23.73784828186035, "learning_rate": 9.78968253968254e-06, "loss": 32.2351, "step": 14602 }, { "epoch": 347.6925373134328, "grad_norm": 18.645854949951172, "learning_rate": 9.789021164021166e-06, "loss": 32.3827, "step": 14603 }, { "epoch": 347.7164179104478, "grad_norm": 16.774049758911133, "learning_rate": 9.788359788359789e-06, "loss": 33.8348, "step": 14604 }, { "epoch": 347.7402985074627, "grad_norm": 18.117555618286133, "learning_rate": 9.787698412698413e-06, "loss": 31.7104, "step": 14605 }, { "epoch": 347.7641791044776, "grad_norm": 17.258329391479492, "learning_rate": 9.787037037037038e-06, "loss": 32.4091, "step": 14606 }, { "epoch": 347.78805970149256, "grad_norm": 19.49449920654297, "learning_rate": 9.786375661375661e-06, "loss": 33.4336, "step": 14607 }, { "epoch": 347.81194029850747, "grad_norm": 18.486204147338867, "learning_rate": 9.785714285714286e-06, "loss": 31.5633, "step": 14608 }, { "epoch": 347.8358208955224, "grad_norm": 16.00359344482422, "learning_rate": 9.785052910052911e-06, "loss": 31.9202, "step": 14609 }, { "epoch": 347.85970149253734, "grad_norm": 14.867855072021484, "learning_rate": 9.784391534391536e-06, "loss": 33.3096, "step": 14610 }, { "epoch": 347.88358208955225, "grad_norm": 21.041004180908203, "learning_rate": 9.783730158730159e-06, "loss": 33.7092, "step": 14611 }, { "epoch": 347.90746268656716, "grad_norm": 21.410348892211914, "learning_rate": 9.783068783068784e-06, "loss": 32.6474, "step": 14612 }, { "epoch": 347.93134328358207, "grad_norm": 21.48059844970703, "learning_rate": 9.782407407407408e-06, "loss": 32.0023, "step": 14613 }, { "epoch": 347.95522388059703, "grad_norm": 14.269651412963867, "learning_rate": 9.781746031746032e-06, "loss": 32.7085, "step": 14614 }, { "epoch": 347.97910447761194, "grad_norm": 22.927631378173828, "learning_rate": 9.781084656084658e-06, "loss": 33.4512, "step": 14615 }, { "epoch": 348.0, "grad_norm": 17.4822998046875, "learning_rate": 9.780423280423281e-06, "loss": 27.6399, "step": 14616 }, { "epoch": 348.0238805970149, "grad_norm": 16.791461944580078, "learning_rate": 9.779761904761906e-06, "loss": 33.8503, "step": 14617 }, { "epoch": 348.0477611940299, "grad_norm": 15.67280387878418, "learning_rate": 9.77910052910053e-06, "loss": 31.505, "step": 14618 }, { "epoch": 348.0716417910448, "grad_norm": 20.893356323242188, "learning_rate": 9.778439153439154e-06, "loss": 33.9971, "step": 14619 }, { "epoch": 348.0955223880597, "grad_norm": 21.14359474182129, "learning_rate": 9.777777777777779e-06, "loss": 32.555, "step": 14620 }, { "epoch": 348.1194029850746, "grad_norm": 16.92644691467285, "learning_rate": 9.777116402116403e-06, "loss": 32.2008, "step": 14621 }, { "epoch": 348.14328358208957, "grad_norm": 15.187609672546387, "learning_rate": 9.776455026455027e-06, "loss": 32.3186, "step": 14622 }, { "epoch": 348.1671641791045, "grad_norm": 17.225370407104492, "learning_rate": 9.775793650793651e-06, "loss": 32.3924, "step": 14623 }, { "epoch": 348.1910447761194, "grad_norm": 17.131383895874023, "learning_rate": 9.775132275132276e-06, "loss": 32.3736, "step": 14624 }, { "epoch": 348.21492537313435, "grad_norm": 15.347402572631836, "learning_rate": 9.774470899470901e-06, "loss": 33.362, "step": 14625 }, { "epoch": 348.23880597014926, "grad_norm": 17.12799644470215, "learning_rate": 9.773809523809524e-06, "loss": 32.2947, "step": 14626 }, { "epoch": 348.26268656716417, "grad_norm": 16.040346145629883, "learning_rate": 9.773148148148149e-06, "loss": 31.6114, "step": 14627 }, { "epoch": 348.28656716417913, "grad_norm": 15.4306001663208, "learning_rate": 9.772486772486774e-06, "loss": 33.3488, "step": 14628 }, { "epoch": 348.31044776119404, "grad_norm": 20.037302017211914, "learning_rate": 9.771825396825397e-06, "loss": 32.9626, "step": 14629 }, { "epoch": 348.33432835820895, "grad_norm": 16.938810348510742, "learning_rate": 9.771164021164023e-06, "loss": 32.3464, "step": 14630 }, { "epoch": 348.35820895522386, "grad_norm": 16.684492111206055, "learning_rate": 9.770502645502646e-06, "loss": 31.795, "step": 14631 }, { "epoch": 348.3820895522388, "grad_norm": 20.565393447875977, "learning_rate": 9.769841269841271e-06, "loss": 32.8336, "step": 14632 }, { "epoch": 348.40597014925373, "grad_norm": 19.35353660583496, "learning_rate": 9.769179894179896e-06, "loss": 32.5364, "step": 14633 }, { "epoch": 348.42985074626864, "grad_norm": 16.575313568115234, "learning_rate": 9.768518518518519e-06, "loss": 33.0467, "step": 14634 }, { "epoch": 348.4537313432836, "grad_norm": 18.80159568786621, "learning_rate": 9.767857142857144e-06, "loss": 33.4302, "step": 14635 }, { "epoch": 348.4776119402985, "grad_norm": 19.14153289794922, "learning_rate": 9.767195767195769e-06, "loss": 32.5667, "step": 14636 }, { "epoch": 348.5014925373134, "grad_norm": 18.234790802001953, "learning_rate": 9.766534391534392e-06, "loss": 32.2728, "step": 14637 }, { "epoch": 348.52537313432833, "grad_norm": 17.12832260131836, "learning_rate": 9.765873015873017e-06, "loss": 32.2903, "step": 14638 }, { "epoch": 348.5492537313433, "grad_norm": 19.046112060546875, "learning_rate": 9.765211640211641e-06, "loss": 32.9503, "step": 14639 }, { "epoch": 348.5731343283582, "grad_norm": 14.653257369995117, "learning_rate": 9.764550264550266e-06, "loss": 32.4275, "step": 14640 }, { "epoch": 348.5970149253731, "grad_norm": 21.176921844482422, "learning_rate": 9.76388888888889e-06, "loss": 33.7491, "step": 14641 }, { "epoch": 348.6208955223881, "grad_norm": 17.440414428710938, "learning_rate": 9.763227513227514e-06, "loss": 32.8406, "step": 14642 }, { "epoch": 348.644776119403, "grad_norm": 18.87099838256836, "learning_rate": 9.762566137566139e-06, "loss": 32.2602, "step": 14643 }, { "epoch": 348.6686567164179, "grad_norm": 18.95827293395996, "learning_rate": 9.761904761904762e-06, "loss": 32.3678, "step": 14644 }, { "epoch": 348.6925373134328, "grad_norm": 19.204607009887695, "learning_rate": 9.761243386243387e-06, "loss": 32.2119, "step": 14645 }, { "epoch": 348.7164179104478, "grad_norm": 18.41114044189453, "learning_rate": 9.760582010582012e-06, "loss": 32.9687, "step": 14646 }, { "epoch": 348.7402985074627, "grad_norm": 13.804076194763184, "learning_rate": 9.759920634920635e-06, "loss": 33.004, "step": 14647 }, { "epoch": 348.7641791044776, "grad_norm": 15.167065620422363, "learning_rate": 9.759259259259261e-06, "loss": 30.597, "step": 14648 }, { "epoch": 348.78805970149256, "grad_norm": 14.439536094665527, "learning_rate": 9.758597883597884e-06, "loss": 33.1182, "step": 14649 }, { "epoch": 348.81194029850747, "grad_norm": 16.821346282958984, "learning_rate": 9.757936507936509e-06, "loss": 33.4213, "step": 14650 }, { "epoch": 348.8358208955224, "grad_norm": 16.579566955566406, "learning_rate": 9.757275132275134e-06, "loss": 32.6801, "step": 14651 }, { "epoch": 348.85970149253734, "grad_norm": 19.574453353881836, "learning_rate": 9.756613756613757e-06, "loss": 32.377, "step": 14652 }, { "epoch": 348.88358208955225, "grad_norm": 19.954803466796875, "learning_rate": 9.755952380952382e-06, "loss": 31.6744, "step": 14653 }, { "epoch": 348.90746268656716, "grad_norm": 20.58474349975586, "learning_rate": 9.755291005291007e-06, "loss": 30.9358, "step": 14654 }, { "epoch": 348.93134328358207, "grad_norm": 18.414278030395508, "learning_rate": 9.754629629629631e-06, "loss": 33.2836, "step": 14655 }, { "epoch": 348.95522388059703, "grad_norm": 20.93559455871582, "learning_rate": 9.753968253968254e-06, "loss": 33.688, "step": 14656 }, { "epoch": 348.97910447761194, "grad_norm": 18.284198760986328, "learning_rate": 9.75330687830688e-06, "loss": 33.2417, "step": 14657 }, { "epoch": 349.0, "grad_norm": 18.535316467285156, "learning_rate": 9.752645502645504e-06, "loss": 26.9225, "step": 14658 }, { "epoch": 349.0238805970149, "grad_norm": 17.07691764831543, "learning_rate": 9.751984126984127e-06, "loss": 32.4279, "step": 14659 }, { "epoch": 349.0477611940299, "grad_norm": 21.65434455871582, "learning_rate": 9.751322751322752e-06, "loss": 33.2138, "step": 14660 }, { "epoch": 349.0716417910448, "grad_norm": 19.45406723022461, "learning_rate": 9.750661375661377e-06, "loss": 32.5876, "step": 14661 }, { "epoch": 349.0955223880597, "grad_norm": 17.751808166503906, "learning_rate": 9.75e-06, "loss": 31.7013, "step": 14662 }, { "epoch": 349.1194029850746, "grad_norm": NaN, "learning_rate": 9.749338624338626e-06, "loss": 27.7308, "step": 14663 }, { "epoch": 349.14328358208957, "grad_norm": 17.530637741088867, "learning_rate": 9.749338624338626e-06, "loss": 32.323, "step": 14664 }, { "epoch": 349.1671641791045, "grad_norm": 17.759815216064453, "learning_rate": 9.74867724867725e-06, "loss": 31.6154, "step": 14665 }, { "epoch": 349.1910447761194, "grad_norm": 22.851415634155273, "learning_rate": 9.748015873015874e-06, "loss": 32.3775, "step": 14666 }, { "epoch": 349.21492537313435, "grad_norm": 16.060312271118164, "learning_rate": 9.747354497354499e-06, "loss": 33.3385, "step": 14667 }, { "epoch": 349.23880597014926, "grad_norm": 18.590045928955078, "learning_rate": 9.746693121693122e-06, "loss": 31.1666, "step": 14668 }, { "epoch": 349.26268656716417, "grad_norm": 16.344722747802734, "learning_rate": 9.746031746031747e-06, "loss": 33.0891, "step": 14669 }, { "epoch": 349.28656716417913, "grad_norm": 20.865840911865234, "learning_rate": 9.745370370370372e-06, "loss": 32.9344, "step": 14670 }, { "epoch": 349.31044776119404, "grad_norm": 18.58599853515625, "learning_rate": 9.744708994708997e-06, "loss": 32.9551, "step": 14671 }, { "epoch": 349.33432835820895, "grad_norm": 20.075489044189453, "learning_rate": 9.74404761904762e-06, "loss": 32.1846, "step": 14672 }, { "epoch": 349.35820895522386, "grad_norm": 20.111770629882812, "learning_rate": 9.743386243386244e-06, "loss": 32.8219, "step": 14673 }, { "epoch": 349.3820895522388, "grad_norm": 18.247739791870117, "learning_rate": 9.74272486772487e-06, "loss": 31.3079, "step": 14674 }, { "epoch": 349.40597014925373, "grad_norm": 16.764577865600586, "learning_rate": 9.742063492063492e-06, "loss": 33.5343, "step": 14675 }, { "epoch": 349.42985074626864, "grad_norm": 19.740215301513672, "learning_rate": 9.741402116402117e-06, "loss": 32.6692, "step": 14676 }, { "epoch": 349.4537313432836, "grad_norm": 16.972673416137695, "learning_rate": 9.740740740740742e-06, "loss": 32.0592, "step": 14677 }, { "epoch": 349.4776119402985, "grad_norm": 16.89126205444336, "learning_rate": 9.740079365079365e-06, "loss": 32.9391, "step": 14678 }, { "epoch": 349.5014925373134, "grad_norm": 16.81437873840332, "learning_rate": 9.73941798941799e-06, "loss": 33.5152, "step": 14679 }, { "epoch": 349.52537313432833, "grad_norm": 14.742659568786621, "learning_rate": 9.738756613756615e-06, "loss": 31.4651, "step": 14680 }, { "epoch": 349.5492537313433, "grad_norm": 15.249462127685547, "learning_rate": 9.73809523809524e-06, "loss": 32.7273, "step": 14681 }, { "epoch": 349.5731343283582, "grad_norm": 17.645551681518555, "learning_rate": 9.737433862433863e-06, "loss": 33.063, "step": 14682 }, { "epoch": 349.5970149253731, "grad_norm": 19.15085792541504, "learning_rate": 9.736772486772487e-06, "loss": 33.7299, "step": 14683 }, { "epoch": 349.6208955223881, "grad_norm": 19.513601303100586, "learning_rate": 9.736111111111112e-06, "loss": 32.3101, "step": 14684 }, { "epoch": 349.644776119403, "grad_norm": 19.61646270751953, "learning_rate": 9.735449735449735e-06, "loss": 32.3686, "step": 14685 }, { "epoch": 349.6686567164179, "grad_norm": 14.699602127075195, "learning_rate": 9.73478835978836e-06, "loss": 32.0377, "step": 14686 }, { "epoch": 349.6925373134328, "grad_norm": 18.042131423950195, "learning_rate": 9.734126984126985e-06, "loss": 31.0546, "step": 14687 }, { "epoch": 349.7164179104478, "grad_norm": 17.329553604125977, "learning_rate": 9.733465608465608e-06, "loss": 32.3948, "step": 14688 }, { "epoch": 349.7402985074627, "grad_norm": 20.620149612426758, "learning_rate": 9.732804232804234e-06, "loss": 32.8701, "step": 14689 }, { "epoch": 349.7641791044776, "grad_norm": 15.625349044799805, "learning_rate": 9.732142857142858e-06, "loss": 32.0927, "step": 14690 }, { "epoch": 349.78805970149256, "grad_norm": 20.403350830078125, "learning_rate": 9.731481481481482e-06, "loss": 32.8683, "step": 14691 }, { "epoch": 349.81194029850747, "grad_norm": 16.423643112182617, "learning_rate": 9.730820105820107e-06, "loss": 32.6511, "step": 14692 }, { "epoch": 349.8358208955224, "grad_norm": 17.448591232299805, "learning_rate": 9.73015873015873e-06, "loss": 32.8787, "step": 14693 }, { "epoch": 349.85970149253734, "grad_norm": 18.498205184936523, "learning_rate": 9.729497354497355e-06, "loss": 33.0482, "step": 14694 }, { "epoch": 349.88358208955225, "grad_norm": 24.00061798095703, "learning_rate": 9.72883597883598e-06, "loss": 33.0809, "step": 14695 }, { "epoch": 349.90746268656716, "grad_norm": 18.76821517944336, "learning_rate": 9.728174603174605e-06, "loss": 32.7319, "step": 14696 }, { "epoch": 349.93134328358207, "grad_norm": 20.155773162841797, "learning_rate": 9.727513227513228e-06, "loss": 33.1262, "step": 14697 }, { "epoch": 349.95522388059703, "grad_norm": 23.968116760253906, "learning_rate": 9.726851851851852e-06, "loss": 32.8412, "step": 14698 }, { "epoch": 349.97910447761194, "grad_norm": 19.672527313232422, "learning_rate": 9.726190476190477e-06, "loss": 32.5545, "step": 14699 }, { "epoch": 350.0, "grad_norm": 13.98314094543457, "learning_rate": 9.7255291005291e-06, "loss": 28.4527, "step": 14700 }, { "epoch": 350.0238805970149, "grad_norm": 16.52663803100586, "learning_rate": 9.724867724867725e-06, "loss": 32.3273, "step": 14701 }, { "epoch": 350.0477611940299, "grad_norm": 16.786659240722656, "learning_rate": 9.72420634920635e-06, "loss": 32.8609, "step": 14702 }, { "epoch": 350.0716417910448, "grad_norm": 17.514373779296875, "learning_rate": 9.723544973544973e-06, "loss": 32.098, "step": 14703 }, { "epoch": 350.0955223880597, "grad_norm": 15.79894733428955, "learning_rate": 9.7228835978836e-06, "loss": 32.8837, "step": 14704 }, { "epoch": 350.1194029850746, "grad_norm": 21.42318344116211, "learning_rate": 9.722222222222223e-06, "loss": 32.7655, "step": 14705 }, { "epoch": 350.14328358208957, "grad_norm": 18.59439468383789, "learning_rate": 9.721560846560847e-06, "loss": 31.6826, "step": 14706 }, { "epoch": 350.1671641791045, "grad_norm": 14.719908714294434, "learning_rate": 9.720899470899472e-06, "loss": 31.9127, "step": 14707 }, { "epoch": 350.1910447761194, "grad_norm": 17.84770393371582, "learning_rate": 9.720238095238095e-06, "loss": 32.6657, "step": 14708 }, { "epoch": 350.21492537313435, "grad_norm": 13.739038467407227, "learning_rate": 9.71957671957672e-06, "loss": 31.439, "step": 14709 }, { "epoch": 350.23880597014926, "grad_norm": 17.086442947387695, "learning_rate": 9.718915343915345e-06, "loss": 32.8535, "step": 14710 }, { "epoch": 350.26268656716417, "grad_norm": 16.6844482421875, "learning_rate": 9.71825396825397e-06, "loss": 33.1887, "step": 14711 }, { "epoch": 350.28656716417913, "grad_norm": 18.202463150024414, "learning_rate": 9.717592592592593e-06, "loss": 32.3802, "step": 14712 }, { "epoch": 350.31044776119404, "grad_norm": 17.434301376342773, "learning_rate": 9.716931216931218e-06, "loss": 31.6296, "step": 14713 }, { "epoch": 350.33432835820895, "grad_norm": 18.2510929107666, "learning_rate": 9.716269841269842e-06, "loss": 32.1291, "step": 14714 }, { "epoch": 350.35820895522386, "grad_norm": 16.6622371673584, "learning_rate": 9.715608465608466e-06, "loss": 32.2846, "step": 14715 }, { "epoch": 350.3820895522388, "grad_norm": NaN, "learning_rate": 9.71494708994709e-06, "loss": 46.9879, "step": 14716 }, { "epoch": 350.40597014925373, "grad_norm": 18.69403648376465, "learning_rate": 9.71494708994709e-06, "loss": 31.8159, "step": 14717 }, { "epoch": 350.42985074626864, "grad_norm": 15.543919563293457, "learning_rate": 9.714285714285715e-06, "loss": 33.0337, "step": 14718 }, { "epoch": 350.4537313432836, "grad_norm": 20.756275177001953, "learning_rate": 9.713624338624338e-06, "loss": 33.46, "step": 14719 }, { "epoch": 350.4776119402985, "grad_norm": 14.887667655944824, "learning_rate": 9.712962962962965e-06, "loss": 31.1977, "step": 14720 }, { "epoch": 350.5014925373134, "grad_norm": 24.108150482177734, "learning_rate": 9.712301587301588e-06, "loss": 33.2452, "step": 14721 }, { "epoch": 350.52537313432833, "grad_norm": 18.020376205444336, "learning_rate": 9.711640211640213e-06, "loss": 33.3585, "step": 14722 }, { "epoch": 350.5492537313433, "grad_norm": 25.169206619262695, "learning_rate": 9.710978835978837e-06, "loss": 33.5354, "step": 14723 }, { "epoch": 350.5731343283582, "grad_norm": 23.525800704956055, "learning_rate": 9.71031746031746e-06, "loss": 32.5876, "step": 14724 }, { "epoch": 350.5970149253731, "grad_norm": 19.09469223022461, "learning_rate": 9.709656084656085e-06, "loss": 31.3368, "step": 14725 }, { "epoch": 350.6208955223881, "grad_norm": 19.304349899291992, "learning_rate": 9.70899470899471e-06, "loss": 32.4693, "step": 14726 }, { "epoch": 350.644776119403, "grad_norm": 24.641857147216797, "learning_rate": 9.708333333333333e-06, "loss": 31.8913, "step": 14727 }, { "epoch": 350.6686567164179, "grad_norm": 19.130863189697266, "learning_rate": 9.707671957671958e-06, "loss": 33.6099, "step": 14728 }, { "epoch": 350.6925373134328, "grad_norm": 22.144309997558594, "learning_rate": 9.707010582010583e-06, "loss": 32.9643, "step": 14729 }, { "epoch": 350.7164179104478, "grad_norm": 22.078603744506836, "learning_rate": 9.706349206349208e-06, "loss": 33.8755, "step": 14730 }, { "epoch": 350.7402985074627, "grad_norm": 19.594648361206055, "learning_rate": 9.70568783068783e-06, "loss": 32.0336, "step": 14731 }, { "epoch": 350.7641791044776, "grad_norm": 17.195146560668945, "learning_rate": 9.705026455026456e-06, "loss": 33.3585, "step": 14732 }, { "epoch": 350.78805970149256, "grad_norm": 24.541912078857422, "learning_rate": 9.70436507936508e-06, "loss": 32.4815, "step": 14733 }, { "epoch": 350.81194029850747, "grad_norm": 21.507633209228516, "learning_rate": 9.703703703703703e-06, "loss": 33.0714, "step": 14734 }, { "epoch": 350.8358208955224, "grad_norm": 15.345576286315918, "learning_rate": 9.70304232804233e-06, "loss": 32.1094, "step": 14735 }, { "epoch": 350.85970149253734, "grad_norm": 17.41144371032715, "learning_rate": 9.702380952380953e-06, "loss": 33.1787, "step": 14736 }, { "epoch": 350.88358208955225, "grad_norm": 14.923789024353027, "learning_rate": 9.701719576719578e-06, "loss": 32.0986, "step": 14737 }, { "epoch": 350.90746268656716, "grad_norm": 17.747167587280273, "learning_rate": 9.701058201058203e-06, "loss": 32.5708, "step": 14738 }, { "epoch": 350.93134328358207, "grad_norm": 19.74225616455078, "learning_rate": 9.700396825396826e-06, "loss": 31.5536, "step": 14739 }, { "epoch": 350.95522388059703, "grad_norm": 21.36675262451172, "learning_rate": 9.69973544973545e-06, "loss": 33.7125, "step": 14740 }, { "epoch": 350.97910447761194, "grad_norm": 21.644737243652344, "learning_rate": 9.699074074074075e-06, "loss": 32.2303, "step": 14741 }, { "epoch": 351.0, "grad_norm": 17.01936912536621, "learning_rate": 9.698412698412698e-06, "loss": 28.8026, "step": 14742 }, { "epoch": 351.0238805970149, "grad_norm": 17.195436477661133, "learning_rate": 9.697751322751323e-06, "loss": 32.2544, "step": 14743 }, { "epoch": 351.0477611940299, "grad_norm": 20.987239837646484, "learning_rate": 9.697089947089948e-06, "loss": 32.4869, "step": 14744 }, { "epoch": 351.0716417910448, "grad_norm": 16.024938583374023, "learning_rate": 9.696428571428573e-06, "loss": 31.1712, "step": 14745 }, { "epoch": 351.0955223880597, "grad_norm": 18.800140380859375, "learning_rate": 9.695767195767196e-06, "loss": 31.1837, "step": 14746 }, { "epoch": 351.1194029850746, "grad_norm": 18.07328224182129, "learning_rate": 9.69510582010582e-06, "loss": 33.2909, "step": 14747 }, { "epoch": 351.14328358208957, "grad_norm": 18.07699966430664, "learning_rate": 9.694444444444446e-06, "loss": 32.3287, "step": 14748 }, { "epoch": 351.1671641791045, "grad_norm": 21.439491271972656, "learning_rate": 9.693783068783069e-06, "loss": 32.8626, "step": 14749 }, { "epoch": 351.1910447761194, "grad_norm": 20.884723663330078, "learning_rate": 9.693121693121693e-06, "loss": 31.8939, "step": 14750 }, { "epoch": 351.21492537313435, "grad_norm": 17.19676971435547, "learning_rate": 9.692460317460318e-06, "loss": 32.0953, "step": 14751 }, { "epoch": 351.23880597014926, "grad_norm": 18.076078414916992, "learning_rate": 9.691798941798943e-06, "loss": 32.7481, "step": 14752 }, { "epoch": 351.26268656716417, "grad_norm": 17.14596939086914, "learning_rate": 9.691137566137568e-06, "loss": 32.3306, "step": 14753 }, { "epoch": 351.28656716417913, "grad_norm": 15.715779304504395, "learning_rate": 9.690476190476191e-06, "loss": 32.7884, "step": 14754 }, { "epoch": 351.31044776119404, "grad_norm": 17.186420440673828, "learning_rate": 9.689814814814816e-06, "loss": 32.7943, "step": 14755 }, { "epoch": 351.33432835820895, "grad_norm": 19.32579803466797, "learning_rate": 9.68915343915344e-06, "loss": 33.0978, "step": 14756 }, { "epoch": 351.35820895522386, "grad_norm": 14.649863243103027, "learning_rate": 9.688492063492064e-06, "loss": 30.6359, "step": 14757 }, { "epoch": 351.3820895522388, "grad_norm": 20.003084182739258, "learning_rate": 9.687830687830688e-06, "loss": 33.1828, "step": 14758 }, { "epoch": 351.40597014925373, "grad_norm": 22.395776748657227, "learning_rate": 9.687169312169313e-06, "loss": 32.9852, "step": 14759 }, { "epoch": 351.42985074626864, "grad_norm": 18.03234100341797, "learning_rate": 9.686507936507938e-06, "loss": 31.451, "step": 14760 }, { "epoch": 351.4537313432836, "grad_norm": 18.269107818603516, "learning_rate": 9.685846560846561e-06, "loss": 32.2838, "step": 14761 }, { "epoch": 351.4776119402985, "grad_norm": 18.537240982055664, "learning_rate": 9.685185185185186e-06, "loss": 32.2356, "step": 14762 }, { "epoch": 351.5014925373134, "grad_norm": 28.76149559020996, "learning_rate": 9.68452380952381e-06, "loss": 33.0583, "step": 14763 }, { "epoch": 351.52537313432833, "grad_norm": NaN, "learning_rate": 9.683862433862434e-06, "loss": 41.3145, "step": 14764 }, { "epoch": 351.5492537313433, "grad_norm": 17.292757034301758, "learning_rate": 9.683862433862434e-06, "loss": 32.472, "step": 14765 }, { "epoch": 351.5731343283582, "grad_norm": 23.66617774963379, "learning_rate": 9.683201058201059e-06, "loss": 32.015, "step": 14766 }, { "epoch": 351.5970149253731, "grad_norm": 26.030893325805664, "learning_rate": 9.682539682539683e-06, "loss": 31.8104, "step": 14767 }, { "epoch": 351.6208955223881, "grad_norm": 17.178730010986328, "learning_rate": 9.681878306878307e-06, "loss": 33.3612, "step": 14768 }, { "epoch": 351.644776119403, "grad_norm": 29.958009719848633, "learning_rate": 9.681216931216933e-06, "loss": 32.4896, "step": 14769 }, { "epoch": 351.6686567164179, "grad_norm": 19.59082794189453, "learning_rate": 9.680555555555556e-06, "loss": 33.4508, "step": 14770 }, { "epoch": 351.6925373134328, "grad_norm": 27.186344146728516, "learning_rate": 9.679894179894181e-06, "loss": 32.5828, "step": 14771 }, { "epoch": 351.7164179104478, "grad_norm": 24.611085891723633, "learning_rate": 9.679232804232806e-06, "loss": 33.0101, "step": 14772 }, { "epoch": 351.7402985074627, "grad_norm": 16.794775009155273, "learning_rate": 9.678571428571429e-06, "loss": 32.1631, "step": 14773 }, { "epoch": 351.7641791044776, "grad_norm": 27.443695068359375, "learning_rate": 9.677910052910054e-06, "loss": 33.0032, "step": 14774 }, { "epoch": 351.78805970149256, "grad_norm": 19.315256118774414, "learning_rate": 9.677248677248678e-06, "loss": 32.4942, "step": 14775 }, { "epoch": 351.81194029850747, "grad_norm": 19.994688034057617, "learning_rate": 9.676587301587303e-06, "loss": 31.8404, "step": 14776 }, { "epoch": 351.8358208955224, "grad_norm": 18.42207908630371, "learning_rate": 9.675925925925926e-06, "loss": 33.3821, "step": 14777 }, { "epoch": 351.85970149253734, "grad_norm": 15.855660438537598, "learning_rate": 9.675264550264551e-06, "loss": 32.95, "step": 14778 }, { "epoch": 351.88358208955225, "grad_norm": 18.28765869140625, "learning_rate": 9.674603174603176e-06, "loss": 32.3438, "step": 14779 }, { "epoch": 351.90746268656716, "grad_norm": 20.875213623046875, "learning_rate": 9.673941798941799e-06, "loss": 32.5501, "step": 14780 }, { "epoch": 351.93134328358207, "grad_norm": 19.235822677612305, "learning_rate": 9.673280423280424e-06, "loss": 32.7862, "step": 14781 }, { "epoch": 351.95522388059703, "grad_norm": 13.67168140411377, "learning_rate": 9.672619047619049e-06, "loss": 32.4176, "step": 14782 }, { "epoch": 351.97910447761194, "grad_norm": 15.900962829589844, "learning_rate": 9.671957671957672e-06, "loss": 32.442, "step": 14783 }, { "epoch": 352.0, "grad_norm": 21.373632431030273, "learning_rate": 9.671296296296298e-06, "loss": 29.1896, "step": 14784 }, { "epoch": 352.0238805970149, "grad_norm": 16.60938262939453, "learning_rate": 9.670634920634921e-06, "loss": 32.0761, "step": 14785 }, { "epoch": 352.0477611940299, "grad_norm": 19.753610610961914, "learning_rate": 9.669973544973546e-06, "loss": 32.3375, "step": 14786 }, { "epoch": 352.0716417910448, "grad_norm": 25.297861099243164, "learning_rate": 9.669312169312171e-06, "loss": 31.8711, "step": 14787 }, { "epoch": 352.0955223880597, "grad_norm": 17.906513214111328, "learning_rate": 9.668650793650794e-06, "loss": 32.5138, "step": 14788 }, { "epoch": 352.1194029850746, "grad_norm": 23.18354034423828, "learning_rate": 9.667989417989419e-06, "loss": 33.2922, "step": 14789 }, { "epoch": 352.14328358208957, "grad_norm": 22.812021255493164, "learning_rate": 9.667328042328044e-06, "loss": 32.5472, "step": 14790 }, { "epoch": 352.1671641791045, "grad_norm": 21.94281005859375, "learning_rate": 9.666666666666667e-06, "loss": 33.1584, "step": 14791 }, { "epoch": 352.1910447761194, "grad_norm": 16.490760803222656, "learning_rate": 9.666005291005292e-06, "loss": 32.7039, "step": 14792 }, { "epoch": 352.21492537313435, "grad_norm": 25.750459671020508, "learning_rate": 9.665343915343916e-06, "loss": 32.4468, "step": 14793 }, { "epoch": 352.23880597014926, "grad_norm": 17.788372039794922, "learning_rate": 9.664682539682541e-06, "loss": 31.177, "step": 14794 }, { "epoch": 352.26268656716417, "grad_norm": 17.548831939697266, "learning_rate": 9.664021164021164e-06, "loss": 31.8691, "step": 14795 }, { "epoch": 352.28656716417913, "grad_norm": 28.672712326049805, "learning_rate": 9.663359788359789e-06, "loss": 33.6552, "step": 14796 }, { "epoch": 352.31044776119404, "grad_norm": 17.572633743286133, "learning_rate": 9.662698412698414e-06, "loss": 32.707, "step": 14797 }, { "epoch": 352.33432835820895, "grad_norm": 22.447053909301758, "learning_rate": 9.662037037037037e-06, "loss": 31.766, "step": 14798 }, { "epoch": 352.35820895522386, "grad_norm": 25.783077239990234, "learning_rate": 9.661375661375663e-06, "loss": 31.7204, "step": 14799 }, { "epoch": 352.3820895522388, "grad_norm": 16.482032775878906, "learning_rate": 9.660714285714287e-06, "loss": 31.9653, "step": 14800 }, { "epoch": 352.40597014925373, "grad_norm": 28.299856185913086, "learning_rate": 9.660052910052911e-06, "loss": 32.7796, "step": 14801 }, { "epoch": 352.42985074626864, "grad_norm": 23.327533721923828, "learning_rate": 9.659391534391536e-06, "loss": 32.8088, "step": 14802 }, { "epoch": 352.4537313432836, "grad_norm": 18.625377655029297, "learning_rate": 9.65873015873016e-06, "loss": 32.1429, "step": 14803 }, { "epoch": 352.4776119402985, "grad_norm": NaN, "learning_rate": 9.658068783068784e-06, "loss": 44.9996, "step": 14804 }, { "epoch": 352.5014925373134, "grad_norm": 28.77292251586914, "learning_rate": 9.658068783068784e-06, "loss": 33.0945, "step": 14805 }, { "epoch": 352.52537313432833, "grad_norm": 19.699316024780273, "learning_rate": 9.657407407407409e-06, "loss": 32.5718, "step": 14806 }, { "epoch": 352.5492537313433, "grad_norm": 21.003192901611328, "learning_rate": 9.656746031746032e-06, "loss": 32.6405, "step": 14807 }, { "epoch": 352.5731343283582, "grad_norm": 24.010530471801758, "learning_rate": 9.656084656084657e-06, "loss": 32.8653, "step": 14808 }, { "epoch": 352.5970149253731, "grad_norm": 20.95771598815918, "learning_rate": 9.655423280423282e-06, "loss": 31.7435, "step": 14809 }, { "epoch": 352.6208955223881, "grad_norm": 16.861574172973633, "learning_rate": 9.654761904761906e-06, "loss": 31.9477, "step": 14810 }, { "epoch": 352.644776119403, "grad_norm": 29.624996185302734, "learning_rate": 9.65410052910053e-06, "loss": 32.9392, "step": 14811 }, { "epoch": 352.6686567164179, "grad_norm": 17.968767166137695, "learning_rate": 9.653439153439154e-06, "loss": 31.531, "step": 14812 }, { "epoch": 352.6925373134328, "grad_norm": 22.400312423706055, "learning_rate": 9.652777777777779e-06, "loss": 32.9245, "step": 14813 }, { "epoch": 352.7164179104478, "grad_norm": 27.22949981689453, "learning_rate": 9.652116402116402e-06, "loss": 33.7673, "step": 14814 }, { "epoch": 352.7402985074627, "grad_norm": 17.837738037109375, "learning_rate": 9.651455026455027e-06, "loss": 32.177, "step": 14815 }, { "epoch": 352.7641791044776, "grad_norm": 27.297584533691406, "learning_rate": 9.650793650793652e-06, "loss": 31.3903, "step": 14816 }, { "epoch": 352.78805970149256, "grad_norm": 19.96441078186035, "learning_rate": 9.650132275132276e-06, "loss": 33.6202, "step": 14817 }, { "epoch": 352.81194029850747, "grad_norm": 21.580228805541992, "learning_rate": 9.649470899470901e-06, "loss": 31.7302, "step": 14818 }, { "epoch": 352.8358208955224, "grad_norm": 24.934663772583008, "learning_rate": 9.648809523809524e-06, "loss": 32.5863, "step": 14819 }, { "epoch": 352.85970149253734, "grad_norm": 17.15755271911621, "learning_rate": 9.64814814814815e-06, "loss": 32.2025, "step": 14820 }, { "epoch": 352.88358208955225, "grad_norm": 30.80393409729004, "learning_rate": 9.647486772486774e-06, "loss": 31.6753, "step": 14821 }, { "epoch": 352.90746268656716, "grad_norm": 20.597753524780273, "learning_rate": 9.646825396825397e-06, "loss": 32.8839, "step": 14822 }, { "epoch": 352.93134328358207, "grad_norm": 27.592512130737305, "learning_rate": 9.646164021164022e-06, "loss": 33.2483, "step": 14823 }, { "epoch": 352.95522388059703, "grad_norm": 22.31157875061035, "learning_rate": 9.645502645502647e-06, "loss": 33.4955, "step": 14824 }, { "epoch": 352.97910447761194, "grad_norm": 24.850711822509766, "learning_rate": 9.644841269841271e-06, "loss": 32.5427, "step": 14825 }, { "epoch": 353.0, "grad_norm": 25.0136661529541, "learning_rate": 9.644179894179895e-06, "loss": 27.6829, "step": 14826 }, { "epoch": 353.0238805970149, "grad_norm": 18.588224411010742, "learning_rate": 9.64351851851852e-06, "loss": 32.0289, "step": 14827 }, { "epoch": 353.0477611940299, "grad_norm": 27.74751091003418, "learning_rate": 9.642857142857144e-06, "loss": 31.8052, "step": 14828 }, { "epoch": 353.0716417910448, "grad_norm": 19.6511173248291, "learning_rate": 9.642195767195767e-06, "loss": 31.3612, "step": 14829 }, { "epoch": 353.0955223880597, "grad_norm": 25.314407348632812, "learning_rate": 9.641534391534392e-06, "loss": 33.1017, "step": 14830 }, { "epoch": 353.1194029850746, "grad_norm": 25.031667709350586, "learning_rate": 9.640873015873017e-06, "loss": 32.6708, "step": 14831 }, { "epoch": 353.14328358208957, "grad_norm": 17.91745948791504, "learning_rate": 9.64021164021164e-06, "loss": 33.5659, "step": 14832 }, { "epoch": 353.1671641791045, "grad_norm": 33.04819869995117, "learning_rate": 9.639550264550266e-06, "loss": 33.0337, "step": 14833 }, { "epoch": 353.1910447761194, "grad_norm": 21.4101505279541, "learning_rate": 9.63888888888889e-06, "loss": 32.53, "step": 14834 }, { "epoch": 353.21492537313435, "grad_norm": 38.68268585205078, "learning_rate": 9.638227513227514e-06, "loss": 32.503, "step": 14835 }, { "epoch": 353.23880597014926, "grad_norm": 24.02882194519043, "learning_rate": 9.63756613756614e-06, "loss": 32.7662, "step": 14836 }, { "epoch": 353.26268656716417, "grad_norm": 41.039188385009766, "learning_rate": 9.636904761904762e-06, "loss": 33.1644, "step": 14837 }, { "epoch": 353.28656716417913, "grad_norm": 29.76303482055664, "learning_rate": 9.636243386243387e-06, "loss": 32.5574, "step": 14838 }, { "epoch": 353.31044776119404, "grad_norm": 43.067386627197266, "learning_rate": 9.635582010582012e-06, "loss": 31.4812, "step": 14839 }, { "epoch": 353.33432835820895, "grad_norm": 40.734962463378906, "learning_rate": 9.634920634920637e-06, "loss": 31.8917, "step": 14840 }, { "epoch": 353.35820895522386, "grad_norm": 25.5545597076416, "learning_rate": 9.63425925925926e-06, "loss": 32.9401, "step": 14841 }, { "epoch": 353.3820895522388, "grad_norm": 30.88152503967285, "learning_rate": 9.633597883597885e-06, "loss": 31.2318, "step": 14842 }, { "epoch": 353.40597014925373, "grad_norm": 28.586931228637695, "learning_rate": 9.63293650793651e-06, "loss": 32.8859, "step": 14843 }, { "epoch": 353.42985074626864, "grad_norm": 23.067806243896484, "learning_rate": 9.632275132275132e-06, "loss": 32.103, "step": 14844 }, { "epoch": 353.4537313432836, "grad_norm": 39.63471221923828, "learning_rate": 9.631613756613757e-06, "loss": 31.1948, "step": 14845 }, { "epoch": 353.4776119402985, "grad_norm": 29.925830841064453, "learning_rate": 9.630952380952382e-06, "loss": 32.4599, "step": 14846 }, { "epoch": 353.5014925373134, "grad_norm": 36.394508361816406, "learning_rate": 9.630291005291005e-06, "loss": 33.2981, "step": 14847 }, { "epoch": 353.52537313432833, "grad_norm": 34.93574523925781, "learning_rate": 9.62962962962963e-06, "loss": 32.0258, "step": 14848 }, { "epoch": 353.5492537313433, "grad_norm": 29.750062942504883, "learning_rate": 9.628968253968255e-06, "loss": 31.8738, "step": 14849 }, { "epoch": 353.5731343283582, "grad_norm": 26.949481964111328, "learning_rate": 9.62830687830688e-06, "loss": 32.2195, "step": 14850 }, { "epoch": 353.5970149253731, "grad_norm": 32.741798400878906, "learning_rate": 9.627645502645503e-06, "loss": 32.9702, "step": 14851 }, { "epoch": 353.6208955223881, "grad_norm": 29.208690643310547, "learning_rate": 9.626984126984127e-06, "loss": 32.2488, "step": 14852 }, { "epoch": 353.644776119403, "grad_norm": 40.328269958496094, "learning_rate": 9.626322751322752e-06, "loss": 32.2062, "step": 14853 }, { "epoch": 353.6686567164179, "grad_norm": 35.718238830566406, "learning_rate": 9.625661375661375e-06, "loss": 32.1761, "step": 14854 }, { "epoch": 353.6925373134328, "grad_norm": 32.72736740112305, "learning_rate": 9.625e-06, "loss": 32.0387, "step": 14855 }, { "epoch": 353.7164179104478, "grad_norm": 28.951156616210938, "learning_rate": 9.624338624338625e-06, "loss": 32.8885, "step": 14856 }, { "epoch": 353.7402985074627, "grad_norm": 30.452449798583984, "learning_rate": 9.62367724867725e-06, "loss": 31.3375, "step": 14857 }, { "epoch": 353.7641791044776, "grad_norm": 29.340457916259766, "learning_rate": 9.623015873015875e-06, "loss": 33.4326, "step": 14858 }, { "epoch": 353.78805970149256, "grad_norm": 36.30168151855469, "learning_rate": 9.622354497354498e-06, "loss": 32.6153, "step": 14859 }, { "epoch": 353.81194029850747, "grad_norm": 29.859689712524414, "learning_rate": 9.621693121693122e-06, "loss": 32.4116, "step": 14860 }, { "epoch": 353.8358208955224, "grad_norm": 30.943918228149414, "learning_rate": 9.621031746031747e-06, "loss": 31.7455, "step": 14861 }, { "epoch": 353.85970149253734, "grad_norm": 29.39823341369629, "learning_rate": 9.62037037037037e-06, "loss": 33.0786, "step": 14862 }, { "epoch": 353.88358208955225, "grad_norm": 34.88687515258789, "learning_rate": 9.619708994708995e-06, "loss": 32.0198, "step": 14863 }, { "epoch": 353.90746268656716, "grad_norm": 26.524593353271484, "learning_rate": 9.61904761904762e-06, "loss": 32.578, "step": 14864 }, { "epoch": 353.93134328358207, "grad_norm": 36.95452117919922, "learning_rate": 9.618386243386245e-06, "loss": 32.7933, "step": 14865 }, { "epoch": 353.95522388059703, "grad_norm": 31.791065216064453, "learning_rate": 9.617724867724868e-06, "loss": 32.5098, "step": 14866 }, { "epoch": 353.97910447761194, "grad_norm": 31.311098098754883, "learning_rate": 9.617063492063493e-06, "loss": 33.022, "step": 14867 }, { "epoch": 354.0, "grad_norm": 23.458200454711914, "learning_rate": 9.616402116402117e-06, "loss": 28.9878, "step": 14868 }, { "epoch": 354.0238805970149, "grad_norm": 32.75605010986328, "learning_rate": 9.61574074074074e-06, "loss": 31.8392, "step": 14869 }, { "epoch": 354.0477611940299, "grad_norm": 27.395606994628906, "learning_rate": 9.615079365079365e-06, "loss": 32.863, "step": 14870 }, { "epoch": 354.0716417910448, "grad_norm": 35.647464752197266, "learning_rate": 9.61441798941799e-06, "loss": 32.3482, "step": 14871 }, { "epoch": 354.0955223880597, "grad_norm": 32.841495513916016, "learning_rate": 9.613756613756613e-06, "loss": 32.7021, "step": 14872 }, { "epoch": 354.1194029850746, "grad_norm": 30.325456619262695, "learning_rate": 9.61309523809524e-06, "loss": 31.0254, "step": 14873 }, { "epoch": 354.14328358208957, "grad_norm": 30.15027618408203, "learning_rate": 9.612433862433863e-06, "loss": 32.3485, "step": 14874 }, { "epoch": 354.1671641791045, "grad_norm": 30.046512603759766, "learning_rate": 9.611772486772488e-06, "loss": 31.0794, "step": 14875 }, { "epoch": 354.1910447761194, "grad_norm": 28.54079246520996, "learning_rate": 9.611111111111112e-06, "loss": 32.5211, "step": 14876 }, { "epoch": 354.21492537313435, "grad_norm": 33.021080017089844, "learning_rate": 9.610449735449736e-06, "loss": 32.0495, "step": 14877 }, { "epoch": 354.23880597014926, "grad_norm": 28.060853958129883, "learning_rate": 9.60978835978836e-06, "loss": 32.697, "step": 14878 }, { "epoch": 354.26268656716417, "grad_norm": 35.05024719238281, "learning_rate": 9.609126984126985e-06, "loss": 31.7419, "step": 14879 }, { "epoch": 354.28656716417913, "grad_norm": 27.963586807250977, "learning_rate": 9.60846560846561e-06, "loss": 33.1233, "step": 14880 }, { "epoch": 354.31044776119404, "grad_norm": 32.25556945800781, "learning_rate": 9.607804232804233e-06, "loss": 32.6409, "step": 14881 }, { "epoch": 354.33432835820895, "grad_norm": 27.298051834106445, "learning_rate": 9.607142857142858e-06, "loss": 32.189, "step": 14882 }, { "epoch": 354.35820895522386, "grad_norm": 33.5682487487793, "learning_rate": 9.606481481481483e-06, "loss": 31.9503, "step": 14883 }, { "epoch": 354.3820895522388, "grad_norm": 27.984455108642578, "learning_rate": 9.605820105820106e-06, "loss": 32.4672, "step": 14884 }, { "epoch": 354.40597014925373, "grad_norm": 32.073486328125, "learning_rate": 9.60515873015873e-06, "loss": 32.324, "step": 14885 }, { "epoch": 354.42985074626864, "grad_norm": 29.038410186767578, "learning_rate": 9.604497354497355e-06, "loss": 31.9666, "step": 14886 }, { "epoch": 354.4537313432836, "grad_norm": 31.138967514038086, "learning_rate": 9.603835978835978e-06, "loss": 31.3937, "step": 14887 }, { "epoch": 354.4776119402985, "grad_norm": 28.261775970458984, "learning_rate": 9.603174603174605e-06, "loss": 31.8848, "step": 14888 }, { "epoch": 354.5014925373134, "grad_norm": 32.29547119140625, "learning_rate": 9.602513227513228e-06, "loss": 32.8057, "step": 14889 }, { "epoch": 354.52537313432833, "grad_norm": 28.475051879882812, "learning_rate": 9.601851851851853e-06, "loss": 33.2205, "step": 14890 }, { "epoch": 354.5492537313433, "grad_norm": 32.84428024291992, "learning_rate": 9.601190476190478e-06, "loss": 32.2397, "step": 14891 }, { "epoch": 354.5731343283582, "grad_norm": 28.925617218017578, "learning_rate": 9.6005291005291e-06, "loss": 32.1004, "step": 14892 }, { "epoch": 354.5970149253731, "grad_norm": 32.93207931518555, "learning_rate": 9.599867724867726e-06, "loss": 33.4937, "step": 14893 }, { "epoch": 354.6208955223881, "grad_norm": 25.15668487548828, "learning_rate": 9.59920634920635e-06, "loss": 33.2984, "step": 14894 }, { "epoch": 354.644776119403, "grad_norm": 31.57761001586914, "learning_rate": 9.598544973544973e-06, "loss": 31.2805, "step": 14895 }, { "epoch": 354.6686567164179, "grad_norm": 26.353530883789062, "learning_rate": 9.597883597883598e-06, "loss": 31.7676, "step": 14896 }, { "epoch": 354.6925373134328, "grad_norm": 29.290597915649414, "learning_rate": 9.597222222222223e-06, "loss": 31.8581, "step": 14897 }, { "epoch": 354.7164179104478, "grad_norm": 29.742828369140625, "learning_rate": 9.596560846560848e-06, "loss": 33.3169, "step": 14898 }, { "epoch": 354.7402985074627, "grad_norm": 28.00301170349121, "learning_rate": 9.595899470899471e-06, "loss": 34.1137, "step": 14899 }, { "epoch": 354.7641791044776, "grad_norm": 24.516002655029297, "learning_rate": 9.595238095238096e-06, "loss": 32.3348, "step": 14900 }, { "epoch": 354.78805970149256, "grad_norm": 29.55282974243164, "learning_rate": 9.59457671957672e-06, "loss": 32.2715, "step": 14901 }, { "epoch": 354.81194029850747, "grad_norm": 23.89673614501953, "learning_rate": 9.593915343915344e-06, "loss": 32.8552, "step": 14902 }, { "epoch": 354.8358208955224, "grad_norm": 33.77177429199219, "learning_rate": 9.59325396825397e-06, "loss": 32.3179, "step": 14903 }, { "epoch": 354.85970149253734, "grad_norm": 29.023235321044922, "learning_rate": 9.592592592592593e-06, "loss": 32.3885, "step": 14904 }, { "epoch": 354.88358208955225, "grad_norm": 29.500022888183594, "learning_rate": 9.591931216931218e-06, "loss": 32.2777, "step": 14905 }, { "epoch": 354.90746268656716, "grad_norm": 28.313243865966797, "learning_rate": 9.591269841269843e-06, "loss": 33.1268, "step": 14906 }, { "epoch": 354.93134328358207, "grad_norm": 28.699420928955078, "learning_rate": 9.590608465608466e-06, "loss": 32.0499, "step": 14907 }, { "epoch": 354.95522388059703, "grad_norm": 24.747024536132812, "learning_rate": 9.58994708994709e-06, "loss": 33.132, "step": 14908 }, { "epoch": 354.97910447761194, "grad_norm": 24.802593231201172, "learning_rate": 9.589285714285716e-06, "loss": 31.8087, "step": 14909 }, { "epoch": 355.0, "grad_norm": 19.17487335205078, "learning_rate": 9.588624338624339e-06, "loss": 28.8846, "step": 14910 }, { "epoch": 355.0238805970149, "grad_norm": 28.91864776611328, "learning_rate": 9.587962962962963e-06, "loss": 32.7224, "step": 14911 }, { "epoch": 355.0477611940299, "grad_norm": 21.518117904663086, "learning_rate": 9.587301587301588e-06, "loss": 33.0438, "step": 14912 }, { "epoch": 355.0716417910448, "grad_norm": 27.8236083984375, "learning_rate": 9.586640211640213e-06, "loss": 32.5284, "step": 14913 }, { "epoch": 355.0955223880597, "grad_norm": 24.90439224243164, "learning_rate": 9.585978835978836e-06, "loss": 32.3166, "step": 14914 }, { "epoch": 355.1194029850746, "grad_norm": 24.12470054626465, "learning_rate": 9.585317460317461e-06, "loss": 32.409, "step": 14915 }, { "epoch": 355.14328358208957, "grad_norm": 24.157527923583984, "learning_rate": 9.584656084656086e-06, "loss": 30.7544, "step": 14916 }, { "epoch": 355.1671641791045, "grad_norm": 22.023895263671875, "learning_rate": 9.583994708994709e-06, "loss": 31.9371, "step": 14917 }, { "epoch": 355.1910447761194, "grad_norm": 20.419158935546875, "learning_rate": 9.583333333333335e-06, "loss": 31.5149, "step": 14918 }, { "epoch": 355.21492537313435, "grad_norm": 19.5517520904541, "learning_rate": 9.582671957671958e-06, "loss": 31.7053, "step": 14919 }, { "epoch": 355.23880597014926, "grad_norm": 16.270599365234375, "learning_rate": 9.582010582010583e-06, "loss": 32.5354, "step": 14920 }, { "epoch": 355.26268656716417, "grad_norm": 27.07267189025879, "learning_rate": 9.581349206349208e-06, "loss": 33.035, "step": 14921 }, { "epoch": 355.28656716417913, "grad_norm": 18.50597381591797, "learning_rate": 9.580687830687831e-06, "loss": 32.2181, "step": 14922 }, { "epoch": 355.31044776119404, "grad_norm": 26.5690975189209, "learning_rate": 9.580026455026456e-06, "loss": 32.1257, "step": 14923 }, { "epoch": 355.33432835820895, "grad_norm": 21.633460998535156, "learning_rate": 9.57936507936508e-06, "loss": 31.5428, "step": 14924 }, { "epoch": 355.35820895522386, "grad_norm": 21.906354904174805, "learning_rate": 9.578703703703704e-06, "loss": 32.6405, "step": 14925 }, { "epoch": 355.3820895522388, "grad_norm": 20.9173641204834, "learning_rate": 9.578042328042329e-06, "loss": 32.5555, "step": 14926 }, { "epoch": 355.40597014925373, "grad_norm": 20.949565887451172, "learning_rate": 9.577380952380953e-06, "loss": 32.351, "step": 14927 }, { "epoch": 355.42985074626864, "grad_norm": 19.44186019897461, "learning_rate": 9.576719576719578e-06, "loss": 31.9537, "step": 14928 }, { "epoch": 355.4537313432836, "grad_norm": 20.792724609375, "learning_rate": 9.576058201058201e-06, "loss": 32.3724, "step": 14929 }, { "epoch": 355.4776119402985, "grad_norm": 17.375606536865234, "learning_rate": 9.575396825396826e-06, "loss": 32.4854, "step": 14930 }, { "epoch": 355.5014925373134, "grad_norm": 22.074893951416016, "learning_rate": 9.574735449735451e-06, "loss": 32.8543, "step": 14931 }, { "epoch": 355.52537313432833, "grad_norm": 15.514545440673828, "learning_rate": 9.574074074074074e-06, "loss": 32.9056, "step": 14932 }, { "epoch": 355.5492537313433, "grad_norm": 21.257991790771484, "learning_rate": 9.573412698412699e-06, "loss": 32.339, "step": 14933 }, { "epoch": 355.5731343283582, "grad_norm": 17.15261459350586, "learning_rate": 9.572751322751324e-06, "loss": 32.3944, "step": 14934 }, { "epoch": 355.5970149253731, "grad_norm": 21.776504516601562, "learning_rate": 9.572089947089947e-06, "loss": 32.9607, "step": 14935 }, { "epoch": 355.6208955223881, "grad_norm": 18.435537338256836, "learning_rate": 9.571428571428573e-06, "loss": 31.9289, "step": 14936 }, { "epoch": 355.644776119403, "grad_norm": 21.378620147705078, "learning_rate": 9.570767195767196e-06, "loss": 31.3542, "step": 14937 }, { "epoch": 355.6686567164179, "grad_norm": 20.051088333129883, "learning_rate": 9.570105820105821e-06, "loss": 33.0266, "step": 14938 }, { "epoch": 355.6925373134328, "grad_norm": 17.859416961669922, "learning_rate": 9.569444444444446e-06, "loss": 32.055, "step": 14939 }, { "epoch": 355.7164179104478, "grad_norm": 20.963573455810547, "learning_rate": 9.568783068783069e-06, "loss": 31.4541, "step": 14940 }, { "epoch": 355.7402985074627, "grad_norm": 19.61673355102539, "learning_rate": 9.568121693121694e-06, "loss": 32.1547, "step": 14941 }, { "epoch": 355.7641791044776, "grad_norm": 16.462936401367188, "learning_rate": 9.567460317460319e-06, "loss": 32.4982, "step": 14942 }, { "epoch": 355.78805970149256, "grad_norm": 17.522436141967773, "learning_rate": 9.566798941798943e-06, "loss": 32.8807, "step": 14943 }, { "epoch": 355.81194029850747, "grad_norm": 19.806222915649414, "learning_rate": 9.566137566137567e-06, "loss": 32.1656, "step": 14944 }, { "epoch": 355.8358208955224, "grad_norm": 21.00943374633789, "learning_rate": 9.565476190476191e-06, "loss": 33.3594, "step": 14945 }, { "epoch": 355.85970149253734, "grad_norm": 18.697425842285156, "learning_rate": 9.564814814814816e-06, "loss": 31.4778, "step": 14946 }, { "epoch": 355.88358208955225, "grad_norm": 16.346233367919922, "learning_rate": 9.56415343915344e-06, "loss": 32.8923, "step": 14947 }, { "epoch": 355.90746268656716, "grad_norm": 29.13170623779297, "learning_rate": 9.563492063492064e-06, "loss": 33.6557, "step": 14948 }, { "epoch": 355.93134328358207, "grad_norm": 19.528850555419922, "learning_rate": 9.562830687830689e-06, "loss": 33.0077, "step": 14949 }, { "epoch": 355.95522388059703, "grad_norm": 28.07571792602539, "learning_rate": 9.562169312169312e-06, "loss": 31.9175, "step": 14950 }, { "epoch": 355.97910447761194, "grad_norm": 21.989622116088867, "learning_rate": 9.561507936507938e-06, "loss": 33.2373, "step": 14951 }, { "epoch": 356.0, "grad_norm": 21.870967864990234, "learning_rate": 9.560846560846561e-06, "loss": 28.9643, "step": 14952 }, { "epoch": 356.0238805970149, "grad_norm": 20.74629020690918, "learning_rate": 9.560185185185186e-06, "loss": 31.936, "step": 14953 }, { "epoch": 356.0477611940299, "grad_norm": 20.529767990112305, "learning_rate": 9.559523809523811e-06, "loss": 32.4032, "step": 14954 }, { "epoch": 356.0716417910448, "grad_norm": 18.911712646484375, "learning_rate": 9.558862433862434e-06, "loss": 33.1696, "step": 14955 }, { "epoch": 356.0955223880597, "grad_norm": 22.036422729492188, "learning_rate": 9.558201058201059e-06, "loss": 32.2987, "step": 14956 }, { "epoch": 356.1194029850746, "grad_norm": 20.848241806030273, "learning_rate": 9.557539682539684e-06, "loss": 33.6154, "step": 14957 }, { "epoch": 356.14328358208957, "grad_norm": 18.03307342529297, "learning_rate": 9.556878306878309e-06, "loss": 32.6677, "step": 14958 }, { "epoch": 356.1671641791045, "grad_norm": 24.9796142578125, "learning_rate": 9.556216931216932e-06, "loss": 32.6979, "step": 14959 }, { "epoch": 356.1910447761194, "grad_norm": 18.944683074951172, "learning_rate": 9.555555555555556e-06, "loss": 32.6752, "step": 14960 }, { "epoch": 356.21492537313435, "grad_norm": 17.547496795654297, "learning_rate": 9.554894179894181e-06, "loss": 32.0206, "step": 14961 }, { "epoch": 356.23880597014926, "grad_norm": 24.731046676635742, "learning_rate": 9.554232804232804e-06, "loss": 32.2894, "step": 14962 }, { "epoch": 356.26268656716417, "grad_norm": 18.288780212402344, "learning_rate": 9.55357142857143e-06, "loss": 31.5285, "step": 14963 }, { "epoch": 356.28656716417913, "grad_norm": 20.468063354492188, "learning_rate": 9.552910052910054e-06, "loss": 31.8537, "step": 14964 }, { "epoch": 356.31044776119404, "grad_norm": 19.860734939575195, "learning_rate": 9.552248677248677e-06, "loss": 32.7907, "step": 14965 }, { "epoch": 356.33432835820895, "grad_norm": 16.64264678955078, "learning_rate": 9.551587301587304e-06, "loss": 30.7387, "step": 14966 }, { "epoch": 356.35820895522386, "grad_norm": 22.245464324951172, "learning_rate": 9.550925925925927e-06, "loss": 32.9044, "step": 14967 }, { "epoch": 356.3820895522388, "grad_norm": 16.65418815612793, "learning_rate": 9.550264550264551e-06, "loss": 31.781, "step": 14968 }, { "epoch": 356.40597014925373, "grad_norm": 24.147584915161133, "learning_rate": 9.549603174603176e-06, "loss": 32.6479, "step": 14969 }, { "epoch": 356.42985074626864, "grad_norm": 17.681825637817383, "learning_rate": 9.5489417989418e-06, "loss": 31.2675, "step": 14970 }, { "epoch": 356.4537313432836, "grad_norm": 23.947404861450195, "learning_rate": 9.548280423280424e-06, "loss": 31.1554, "step": 14971 }, { "epoch": 356.4776119402985, "grad_norm": 19.93479347229004, "learning_rate": 9.547619047619049e-06, "loss": 32.2155, "step": 14972 }, { "epoch": 356.5014925373134, "grad_norm": 21.344940185546875, "learning_rate": 9.546957671957672e-06, "loss": 31.7149, "step": 14973 }, { "epoch": 356.52537313432833, "grad_norm": 21.07832908630371, "learning_rate": 9.546296296296297e-06, "loss": 32.7319, "step": 14974 }, { "epoch": 356.5492537313433, "grad_norm": 21.95323371887207, "learning_rate": 9.545634920634922e-06, "loss": 32.2067, "step": 14975 }, { "epoch": 356.5731343283582, "grad_norm": 20.22541046142578, "learning_rate": 9.544973544973546e-06, "loss": 32.8922, "step": 14976 }, { "epoch": 356.5970149253731, "grad_norm": 22.408796310424805, "learning_rate": 9.54431216931217e-06, "loss": 33.4193, "step": 14977 }, { "epoch": 356.6208955223881, "grad_norm": 19.652568817138672, "learning_rate": 9.543650793650794e-06, "loss": 32.5599, "step": 14978 }, { "epoch": 356.644776119403, "grad_norm": 18.608518600463867, "learning_rate": 9.54298941798942e-06, "loss": 31.5812, "step": 14979 }, { "epoch": 356.6686567164179, "grad_norm": 20.28130340576172, "learning_rate": 9.542328042328042e-06, "loss": 33.1113, "step": 14980 }, { "epoch": 356.6925373134328, "grad_norm": 19.574302673339844, "learning_rate": 9.541666666666669e-06, "loss": 33.2153, "step": 14981 }, { "epoch": 356.7164179104478, "grad_norm": 18.869596481323242, "learning_rate": 9.541005291005292e-06, "loss": 32.5085, "step": 14982 }, { "epoch": 356.7402985074627, "grad_norm": 16.84361457824707, "learning_rate": 9.540343915343917e-06, "loss": 31.5301, "step": 14983 }, { "epoch": 356.7641791044776, "grad_norm": 22.837491989135742, "learning_rate": 9.539682539682541e-06, "loss": 33.0972, "step": 14984 }, { "epoch": 356.78805970149256, "grad_norm": 17.742788314819336, "learning_rate": 9.539021164021165e-06, "loss": 30.3043, "step": 14985 }, { "epoch": 356.81194029850747, "grad_norm": 21.453231811523438, "learning_rate": 9.53835978835979e-06, "loss": 32.3847, "step": 14986 }, { "epoch": 356.8358208955224, "grad_norm": 17.288801193237305, "learning_rate": 9.537698412698414e-06, "loss": 32.9337, "step": 14987 }, { "epoch": 356.85970149253734, "grad_norm": 23.859575271606445, "learning_rate": 9.537037037037037e-06, "loss": 32.251, "step": 14988 }, { "epoch": 356.88358208955225, "grad_norm": 19.979860305786133, "learning_rate": 9.536375661375662e-06, "loss": 32.9284, "step": 14989 }, { "epoch": 356.90746268656716, "grad_norm": 22.138931274414062, "learning_rate": 9.535714285714287e-06, "loss": 33.8049, "step": 14990 }, { "epoch": 356.93134328358207, "grad_norm": 20.50640296936035, "learning_rate": 9.535052910052912e-06, "loss": 32.672, "step": 14991 }, { "epoch": 356.95522388059703, "grad_norm": 16.06356430053711, "learning_rate": 9.534391534391535e-06, "loss": 32.0177, "step": 14992 }, { "epoch": 356.97910447761194, "grad_norm": 21.270681381225586, "learning_rate": 9.53373015873016e-06, "loss": 32.0279, "step": 14993 }, { "epoch": 357.0, "grad_norm": 16.916532516479492, "learning_rate": 9.533068783068784e-06, "loss": 28.727, "step": 14994 }, { "epoch": 357.0238805970149, "grad_norm": 18.77668571472168, "learning_rate": 9.532407407407407e-06, "loss": 32.0947, "step": 14995 }, { "epoch": 357.0477611940299, "grad_norm": 19.10407257080078, "learning_rate": 9.531746031746032e-06, "loss": 33.2131, "step": 14996 }, { "epoch": 357.0716417910448, "grad_norm": 16.22281837463379, "learning_rate": 9.531084656084657e-06, "loss": 32.4244, "step": 14997 }, { "epoch": 357.0955223880597, "grad_norm": 20.847332000732422, "learning_rate": 9.530423280423282e-06, "loss": 32.713, "step": 14998 }, { "epoch": 357.1194029850746, "grad_norm": 17.989212036132812, "learning_rate": 9.529761904761905e-06, "loss": 33.399, "step": 14999 }, { "epoch": 357.14328358208957, "grad_norm": 24.799148559570312, "learning_rate": 9.52910052910053e-06, "loss": 32.0786, "step": 15000 }, { "epoch": 357.1671641791045, "grad_norm": 21.575510025024414, "learning_rate": 9.528439153439155e-06, "loss": 32.4382, "step": 15001 }, { "epoch": 357.1910447761194, "grad_norm": 16.28173828125, "learning_rate": 9.527777777777778e-06, "loss": 31.9868, "step": 15002 }, { "epoch": 357.21492537313435, "grad_norm": 28.259313583374023, "learning_rate": 9.527116402116402e-06, "loss": 32.7567, "step": 15003 }, { "epoch": 357.23880597014926, "grad_norm": 18.24700927734375, "learning_rate": 9.526455026455027e-06, "loss": 31.7657, "step": 15004 }, { "epoch": 357.26268656716417, "grad_norm": 18.468481063842773, "learning_rate": 9.52579365079365e-06, "loss": 33.6397, "step": 15005 }, { "epoch": 357.28656716417913, "grad_norm": 25.735326766967773, "learning_rate": 9.525132275132277e-06, "loss": 32.1366, "step": 15006 }, { "epoch": 357.31044776119404, "grad_norm": 18.402223587036133, "learning_rate": 9.5244708994709e-06, "loss": 32.029, "step": 15007 }, { "epoch": 357.33432835820895, "grad_norm": 17.230976104736328, "learning_rate": 9.523809523809525e-06, "loss": 32.1935, "step": 15008 }, { "epoch": 357.35820895522386, "grad_norm": 23.654565811157227, "learning_rate": 9.52314814814815e-06, "loss": 31.0136, "step": 15009 }, { "epoch": 357.3820895522388, "grad_norm": 22.325735092163086, "learning_rate": 9.522486772486773e-06, "loss": 31.2313, "step": 15010 }, { "epoch": 357.40597014925373, "grad_norm": 14.84327220916748, "learning_rate": 9.521825396825397e-06, "loss": 32.0371, "step": 15011 }, { "epoch": 357.42985074626864, "grad_norm": 26.068971633911133, "learning_rate": 9.521164021164022e-06, "loss": 32.2656, "step": 15012 }, { "epoch": 357.4537313432836, "grad_norm": 22.828689575195312, "learning_rate": 9.520502645502645e-06, "loss": 32.0258, "step": 15013 }, { "epoch": 357.4776119402985, "grad_norm": 21.023258209228516, "learning_rate": 9.51984126984127e-06, "loss": 32.6881, "step": 15014 }, { "epoch": 357.5014925373134, "grad_norm": 25.693632125854492, "learning_rate": 9.519179894179895e-06, "loss": 32.1838, "step": 15015 }, { "epoch": 357.52537313432833, "grad_norm": 22.227737426757812, "learning_rate": 9.51851851851852e-06, "loss": 33.7588, "step": 15016 }, { "epoch": 357.5492537313433, "grad_norm": 23.099042892456055, "learning_rate": 9.517857142857143e-06, "loss": 32.1085, "step": 15017 }, { "epoch": 357.5731343283582, "grad_norm": 25.152877807617188, "learning_rate": 9.517195767195768e-06, "loss": 31.9446, "step": 15018 }, { "epoch": 357.5970149253731, "grad_norm": 18.60989761352539, "learning_rate": 9.516534391534392e-06, "loss": 33.194, "step": 15019 }, { "epoch": 357.6208955223881, "grad_norm": 21.53455924987793, "learning_rate": 9.515873015873016e-06, "loss": 31.9685, "step": 15020 }, { "epoch": 357.644776119403, "grad_norm": 23.571380615234375, "learning_rate": 9.515211640211642e-06, "loss": 32.6799, "step": 15021 }, { "epoch": 357.6686567164179, "grad_norm": 17.009485244750977, "learning_rate": 9.514550264550265e-06, "loss": 31.5809, "step": 15022 }, { "epoch": 357.6925373134328, "grad_norm": 20.138269424438477, "learning_rate": 9.51388888888889e-06, "loss": 32.7123, "step": 15023 }, { "epoch": 357.7164179104478, "grad_norm": 24.550018310546875, "learning_rate": 9.513227513227515e-06, "loss": 32.1209, "step": 15024 }, { "epoch": 357.7402985074627, "grad_norm": 15.26170825958252, "learning_rate": 9.512566137566138e-06, "loss": 31.6934, "step": 15025 }, { "epoch": 357.7641791044776, "grad_norm": 19.679811477661133, "learning_rate": 9.511904761904763e-06, "loss": 32.5993, "step": 15026 }, { "epoch": 357.78805970149256, "grad_norm": 24.347787857055664, "learning_rate": 9.511243386243387e-06, "loss": 32.2636, "step": 15027 }, { "epoch": 357.81194029850747, "grad_norm": 16.888120651245117, "learning_rate": 9.51058201058201e-06, "loss": 31.4244, "step": 15028 }, { "epoch": 357.8358208955224, "grad_norm": 17.593503952026367, "learning_rate": 9.509920634920635e-06, "loss": 32.2956, "step": 15029 }, { "epoch": 357.85970149253734, "grad_norm": 20.981962203979492, "learning_rate": 9.50925925925926e-06, "loss": 32.8367, "step": 15030 }, { "epoch": 357.88358208955225, "grad_norm": 19.466169357299805, "learning_rate": 9.508597883597885e-06, "loss": 32.9313, "step": 15031 }, { "epoch": 357.90746268656716, "grad_norm": NaN, "learning_rate": 9.507936507936508e-06, "loss": 44.3242, "step": 15032 }, { "epoch": 357.93134328358207, "grad_norm": 13.813207626342773, "learning_rate": 9.507936507936508e-06, "loss": 32.4276, "step": 15033 }, { "epoch": 357.95522388059703, "grad_norm": 24.915910720825195, "learning_rate": 9.507275132275133e-06, "loss": 32.183, "step": 15034 }, { "epoch": 357.97910447761194, "grad_norm": 17.859813690185547, "learning_rate": 9.506613756613758e-06, "loss": 32.1332, "step": 15035 }, { "epoch": 358.0, "grad_norm": 15.236315727233887, "learning_rate": 9.50595238095238e-06, "loss": 28.0183, "step": 15036 }, { "epoch": 358.0238805970149, "grad_norm": 24.482973098754883, "learning_rate": 9.505291005291006e-06, "loss": 31.2572, "step": 15037 }, { "epoch": 358.0477611940299, "grad_norm": 18.803373336791992, "learning_rate": 9.50462962962963e-06, "loss": 31.8213, "step": 15038 }, { "epoch": 358.0716417910448, "grad_norm": 18.289478302001953, "learning_rate": 9.503968253968255e-06, "loss": 31.7839, "step": 15039 }, { "epoch": 358.0955223880597, "grad_norm": 23.65920639038086, "learning_rate": 9.50330687830688e-06, "loss": 30.9145, "step": 15040 }, { "epoch": 358.1194029850746, "grad_norm": 18.217016220092773, "learning_rate": 9.502645502645503e-06, "loss": 31.6862, "step": 15041 }, { "epoch": 358.14328358208957, "grad_norm": 18.234375, "learning_rate": 9.501984126984128e-06, "loss": 31.6968, "step": 15042 }, { "epoch": 358.1671641791045, "grad_norm": 20.62415885925293, "learning_rate": 9.501322751322753e-06, "loss": 30.8351, "step": 15043 }, { "epoch": 358.1910447761194, "grad_norm": 15.774876594543457, "learning_rate": 9.500661375661376e-06, "loss": 31.4542, "step": 15044 }, { "epoch": 358.21492537313435, "grad_norm": 17.491928100585938, "learning_rate": 9.5e-06, "loss": 31.8844, "step": 15045 }, { "epoch": 358.23880597014926, "grad_norm": 13.995555877685547, "learning_rate": 9.499338624338625e-06, "loss": 33.1773, "step": 15046 }, { "epoch": 358.26268656716417, "grad_norm": 21.541467666625977, "learning_rate": 9.49867724867725e-06, "loss": 32.439, "step": 15047 }, { "epoch": 358.28656716417913, "grad_norm": 18.12356185913086, "learning_rate": 9.498015873015873e-06, "loss": 32.5128, "step": 15048 }, { "epoch": 358.31044776119404, "grad_norm": 19.17829132080078, "learning_rate": 9.497354497354498e-06, "loss": 32.4194, "step": 15049 }, { "epoch": 358.33432835820895, "grad_norm": 18.12895393371582, "learning_rate": 9.496693121693123e-06, "loss": 33.0875, "step": 15050 }, { "epoch": 358.35820895522386, "grad_norm": 17.93811798095703, "learning_rate": 9.496031746031746e-06, "loss": 33.0323, "step": 15051 }, { "epoch": 358.3820895522388, "grad_norm": 22.62415885925293, "learning_rate": 9.49537037037037e-06, "loss": 32.2606, "step": 15052 }, { "epoch": 358.40597014925373, "grad_norm": 20.261709213256836, "learning_rate": 9.494708994708996e-06, "loss": 30.8552, "step": 15053 }, { "epoch": 358.42985074626864, "grad_norm": 21.426820755004883, "learning_rate": 9.494047619047619e-06, "loss": 32.7622, "step": 15054 }, { "epoch": 358.4537313432836, "grad_norm": 19.726211547851562, "learning_rate": 9.493386243386245e-06, "loss": 32.8708, "step": 15055 }, { "epoch": 358.4776119402985, "grad_norm": 21.264524459838867, "learning_rate": 9.492724867724868e-06, "loss": 31.4836, "step": 15056 }, { "epoch": 358.5014925373134, "grad_norm": 21.882110595703125, "learning_rate": 9.492063492063493e-06, "loss": 32.7838, "step": 15057 }, { "epoch": 358.52537313432833, "grad_norm": 17.10589027404785, "learning_rate": 9.491402116402118e-06, "loss": 32.6003, "step": 15058 }, { "epoch": 358.5492537313433, "grad_norm": 20.396278381347656, "learning_rate": 9.490740740740741e-06, "loss": 32.3933, "step": 15059 }, { "epoch": 358.5731343283582, "grad_norm": 19.05466651916504, "learning_rate": 9.490079365079366e-06, "loss": 31.852, "step": 15060 }, { "epoch": 358.5970149253731, "grad_norm": 16.59945297241211, "learning_rate": 9.48941798941799e-06, "loss": 31.9461, "step": 15061 }, { "epoch": 358.6208955223881, "grad_norm": 19.087873458862305, "learning_rate": 9.488756613756615e-06, "loss": 31.1564, "step": 15062 }, { "epoch": 358.644776119403, "grad_norm": 19.61060905456543, "learning_rate": 9.488095238095238e-06, "loss": 32.3501, "step": 15063 }, { "epoch": 358.6686567164179, "grad_norm": 16.20462417602539, "learning_rate": 9.487433862433863e-06, "loss": 31.9814, "step": 15064 }, { "epoch": 358.6925373134328, "grad_norm": 15.829198837280273, "learning_rate": 9.486772486772488e-06, "loss": 33.2889, "step": 15065 }, { "epoch": 358.7164179104478, "grad_norm": 17.540855407714844, "learning_rate": 9.486111111111111e-06, "loss": 33.2141, "step": 15066 }, { "epoch": 358.7402985074627, "grad_norm": 15.826498031616211, "learning_rate": 9.485449735449736e-06, "loss": 33.7902, "step": 15067 }, { "epoch": 358.7641791044776, "grad_norm": 18.389429092407227, "learning_rate": 9.48478835978836e-06, "loss": 32.2526, "step": 15068 }, { "epoch": 358.78805970149256, "grad_norm": 15.800008773803711, "learning_rate": 9.484126984126984e-06, "loss": 32.2332, "step": 15069 }, { "epoch": 358.81194029850747, "grad_norm": 18.420787811279297, "learning_rate": 9.48346560846561e-06, "loss": 32.9463, "step": 15070 }, { "epoch": 358.8358208955224, "grad_norm": 20.59718132019043, "learning_rate": 9.482804232804233e-06, "loss": 33.6183, "step": 15071 }, { "epoch": 358.85970149253734, "grad_norm": 16.356536865234375, "learning_rate": 9.482142857142858e-06, "loss": 32.2114, "step": 15072 }, { "epoch": 358.88358208955225, "grad_norm": 14.856832504272461, "learning_rate": 9.481481481481483e-06, "loss": 32.3674, "step": 15073 }, { "epoch": 358.90746268656716, "grad_norm": 18.862398147583008, "learning_rate": 9.480820105820106e-06, "loss": 32.3993, "step": 15074 }, { "epoch": 358.93134328358207, "grad_norm": 19.472637176513672, "learning_rate": 9.480158730158731e-06, "loss": 32.7058, "step": 15075 }, { "epoch": 358.95522388059703, "grad_norm": 17.979694366455078, "learning_rate": 9.479497354497356e-06, "loss": 32.1515, "step": 15076 }, { "epoch": 358.97910447761194, "grad_norm": 15.228195190429688, "learning_rate": 9.478835978835979e-06, "loss": 32.6082, "step": 15077 }, { "epoch": 359.0, "grad_norm": 16.04807472229004, "learning_rate": 9.478174603174604e-06, "loss": 29.2817, "step": 15078 }, { "epoch": 359.0238805970149, "grad_norm": 17.568143844604492, "learning_rate": 9.477513227513228e-06, "loss": 32.422, "step": 15079 }, { "epoch": 359.0477611940299, "grad_norm": 18.491682052612305, "learning_rate": 9.476851851851853e-06, "loss": 32.4046, "step": 15080 }, { "epoch": 359.0716417910448, "grad_norm": 16.725357055664062, "learning_rate": 9.476190476190476e-06, "loss": 31.3566, "step": 15081 }, { "epoch": 359.0955223880597, "grad_norm": 14.86788558959961, "learning_rate": 9.475529100529101e-06, "loss": 31.243, "step": 15082 }, { "epoch": 359.1194029850746, "grad_norm": 18.561227798461914, "learning_rate": 9.474867724867726e-06, "loss": 31.8954, "step": 15083 }, { "epoch": 359.14328358208957, "grad_norm": 18.431364059448242, "learning_rate": 9.474206349206349e-06, "loss": 32.5703, "step": 15084 }, { "epoch": 359.1671641791045, "grad_norm": 19.41144371032715, "learning_rate": 9.473544973544975e-06, "loss": 32.4983, "step": 15085 }, { "epoch": 359.1910447761194, "grad_norm": 17.109878540039062, "learning_rate": 9.472883597883599e-06, "loss": 31.6643, "step": 15086 }, { "epoch": 359.21492537313435, "grad_norm": 16.63594627380371, "learning_rate": 9.472222222222223e-06, "loss": 32.945, "step": 15087 }, { "epoch": 359.23880597014926, "grad_norm": 15.511810302734375, "learning_rate": 9.471560846560848e-06, "loss": 32.0218, "step": 15088 }, { "epoch": 359.26268656716417, "grad_norm": 15.681384086608887, "learning_rate": 9.470899470899471e-06, "loss": 32.5628, "step": 15089 }, { "epoch": 359.28656716417913, "grad_norm": 15.733247756958008, "learning_rate": 9.470238095238096e-06, "loss": 32.2761, "step": 15090 }, { "epoch": 359.31044776119404, "grad_norm": 19.590049743652344, "learning_rate": 9.469576719576721e-06, "loss": 31.4616, "step": 15091 }, { "epoch": 359.33432835820895, "grad_norm": 20.36347198486328, "learning_rate": 9.468915343915344e-06, "loss": 32.6567, "step": 15092 }, { "epoch": 359.35820895522386, "grad_norm": 16.04654884338379, "learning_rate": 9.468253968253969e-06, "loss": 32.9795, "step": 15093 }, { "epoch": 359.3820895522388, "grad_norm": 15.172518730163574, "learning_rate": 9.467592592592594e-06, "loss": 31.9688, "step": 15094 }, { "epoch": 359.40597014925373, "grad_norm": 14.613859176635742, "learning_rate": 9.466931216931218e-06, "loss": 32.2976, "step": 15095 }, { "epoch": 359.42985074626864, "grad_norm": 20.026838302612305, "learning_rate": 9.466269841269841e-06, "loss": 32.5515, "step": 15096 }, { "epoch": 359.4537313432836, "grad_norm": 20.542890548706055, "learning_rate": 9.465608465608466e-06, "loss": 32.0271, "step": 15097 }, { "epoch": 359.4776119402985, "grad_norm": 16.293359756469727, "learning_rate": 9.464947089947091e-06, "loss": 32.4827, "step": 15098 }, { "epoch": 359.5014925373134, "grad_norm": 18.699499130249023, "learning_rate": 9.464285714285714e-06, "loss": 32.3706, "step": 15099 }, { "epoch": 359.52537313432833, "grad_norm": 18.629541397094727, "learning_rate": 9.463624338624339e-06, "loss": 31.9718, "step": 15100 }, { "epoch": 359.5492537313433, "grad_norm": 16.9014949798584, "learning_rate": 9.462962962962964e-06, "loss": 32.1638, "step": 15101 }, { "epoch": 359.5731343283582, "grad_norm": NaN, "learning_rate": 9.462301587301589e-06, "loss": 51.2537, "step": 15102 }, { "epoch": 359.5970149253731, "grad_norm": 20.027040481567383, "learning_rate": 9.462301587301589e-06, "loss": 32.284, "step": 15103 }, { "epoch": 359.6208955223881, "grad_norm": 20.181591033935547, "learning_rate": 9.461640211640213e-06, "loss": 31.7524, "step": 15104 }, { "epoch": 359.644776119403, "grad_norm": 16.75054359436035, "learning_rate": 9.460978835978836e-06, "loss": 31.7709, "step": 15105 }, { "epoch": 359.6686567164179, "grad_norm": 21.57439613342285, "learning_rate": 9.460317460317461e-06, "loss": 33.2595, "step": 15106 }, { "epoch": 359.6925373134328, "grad_norm": 15.870322227478027, "learning_rate": 9.459656084656086e-06, "loss": 32.8171, "step": 15107 }, { "epoch": 359.7164179104478, "grad_norm": 18.507720947265625, "learning_rate": 9.45899470899471e-06, "loss": 31.6635, "step": 15108 }, { "epoch": 359.7402985074627, "grad_norm": 19.092636108398438, "learning_rate": 9.458333333333334e-06, "loss": 32.6238, "step": 15109 }, { "epoch": 359.7641791044776, "grad_norm": 16.813720703125, "learning_rate": 9.457671957671959e-06, "loss": 33.4931, "step": 15110 }, { "epoch": 359.78805970149256, "grad_norm": 19.020538330078125, "learning_rate": 9.457010582010584e-06, "loss": 32.5465, "step": 15111 }, { "epoch": 359.81194029850747, "grad_norm": 17.173288345336914, "learning_rate": 9.456349206349207e-06, "loss": 30.842, "step": 15112 }, { "epoch": 359.8358208955224, "grad_norm": 16.530750274658203, "learning_rate": 9.455687830687831e-06, "loss": 32.7027, "step": 15113 }, { "epoch": 359.85970149253734, "grad_norm": 16.611957550048828, "learning_rate": 9.455026455026456e-06, "loss": 32.7209, "step": 15114 }, { "epoch": 359.88358208955225, "grad_norm": 20.9792423248291, "learning_rate": 9.45436507936508e-06, "loss": 32.247, "step": 15115 }, { "epoch": 359.90746268656716, "grad_norm": 17.569683074951172, "learning_rate": 9.453703703703704e-06, "loss": 32.5358, "step": 15116 }, { "epoch": 359.93134328358207, "grad_norm": 16.445842742919922, "learning_rate": 9.453042328042329e-06, "loss": 33.1839, "step": 15117 }, { "epoch": 359.95522388059703, "grad_norm": 17.492259979248047, "learning_rate": 9.452380952380952e-06, "loss": 31.6665, "step": 15118 }, { "epoch": 359.97910447761194, "grad_norm": 17.20362091064453, "learning_rate": 9.451719576719579e-06, "loss": 32.2951, "step": 15119 }, { "epoch": 360.0, "grad_norm": 15.607762336730957, "learning_rate": 9.451058201058202e-06, "loss": 27.6531, "step": 15120 }, { "epoch": 360.0, "step": 15120, "total_flos": 7.433098170764248e+17, "train_loss": 1.8099347542202662, "train_runtime": 25633.3904, "train_samples_per_second": 75.164, "train_steps_per_second": 0.59 }, { "epoch": 360.0238805970149, "grad_norm": 17.132539749145508, "learning_rate": 1e-05, "loss": 32.4364, "step": 15121 }, { "epoch": 360.0477611940299, "grad_norm": Infinity, "learning_rate": 9.99937343358396e-06, "loss": 36.1293, "step": 15122 }, { "epoch": 360.0716417910448, "grad_norm": 256.2436218261719, "learning_rate": 9.99937343358396e-06, "loss": 37.1061, "step": 15123 }, { "epoch": 360.0955223880597, "grad_norm": 126.70600128173828, "learning_rate": 9.99874686716792e-06, "loss": 35.8634, "step": 15124 }, { "epoch": 360.1194029850746, "grad_norm": 70.51016235351562, "learning_rate": 9.99812030075188e-06, "loss": 34.1869, "step": 15125 }, { "epoch": 360.14328358208957, "grad_norm": 51.900020599365234, "learning_rate": 9.99749373433584e-06, "loss": 33.8237, "step": 15126 }, { "epoch": 360.1671641791045, "grad_norm": 61.7481689453125, "learning_rate": 9.9968671679198e-06, "loss": 34.1598, "step": 15127 }, { "epoch": 360.1910447761194, "grad_norm": 47.25251007080078, "learning_rate": 9.996240601503761e-06, "loss": 32.6713, "step": 15128 }, { "epoch": 360.21492537313435, "grad_norm": 39.941184997558594, "learning_rate": 9.995614035087721e-06, "loss": 32.4269, "step": 15129 }, { "epoch": 360.23880597014926, "grad_norm": 28.25043296813965, "learning_rate": 9.99498746867168e-06, "loss": 34.0428, "step": 15130 }, { "epoch": 360.26268656716417, "grad_norm": 32.02793502807617, "learning_rate": 9.994360902255639e-06, "loss": 32.6393, "step": 15131 }, { "epoch": 360.28656716417913, "grad_norm": 30.364452362060547, "learning_rate": 9.9937343358396e-06, "loss": 32.22, "step": 15132 }, { "epoch": 360.31044776119404, "grad_norm": 24.977066040039062, "learning_rate": 9.99310776942356e-06, "loss": 32.555, "step": 15133 }, { "epoch": 360.33432835820895, "grad_norm": 20.768693923950195, "learning_rate": 9.992481203007518e-06, "loss": 31.885, "step": 15134 }, { "epoch": 360.35820895522386, "grad_norm": 21.985082626342773, "learning_rate": 9.99185463659148e-06, "loss": 32.3355, "step": 15135 }, { "epoch": 360.3820895522388, "grad_norm": 21.16714859008789, "learning_rate": 9.99122807017544e-06, "loss": 32.2855, "step": 15136 }, { "epoch": 360.40597014925373, "grad_norm": 25.63187026977539, "learning_rate": 9.9906015037594e-06, "loss": 31.8372, "step": 15137 }, { "epoch": 360.42985074626864, "grad_norm": 22.163450241088867, "learning_rate": 9.98997493734336e-06, "loss": 31.9503, "step": 15138 }, { "epoch": 360.4537313432836, "grad_norm": 22.825502395629883, "learning_rate": 9.989348370927319e-06, "loss": 31.8992, "step": 15139 }, { "epoch": 360.4776119402985, "grad_norm": 22.60196304321289, "learning_rate": 9.988721804511279e-06, "loss": 31.5811, "step": 15140 }, { "epoch": 360.5014925373134, "grad_norm": 21.542665481567383, "learning_rate": 9.988095238095239e-06, "loss": 32.0599, "step": 15141 }, { "epoch": 360.52537313432833, "grad_norm": 16.821033477783203, "learning_rate": 9.987468671679199e-06, "loss": 31.8262, "step": 15142 }, { "epoch": 360.5492537313433, "grad_norm": 17.12664222717285, "learning_rate": 9.98684210526316e-06, "loss": 32.5633, "step": 15143 }, { "epoch": 360.5731343283582, "grad_norm": 19.482683181762695, "learning_rate": 9.986215538847118e-06, "loss": 31.6738, "step": 15144 }, { "epoch": 360.5970149253731, "grad_norm": 21.525285720825195, "learning_rate": 9.985588972431078e-06, "loss": 32.8867, "step": 15145 }, { "epoch": 360.6208955223881, "grad_norm": 20.889223098754883, "learning_rate": 9.984962406015038e-06, "loss": 31.0746, "step": 15146 }, { "epoch": 360.644776119403, "grad_norm": 16.877544403076172, "learning_rate": 9.984335839599e-06, "loss": 31.6427, "step": 15147 }, { "epoch": 360.6686567164179, "grad_norm": 16.711463928222656, "learning_rate": 9.983709273182957e-06, "loss": 32.912, "step": 15148 }, { "epoch": 360.6925373134328, "grad_norm": 20.331371307373047, "learning_rate": 9.983082706766917e-06, "loss": 32.2909, "step": 15149 }, { "epoch": 360.7164179104478, "grad_norm": 24.232513427734375, "learning_rate": 9.982456140350879e-06, "loss": 33.2445, "step": 15150 }, { "epoch": 360.7402985074627, "grad_norm": 22.532785415649414, "learning_rate": 9.981829573934838e-06, "loss": 31.4148, "step": 15151 }, { "epoch": 360.7641791044776, "grad_norm": 16.70780372619629, "learning_rate": 9.981203007518798e-06, "loss": 32.9421, "step": 15152 }, { "epoch": 360.78805970149256, "grad_norm": 20.62165069580078, "learning_rate": 9.980576441102758e-06, "loss": 32.7361, "step": 15153 }, { "epoch": 360.81194029850747, "grad_norm": 22.46186637878418, "learning_rate": 9.979949874686718e-06, "loss": 31.7574, "step": 15154 }, { "epoch": 360.8358208955224, "grad_norm": 22.450260162353516, "learning_rate": 9.979323308270678e-06, "loss": 33.2606, "step": 15155 }, { "epoch": 360.85970149253734, "grad_norm": 15.548917770385742, "learning_rate": 9.978696741854637e-06, "loss": 32.1242, "step": 15156 }, { "epoch": 360.88358208955225, "grad_norm": 16.999727249145508, "learning_rate": 9.978070175438597e-06, "loss": 32.9468, "step": 15157 }, { "epoch": 360.90746268656716, "grad_norm": 16.765602111816406, "learning_rate": 9.977443609022557e-06, "loss": 32.8646, "step": 15158 }, { "epoch": 360.93134328358207, "grad_norm": 16.898029327392578, "learning_rate": 9.976817042606517e-06, "loss": 33.7647, "step": 15159 }, { "epoch": 360.95522388059703, "grad_norm": 15.133593559265137, "learning_rate": 9.976190476190477e-06, "loss": 32.4601, "step": 15160 }, { "epoch": 360.97910447761194, "grad_norm": 18.777118682861328, "learning_rate": 9.975563909774436e-06, "loss": 32.0783, "step": 15161 }, { "epoch": 361.0, "grad_norm": 17.703529357910156, "learning_rate": 9.974937343358396e-06, "loss": 27.7518, "step": 15162 }, { "epoch": 361.0238805970149, "grad_norm": 17.120885848999023, "learning_rate": 9.974310776942356e-06, "loss": 32.3899, "step": 15163 }, { "epoch": 361.0477611940299, "grad_norm": 18.410572052001953, "learning_rate": 9.973684210526316e-06, "loss": 32.0294, "step": 15164 }, { "epoch": 361.0716417910448, "grad_norm": 15.773614883422852, "learning_rate": 9.973057644110277e-06, "loss": 31.6015, "step": 15165 }, { "epoch": 361.0955223880597, "grad_norm": 25.361528396606445, "learning_rate": 9.972431077694237e-06, "loss": 32.1149, "step": 15166 }, { "epoch": 361.1194029850746, "grad_norm": 21.404897689819336, "learning_rate": 9.971804511278195e-06, "loss": 32.3963, "step": 15167 }, { "epoch": 361.14328358208957, "grad_norm": 15.300490379333496, "learning_rate": 9.971177944862157e-06, "loss": 31.4946, "step": 15168 }, { "epoch": 361.1671641791045, "grad_norm": 23.240764617919922, "learning_rate": 9.970551378446116e-06, "loss": 33.2305, "step": 15169 }, { "epoch": 361.1910447761194, "grad_norm": 23.104097366333008, "learning_rate": 9.969924812030076e-06, "loss": 31.5535, "step": 15170 }, { "epoch": 361.21492537313435, "grad_norm": 18.92438507080078, "learning_rate": 9.969298245614036e-06, "loss": 30.3473, "step": 15171 }, { "epoch": 361.23880597014926, "grad_norm": 17.293514251708984, "learning_rate": 9.968671679197996e-06, "loss": 32.2234, "step": 15172 }, { "epoch": 361.26268656716417, "grad_norm": 18.993066787719727, "learning_rate": 9.968045112781956e-06, "loss": 32.7033, "step": 15173 }, { "epoch": 361.28656716417913, "grad_norm": 19.64884376525879, "learning_rate": 9.967418546365915e-06, "loss": 32.5276, "step": 15174 }, { "epoch": 361.31044776119404, "grad_norm": 18.286048889160156, "learning_rate": 9.966791979949875e-06, "loss": 32.4066, "step": 15175 }, { "epoch": 361.33432835820895, "grad_norm": 15.611220359802246, "learning_rate": 9.966165413533837e-06, "loss": 32.4372, "step": 15176 }, { "epoch": 361.35820895522386, "grad_norm": 23.98234748840332, "learning_rate": 9.965538847117795e-06, "loss": 31.8022, "step": 15177 }, { "epoch": 361.3820895522388, "grad_norm": 20.376148223876953, "learning_rate": 9.964912280701755e-06, "loss": 32.0176, "step": 15178 }, { "epoch": 361.40597014925373, "grad_norm": 15.575143814086914, "learning_rate": 9.964285714285714e-06, "loss": 32.4731, "step": 15179 }, { "epoch": 361.42985074626864, "grad_norm": 24.95465087890625, "learning_rate": 9.963659147869676e-06, "loss": 33.1329, "step": 15180 }, { "epoch": 361.4537313432836, "grad_norm": 21.045879364013672, "learning_rate": 9.963032581453634e-06, "loss": 33.2694, "step": 15181 }, { "epoch": 361.4776119402985, "grad_norm": 14.200586318969727, "learning_rate": 9.962406015037594e-06, "loss": 32.3127, "step": 15182 }, { "epoch": 361.5014925373134, "grad_norm": 25.738948822021484, "learning_rate": 9.961779448621555e-06, "loss": 31.5539, "step": 15183 }, { "epoch": 361.52537313432833, "grad_norm": 21.601974487304688, "learning_rate": 9.961152882205515e-06, "loss": 30.7075, "step": 15184 }, { "epoch": 361.5492537313433, "grad_norm": 13.03670883178711, "learning_rate": 9.960526315789475e-06, "loss": 32.0896, "step": 15185 }, { "epoch": 361.5731343283582, "grad_norm": 23.764514923095703, "learning_rate": 9.959899749373435e-06, "loss": 32.5814, "step": 15186 }, { "epoch": 361.5970149253731, "grad_norm": 22.003652572631836, "learning_rate": 9.959273182957395e-06, "loss": 32.2197, "step": 15187 }, { "epoch": 361.6208955223881, "grad_norm": 17.986604690551758, "learning_rate": 9.958646616541354e-06, "loss": 32.1586, "step": 15188 }, { "epoch": 361.644776119403, "grad_norm": 19.210546493530273, "learning_rate": 9.958020050125314e-06, "loss": 32.8158, "step": 15189 }, { "epoch": 361.6686567164179, "grad_norm": 24.086362838745117, "learning_rate": 9.957393483709274e-06, "loss": 33.3453, "step": 15190 }, { "epoch": 361.6925373134328, "grad_norm": 19.50816535949707, "learning_rate": 9.956766917293234e-06, "loss": 31.7828, "step": 15191 }, { "epoch": 361.7164179104478, "grad_norm": 17.095325469970703, "learning_rate": 9.956140350877194e-06, "loss": 32.0506, "step": 15192 }, { "epoch": 361.7402985074627, "grad_norm": 22.07693862915039, "learning_rate": 9.955513784461153e-06, "loss": 31.8724, "step": 15193 }, { "epoch": 361.7641791044776, "grad_norm": 20.5903263092041, "learning_rate": 9.954887218045113e-06, "loss": 31.8127, "step": 15194 }, { "epoch": 361.78805970149256, "grad_norm": 16.421934127807617, "learning_rate": 9.954260651629073e-06, "loss": 31.5456, "step": 15195 }, { "epoch": 361.81194029850747, "grad_norm": 24.3125, "learning_rate": 9.953634085213033e-06, "loss": 31.9808, "step": 15196 }, { "epoch": 361.8358208955224, "grad_norm": 23.15384864807129, "learning_rate": 9.953007518796993e-06, "loss": 33.4299, "step": 15197 }, { "epoch": 361.85970149253734, "grad_norm": 18.186634063720703, "learning_rate": 9.952380952380954e-06, "loss": 34.1768, "step": 15198 }, { "epoch": 361.88358208955225, "grad_norm": 20.971479415893555, "learning_rate": 9.951754385964914e-06, "loss": 31.6073, "step": 15199 }, { "epoch": 361.90746268656716, "grad_norm": 21.818872451782227, "learning_rate": 9.951127819548872e-06, "loss": 32.7098, "step": 15200 }, { "epoch": 361.93134328358207, "grad_norm": 19.670656204223633, "learning_rate": 9.950501253132833e-06, "loss": 31.7838, "step": 15201 }, { "epoch": 361.95522388059703, "grad_norm": 17.483009338378906, "learning_rate": 9.949874686716793e-06, "loss": 32.1778, "step": 15202 }, { "epoch": 361.97910447761194, "grad_norm": 16.347700119018555, "learning_rate": 9.949248120300753e-06, "loss": 32.058, "step": 15203 }, { "epoch": 362.0, "grad_norm": 17.010881423950195, "learning_rate": 9.948621553884713e-06, "loss": 28.1426, "step": 15204 }, { "epoch": 362.0238805970149, "grad_norm": 19.743928909301758, "learning_rate": 9.947994987468673e-06, "loss": 32.6264, "step": 15205 }, { "epoch": 362.0477611940299, "grad_norm": 14.946175575256348, "learning_rate": 9.947368421052632e-06, "loss": 30.7231, "step": 15206 }, { "epoch": 362.0716417910448, "grad_norm": 17.401426315307617, "learning_rate": 9.946741854636592e-06, "loss": 32.1345, "step": 15207 }, { "epoch": 362.0955223880597, "grad_norm": 15.826093673706055, "learning_rate": 9.946115288220552e-06, "loss": 32.8926, "step": 15208 }, { "epoch": 362.1194029850746, "grad_norm": 16.590192794799805, "learning_rate": 9.945488721804512e-06, "loss": 31.0876, "step": 15209 }, { "epoch": 362.14328358208957, "grad_norm": 23.43697166442871, "learning_rate": 9.944862155388472e-06, "loss": 31.8363, "step": 15210 }, { "epoch": 362.1671641791045, "grad_norm": 18.69304084777832, "learning_rate": 9.944235588972431e-06, "loss": 32.4287, "step": 15211 }, { "epoch": 362.1910447761194, "grad_norm": 17.14446449279785, "learning_rate": 9.943609022556391e-06, "loss": 31.4971, "step": 15212 }, { "epoch": 362.21492537313435, "grad_norm": 16.17544937133789, "learning_rate": 9.942982456140353e-06, "loss": 33.4381, "step": 15213 }, { "epoch": 362.23880597014926, "grad_norm": 21.8199520111084, "learning_rate": 9.942355889724311e-06, "loss": 32.3361, "step": 15214 }, { "epoch": 362.26268656716417, "grad_norm": 23.29813003540039, "learning_rate": 9.94172932330827e-06, "loss": 32.6938, "step": 15215 }, { "epoch": 362.28656716417913, "grad_norm": 22.747772216796875, "learning_rate": 9.941102756892232e-06, "loss": 31.8064, "step": 15216 }, { "epoch": 362.31044776119404, "grad_norm": 17.181781768798828, "learning_rate": 9.940476190476192e-06, "loss": 31.7559, "step": 15217 }, { "epoch": 362.33432835820895, "grad_norm": 22.056598663330078, "learning_rate": 9.939849624060152e-06, "loss": 32.0445, "step": 15218 }, { "epoch": 362.35820895522386, "grad_norm": 20.21261978149414, "learning_rate": 9.93922305764411e-06, "loss": 33.0214, "step": 15219 }, { "epoch": 362.3820895522388, "grad_norm": 15.272744178771973, "learning_rate": 9.938596491228071e-06, "loss": 30.1479, "step": 15220 }, { "epoch": 362.40597014925373, "grad_norm": 17.363733291625977, "learning_rate": 9.937969924812031e-06, "loss": 32.0461, "step": 15221 }, { "epoch": 362.42985074626864, "grad_norm": 14.621134757995605, "learning_rate": 9.937343358395991e-06, "loss": 32.2182, "step": 15222 }, { "epoch": 362.4537313432836, "grad_norm": 19.389097213745117, "learning_rate": 9.93671679197995e-06, "loss": 32.7509, "step": 15223 }, { "epoch": 362.4776119402985, "grad_norm": 20.173542022705078, "learning_rate": 9.93609022556391e-06, "loss": 32.1246, "step": 15224 }, { "epoch": 362.5014925373134, "grad_norm": 18.420679092407227, "learning_rate": 9.93546365914787e-06, "loss": 33.4089, "step": 15225 }, { "epoch": 362.52537313432833, "grad_norm": 16.837743759155273, "learning_rate": 9.93483709273183e-06, "loss": 31.5921, "step": 15226 }, { "epoch": 362.5492537313433, "grad_norm": 15.984968185424805, "learning_rate": 9.93421052631579e-06, "loss": 32.4424, "step": 15227 }, { "epoch": 362.5731343283582, "grad_norm": 19.200674057006836, "learning_rate": 9.93358395989975e-06, "loss": 32.0312, "step": 15228 }, { "epoch": 362.5970149253731, "grad_norm": 19.186283111572266, "learning_rate": 9.93295739348371e-06, "loss": 30.7211, "step": 15229 }, { "epoch": 362.6208955223881, "grad_norm": 19.88683319091797, "learning_rate": 9.93233082706767e-06, "loss": 32.1018, "step": 15230 }, { "epoch": 362.644776119403, "grad_norm": 16.639360427856445, "learning_rate": 9.93170426065163e-06, "loss": 32.0127, "step": 15231 }, { "epoch": 362.6686567164179, "grad_norm": 24.132434844970703, "learning_rate": 9.93107769423559e-06, "loss": 32.8554, "step": 15232 }, { "epoch": 362.6925373134328, "grad_norm": 23.40017318725586, "learning_rate": 9.930451127819549e-06, "loss": 32.1667, "step": 15233 }, { "epoch": 362.7164179104478, "grad_norm": 20.206464767456055, "learning_rate": 9.929824561403509e-06, "loss": 31.7039, "step": 15234 }, { "epoch": 362.7402985074627, "grad_norm": 21.73969268798828, "learning_rate": 9.92919799498747e-06, "loss": 32.7147, "step": 15235 }, { "epoch": 362.7641791044776, "grad_norm": 18.947214126586914, "learning_rate": 9.92857142857143e-06, "loss": 33.2989, "step": 15236 }, { "epoch": 362.78805970149256, "grad_norm": 21.250171661376953, "learning_rate": 9.92794486215539e-06, "loss": 31.2918, "step": 15237 }, { "epoch": 362.81194029850747, "grad_norm": 17.559688568115234, "learning_rate": 9.92731829573935e-06, "loss": 32.6922, "step": 15238 }, { "epoch": 362.8358208955224, "grad_norm": 19.79376983642578, "learning_rate": 9.92669172932331e-06, "loss": 32.9719, "step": 15239 }, { "epoch": 362.85970149253734, "grad_norm": 21.98496437072754, "learning_rate": 9.926065162907269e-06, "loss": 32.4065, "step": 15240 }, { "epoch": 362.88358208955225, "grad_norm": 19.461105346679688, "learning_rate": 9.925438596491229e-06, "loss": 33.0088, "step": 15241 }, { "epoch": 362.90746268656716, "grad_norm": 21.3272705078125, "learning_rate": 9.924812030075189e-06, "loss": 32.0047, "step": 15242 }, { "epoch": 362.93134328358207, "grad_norm": 16.512256622314453, "learning_rate": 9.924185463659148e-06, "loss": 31.7071, "step": 15243 }, { "epoch": 362.95522388059703, "grad_norm": 24.910062789916992, "learning_rate": 9.923558897243108e-06, "loss": 32.8821, "step": 15244 }, { "epoch": 362.97910447761194, "grad_norm": 19.84198760986328, "learning_rate": 9.922932330827068e-06, "loss": 31.2735, "step": 15245 }, { "epoch": 363.0, "grad_norm": 17.119239807128906, "learning_rate": 9.92230576441103e-06, "loss": 28.0328, "step": 15246 }, { "epoch": 363.0238805970149, "grad_norm": 26.13155174255371, "learning_rate": 9.921679197994988e-06, "loss": 32.0354, "step": 15247 }, { "epoch": 363.0477611940299, "grad_norm": 22.28853416442871, "learning_rate": 9.921052631578947e-06, "loss": 32.1523, "step": 15248 }, { "epoch": 363.0716417910448, "grad_norm": 16.86628532409668, "learning_rate": 9.920426065162907e-06, "loss": 33.1073, "step": 15249 }, { "epoch": 363.0955223880597, "grad_norm": 20.905820846557617, "learning_rate": 9.919799498746869e-06, "loss": 32.6834, "step": 15250 }, { "epoch": 363.1194029850746, "grad_norm": 17.573211669921875, "learning_rate": 9.919172932330829e-06, "loss": 31.2877, "step": 15251 }, { "epoch": 363.14328358208957, "grad_norm": 16.618595123291016, "learning_rate": 9.918546365914787e-06, "loss": 31.8507, "step": 15252 }, { "epoch": 363.1671641791045, "grad_norm": 19.411659240722656, "learning_rate": 9.917919799498748e-06, "loss": 32.3002, "step": 15253 }, { "epoch": 363.1910447761194, "grad_norm": 18.91495704650879, "learning_rate": 9.917293233082708e-06, "loss": 32.5148, "step": 15254 }, { "epoch": 363.21492537313435, "grad_norm": 18.91404914855957, "learning_rate": 9.916666666666668e-06, "loss": 31.3953, "step": 15255 }, { "epoch": 363.23880597014926, "grad_norm": 19.728281021118164, "learning_rate": 9.916040100250628e-06, "loss": 32.4247, "step": 15256 }, { "epoch": 363.26268656716417, "grad_norm": 22.37010955810547, "learning_rate": 9.915413533834587e-06, "loss": 34.0721, "step": 15257 }, { "epoch": 363.28656716417913, "grad_norm": NaN, "learning_rate": 9.914786967418547e-06, "loss": 55.5244, "step": 15258 }, { "epoch": 363.31044776119404, "grad_norm": 19.158201217651367, "learning_rate": 9.914786967418547e-06, "loss": 32.7449, "step": 15259 }, { "epoch": 363.33432835820895, "grad_norm": 17.627317428588867, "learning_rate": 9.914160401002507e-06, "loss": 31.4861, "step": 15260 }, { "epoch": 363.35820895522386, "grad_norm": 16.3303279876709, "learning_rate": 9.913533834586467e-06, "loss": 32.5336, "step": 15261 }, { "epoch": 363.3820895522388, "grad_norm": 18.387176513671875, "learning_rate": 9.912907268170427e-06, "loss": 31.5398, "step": 15262 }, { "epoch": 363.40597014925373, "grad_norm": 20.99032211303711, "learning_rate": 9.912280701754386e-06, "loss": 31.3534, "step": 15263 }, { "epoch": 363.42985074626864, "grad_norm": 22.79098129272461, "learning_rate": 9.911654135338346e-06, "loss": 32.3939, "step": 15264 }, { "epoch": 363.4537313432836, "grad_norm": 17.615060806274414, "learning_rate": 9.911027568922308e-06, "loss": 32.314, "step": 15265 }, { "epoch": 363.4776119402985, "grad_norm": 16.081541061401367, "learning_rate": 9.910401002506267e-06, "loss": 32.4229, "step": 15266 }, { "epoch": 363.5014925373134, "grad_norm": 23.015844345092773, "learning_rate": 9.909774436090226e-06, "loss": 31.3226, "step": 15267 }, { "epoch": 363.52537313432833, "grad_norm": 25.138582229614258, "learning_rate": 9.909147869674185e-06, "loss": 31.9168, "step": 15268 }, { "epoch": 363.5492537313433, "grad_norm": 16.33774757385254, "learning_rate": 9.908521303258147e-06, "loss": 32.3593, "step": 15269 }, { "epoch": 363.5731343283582, "grad_norm": 18.47342300415039, "learning_rate": 9.907894736842107e-06, "loss": 31.7626, "step": 15270 }, { "epoch": 363.5970149253731, "grad_norm": 24.26117706298828, "learning_rate": 9.907268170426066e-06, "loss": 32.4874, "step": 15271 }, { "epoch": 363.6208955223881, "grad_norm": 19.61480712890625, "learning_rate": 9.906641604010026e-06, "loss": 31.4948, "step": 15272 }, { "epoch": 363.644776119403, "grad_norm": 19.58074951171875, "learning_rate": 9.906015037593986e-06, "loss": 31.8857, "step": 15273 }, { "epoch": 363.6686567164179, "grad_norm": 19.42598533630371, "learning_rate": 9.905388471177946e-06, "loss": 32.2685, "step": 15274 }, { "epoch": 363.6925373134328, "grad_norm": 21.81300163269043, "learning_rate": 9.904761904761906e-06, "loss": 32.3431, "step": 15275 }, { "epoch": 363.7164179104478, "grad_norm": 21.299245834350586, "learning_rate": 9.904135338345865e-06, "loss": 32.1986, "step": 15276 }, { "epoch": 363.7402985074627, "grad_norm": 16.842878341674805, "learning_rate": 9.903508771929825e-06, "loss": 33.189, "step": 15277 }, { "epoch": 363.7641791044776, "grad_norm": 22.124414443969727, "learning_rate": 9.902882205513785e-06, "loss": 32.4759, "step": 15278 }, { "epoch": 363.78805970149256, "grad_norm": 20.511117935180664, "learning_rate": 9.902255639097745e-06, "loss": 31.369, "step": 15279 }, { "epoch": 363.81194029850747, "grad_norm": 18.832853317260742, "learning_rate": 9.901629072681706e-06, "loss": 31.8353, "step": 15280 }, { "epoch": 363.8358208955224, "grad_norm": 20.221786499023438, "learning_rate": 9.901002506265664e-06, "loss": 30.8457, "step": 15281 }, { "epoch": 363.85970149253734, "grad_norm": 25.181447982788086, "learning_rate": 9.900375939849624e-06, "loss": 32.4595, "step": 15282 }, { "epoch": 363.88358208955225, "grad_norm": 16.329669952392578, "learning_rate": 9.899749373433584e-06, "loss": 31.1605, "step": 15283 }, { "epoch": 363.90746268656716, "grad_norm": 17.075014114379883, "learning_rate": 9.899122807017545e-06, "loss": 32.9915, "step": 15284 }, { "epoch": 363.93134328358207, "grad_norm": 23.282970428466797, "learning_rate": 9.898496240601505e-06, "loss": 31.0115, "step": 15285 }, { "epoch": 363.95522388059703, "grad_norm": 17.927663803100586, "learning_rate": 9.897869674185463e-06, "loss": 33.3532, "step": 15286 }, { "epoch": 363.97910447761194, "grad_norm": 17.54277992248535, "learning_rate": 9.897243107769425e-06, "loss": 31.9632, "step": 15287 }, { "epoch": 364.0, "grad_norm": 15.090412139892578, "learning_rate": 9.896616541353385e-06, "loss": 28.6456, "step": 15288 }, { "epoch": 364.0238805970149, "grad_norm": 17.34769630432129, "learning_rate": 9.895989974937344e-06, "loss": 31.6095, "step": 15289 }, { "epoch": 364.0477611940299, "grad_norm": 23.21035385131836, "learning_rate": 9.895363408521304e-06, "loss": 31.5968, "step": 15290 }, { "epoch": 364.0716417910448, "grad_norm": 21.537927627563477, "learning_rate": 9.894736842105264e-06, "loss": 32.173, "step": 15291 }, { "epoch": 364.0955223880597, "grad_norm": 18.95825958251953, "learning_rate": 9.894110275689224e-06, "loss": 32.0189, "step": 15292 }, { "epoch": 364.1194029850746, "grad_norm": 15.338730812072754, "learning_rate": 9.893483709273184e-06, "loss": 32.2987, "step": 15293 }, { "epoch": 364.14328358208957, "grad_norm": 21.401004791259766, "learning_rate": 9.892857142857143e-06, "loss": 30.9803, "step": 15294 }, { "epoch": 364.1671641791045, "grad_norm": 17.64253044128418, "learning_rate": 9.892230576441103e-06, "loss": 33.7867, "step": 15295 }, { "epoch": 364.1910447761194, "grad_norm": 21.7203311920166, "learning_rate": 9.891604010025063e-06, "loss": 31.395, "step": 15296 }, { "epoch": 364.21492537313435, "grad_norm": 22.429622650146484, "learning_rate": 9.890977443609023e-06, "loss": 31.6195, "step": 15297 }, { "epoch": 364.23880597014926, "grad_norm": 19.789377212524414, "learning_rate": 9.890350877192983e-06, "loss": 32.3123, "step": 15298 }, { "epoch": 364.26268656716417, "grad_norm": 16.696474075317383, "learning_rate": 9.889724310776944e-06, "loss": 31.7702, "step": 15299 }, { "epoch": 364.28656716417913, "grad_norm": 22.65117073059082, "learning_rate": 9.889097744360902e-06, "loss": 32.9917, "step": 15300 }, { "epoch": 364.31044776119404, "grad_norm": 18.027467727661133, "learning_rate": 9.888471177944862e-06, "loss": 31.2865, "step": 15301 }, { "epoch": 364.33432835820895, "grad_norm": 22.309864044189453, "learning_rate": 9.887844611528824e-06, "loss": 33.268, "step": 15302 }, { "epoch": 364.35820895522386, "grad_norm": 17.887165069580078, "learning_rate": 9.887218045112783e-06, "loss": 32.3819, "step": 15303 }, { "epoch": 364.3820895522388, "grad_norm": 20.570512771606445, "learning_rate": 9.886591478696743e-06, "loss": 32.2796, "step": 15304 }, { "epoch": 364.40597014925373, "grad_norm": 16.284791946411133, "learning_rate": 9.885964912280703e-06, "loss": 30.3214, "step": 15305 }, { "epoch": 364.42985074626864, "grad_norm": 20.710899353027344, "learning_rate": 9.885338345864663e-06, "loss": 32.5604, "step": 15306 }, { "epoch": 364.4537313432836, "grad_norm": 15.39677906036377, "learning_rate": 9.884711779448623e-06, "loss": 32.2054, "step": 15307 }, { "epoch": 364.4776119402985, "grad_norm": 17.5885009765625, "learning_rate": 9.884085213032582e-06, "loss": 32.0225, "step": 15308 }, { "epoch": 364.5014925373134, "grad_norm": 17.18393898010254, "learning_rate": 9.883458646616542e-06, "loss": 32.6099, "step": 15309 }, { "epoch": 364.52537313432833, "grad_norm": 22.8990421295166, "learning_rate": 9.882832080200502e-06, "loss": 31.5182, "step": 15310 }, { "epoch": 364.5492537313433, "grad_norm": 18.452173233032227, "learning_rate": 9.882205513784462e-06, "loss": 33.1553, "step": 15311 }, { "epoch": 364.5731343283582, "grad_norm": 18.07838249206543, "learning_rate": 9.881578947368422e-06, "loss": 31.8111, "step": 15312 }, { "epoch": 364.5970149253731, "grad_norm": 20.9888916015625, "learning_rate": 9.880952380952381e-06, "loss": 31.1067, "step": 15313 }, { "epoch": 364.6208955223881, "grad_norm": 20.998638153076172, "learning_rate": 9.880325814536341e-06, "loss": 32.2242, "step": 15314 }, { "epoch": 364.644776119403, "grad_norm": 18.86063003540039, "learning_rate": 9.879699248120301e-06, "loss": 31.5959, "step": 15315 }, { "epoch": 364.6686567164179, "grad_norm": 14.274968147277832, "learning_rate": 9.87907268170426e-06, "loss": 31.1165, "step": 15316 }, { "epoch": 364.6925373134328, "grad_norm": 15.340092658996582, "learning_rate": 9.878446115288222e-06, "loss": 31.7937, "step": 15317 }, { "epoch": 364.7164179104478, "grad_norm": 24.58694076538086, "learning_rate": 9.877819548872182e-06, "loss": 32.4217, "step": 15318 }, { "epoch": 364.7402985074627, "grad_norm": 20.13288688659668, "learning_rate": 9.87719298245614e-06, "loss": 32.5419, "step": 15319 }, { "epoch": 364.7641791044776, "grad_norm": 12.716923713684082, "learning_rate": 9.876566416040102e-06, "loss": 32.7927, "step": 15320 }, { "epoch": 364.78805970149256, "grad_norm": 18.81435775756836, "learning_rate": 9.875939849624061e-06, "loss": 31.619, "step": 15321 }, { "epoch": 364.81194029850747, "grad_norm": 25.388444900512695, "learning_rate": 9.875313283208021e-06, "loss": 33.0537, "step": 15322 }, { "epoch": 364.8358208955224, "grad_norm": 17.26911163330078, "learning_rate": 9.87468671679198e-06, "loss": 31.6359, "step": 15323 }, { "epoch": 364.85970149253734, "grad_norm": 15.614147186279297, "learning_rate": 9.874060150375941e-06, "loss": 32.3819, "step": 15324 }, { "epoch": 364.88358208955225, "grad_norm": 18.926908493041992, "learning_rate": 9.8734335839599e-06, "loss": 32.628, "step": 15325 }, { "epoch": 364.90746268656716, "grad_norm": 16.784101486206055, "learning_rate": 9.87280701754386e-06, "loss": 34.1808, "step": 15326 }, { "epoch": 364.93134328358207, "grad_norm": 21.025915145874023, "learning_rate": 9.87218045112782e-06, "loss": 31.5357, "step": 15327 }, { "epoch": 364.95522388059703, "grad_norm": 15.31556224822998, "learning_rate": 9.87155388471178e-06, "loss": 32.2547, "step": 15328 }, { "epoch": 364.97910447761194, "grad_norm": 24.622791290283203, "learning_rate": 9.87092731829574e-06, "loss": 31.9887, "step": 15329 }, { "epoch": 365.0, "grad_norm": 14.526107788085938, "learning_rate": 9.8703007518797e-06, "loss": 27.4657, "step": 15330 }, { "epoch": 365.0238805970149, "grad_norm": 19.568317413330078, "learning_rate": 9.86967418546366e-06, "loss": 31.4216, "step": 15331 }, { "epoch": 365.0477611940299, "grad_norm": 19.773418426513672, "learning_rate": 9.869047619047621e-06, "loss": 32.037, "step": 15332 }, { "epoch": 365.0716417910448, "grad_norm": 19.401296615600586, "learning_rate": 9.868421052631579e-06, "loss": 31.4882, "step": 15333 }, { "epoch": 365.0955223880597, "grad_norm": 22.989543914794922, "learning_rate": 9.867794486215539e-06, "loss": 30.8285, "step": 15334 }, { "epoch": 365.1194029850746, "grad_norm": 18.464834213256836, "learning_rate": 9.8671679197995e-06, "loss": 32.0074, "step": 15335 }, { "epoch": 365.14328358208957, "grad_norm": 19.494367599487305, "learning_rate": 9.86654135338346e-06, "loss": 32.3485, "step": 15336 }, { "epoch": 365.1671641791045, "grad_norm": 24.23535919189453, "learning_rate": 9.86591478696742e-06, "loss": 31.6978, "step": 15337 }, { "epoch": 365.1910447761194, "grad_norm": 19.43990135192871, "learning_rate": 9.86528822055138e-06, "loss": 31.9308, "step": 15338 }, { "epoch": 365.21492537313435, "grad_norm": 18.06524085998535, "learning_rate": 9.86466165413534e-06, "loss": 31.2191, "step": 15339 }, { "epoch": 365.23880597014926, "grad_norm": 17.638917922973633, "learning_rate": 9.8640350877193e-06, "loss": 32.2167, "step": 15340 }, { "epoch": 365.26268656716417, "grad_norm": 17.34427833557129, "learning_rate": 9.86340852130326e-06, "loss": 32.0774, "step": 15341 }, { "epoch": 365.28656716417913, "grad_norm": 22.937944412231445, "learning_rate": 9.862781954887219e-06, "loss": 31.2344, "step": 15342 }, { "epoch": 365.31044776119404, "grad_norm": 18.434293746948242, "learning_rate": 9.862155388471179e-06, "loss": 32.0202, "step": 15343 }, { "epoch": 365.33432835820895, "grad_norm": 17.503267288208008, "learning_rate": 9.861528822055139e-06, "loss": 31.5617, "step": 15344 }, { "epoch": 365.35820895522386, "grad_norm": 16.172712326049805, "learning_rate": 9.860902255639098e-06, "loss": 32.8991, "step": 15345 }, { "epoch": 365.3820895522388, "grad_norm": 20.303129196166992, "learning_rate": 9.860275689223058e-06, "loss": 32.6047, "step": 15346 }, { "epoch": 365.40597014925373, "grad_norm": 22.519086837768555, "learning_rate": 9.859649122807018e-06, "loss": 32.259, "step": 15347 }, { "epoch": 365.42985074626864, "grad_norm": 19.33053207397461, "learning_rate": 9.859022556390978e-06, "loss": 32.6486, "step": 15348 }, { "epoch": 365.4537313432836, "grad_norm": 18.114622116088867, "learning_rate": 9.858395989974938e-06, "loss": 32.7031, "step": 15349 }, { "epoch": 365.4776119402985, "grad_norm": 18.428302764892578, "learning_rate": 9.857769423558899e-06, "loss": 32.2902, "step": 15350 }, { "epoch": 365.5014925373134, "grad_norm": 14.867958068847656, "learning_rate": 9.857142857142859e-06, "loss": 33.123, "step": 15351 }, { "epoch": 365.52537313432833, "grad_norm": 20.644237518310547, "learning_rate": 9.856516290726817e-06, "loss": 32.3267, "step": 15352 }, { "epoch": 365.5492537313433, "grad_norm": 18.06245994567871, "learning_rate": 9.855889724310778e-06, "loss": 31.8889, "step": 15353 }, { "epoch": 365.5731343283582, "grad_norm": 17.88252830505371, "learning_rate": 9.855263157894738e-06, "loss": 31.7894, "step": 15354 }, { "epoch": 365.5970149253731, "grad_norm": NaN, "learning_rate": 9.854636591478698e-06, "loss": 27.8248, "step": 15355 }, { "epoch": 365.6208955223881, "grad_norm": 21.058269500732422, "learning_rate": 9.854636591478698e-06, "loss": 32.0298, "step": 15356 }, { "epoch": 365.644776119403, "grad_norm": 21.552438735961914, "learning_rate": 9.854010025062656e-06, "loss": 33.0316, "step": 15357 }, { "epoch": 365.6686567164179, "grad_norm": 16.302946090698242, "learning_rate": 9.853383458646618e-06, "loss": 31.7935, "step": 15358 }, { "epoch": 365.6925373134328, "grad_norm": 18.238101959228516, "learning_rate": 9.852756892230577e-06, "loss": 31.5756, "step": 15359 }, { "epoch": 365.7164179104478, "grad_norm": 17.57839012145996, "learning_rate": 9.852130325814537e-06, "loss": 32.1403, "step": 15360 }, { "epoch": 365.7402985074627, "grad_norm": 21.027997970581055, "learning_rate": 9.851503759398497e-06, "loss": 33.4299, "step": 15361 }, { "epoch": 365.7641791044776, "grad_norm": 23.699108123779297, "learning_rate": 9.850877192982457e-06, "loss": 32.4048, "step": 15362 }, { "epoch": 365.78805970149256, "grad_norm": 16.43022346496582, "learning_rate": 9.850250626566417e-06, "loss": 32.8855, "step": 15363 }, { "epoch": 365.81194029850747, "grad_norm": 21.189767837524414, "learning_rate": 9.849624060150376e-06, "loss": 30.8562, "step": 15364 }, { "epoch": 365.8358208955224, "grad_norm": 25.296646118164062, "learning_rate": 9.848997493734336e-06, "loss": 32.8415, "step": 15365 }, { "epoch": 365.85970149253734, "grad_norm": 23.162118911743164, "learning_rate": 9.848370927318298e-06, "loss": 31.0586, "step": 15366 }, { "epoch": 365.88358208955225, "grad_norm": 15.77981185913086, "learning_rate": 9.847744360902256e-06, "loss": 32.1768, "step": 15367 }, { "epoch": 365.90746268656716, "grad_norm": 25.820587158203125, "learning_rate": 9.847117794486216e-06, "loss": 31.7466, "step": 15368 }, { "epoch": 365.93134328358207, "grad_norm": 22.924257278442383, "learning_rate": 9.846491228070177e-06, "loss": 32.3555, "step": 15369 }, { "epoch": 365.95522388059703, "grad_norm": 20.160552978515625, "learning_rate": 9.845864661654137e-06, "loss": 31.8706, "step": 15370 }, { "epoch": 365.97910447761194, "grad_norm": 19.70369529724121, "learning_rate": 9.845238095238097e-06, "loss": 31.9827, "step": 15371 }, { "epoch": 366.0, "grad_norm": 19.337146759033203, "learning_rate": 9.844611528822055e-06, "loss": 28.0124, "step": 15372 }, { "epoch": 366.0238805970149, "grad_norm": 16.080902099609375, "learning_rate": 9.843984962406016e-06, "loss": 32.6589, "step": 15373 }, { "epoch": 366.0477611940299, "grad_norm": 20.87893295288086, "learning_rate": 9.843358395989976e-06, "loss": 32.5517, "step": 15374 }, { "epoch": 366.0716417910448, "grad_norm": 18.223037719726562, "learning_rate": 9.842731829573936e-06, "loss": 31.3611, "step": 15375 }, { "epoch": 366.0955223880597, "grad_norm": 19.568954467773438, "learning_rate": 9.842105263157896e-06, "loss": 32.4866, "step": 15376 }, { "epoch": 366.1194029850746, "grad_norm": 17.641977310180664, "learning_rate": 9.841478696741856e-06, "loss": 32.9956, "step": 15377 }, { "epoch": 366.14328358208957, "grad_norm": 18.96194076538086, "learning_rate": 9.840852130325815e-06, "loss": 32.7475, "step": 15378 }, { "epoch": 366.1671641791045, "grad_norm": 19.562108993530273, "learning_rate": 9.840225563909775e-06, "loss": 31.9512, "step": 15379 }, { "epoch": 366.1910447761194, "grad_norm": 20.464187622070312, "learning_rate": 9.839598997493735e-06, "loss": 32.5754, "step": 15380 }, { "epoch": 366.21492537313435, "grad_norm": 20.65941047668457, "learning_rate": 9.838972431077695e-06, "loss": 31.1942, "step": 15381 }, { "epoch": 366.23880597014926, "grad_norm": 18.312759399414062, "learning_rate": 9.838345864661655e-06, "loss": 31.6319, "step": 15382 }, { "epoch": 366.26268656716417, "grad_norm": 21.73177146911621, "learning_rate": 9.837719298245614e-06, "loss": 33.0737, "step": 15383 }, { "epoch": 366.28656716417913, "grad_norm": 19.632993698120117, "learning_rate": 9.837092731829576e-06, "loss": 31.7084, "step": 15384 }, { "epoch": 366.31044776119404, "grad_norm": 17.026018142700195, "learning_rate": 9.836466165413536e-06, "loss": 32.1507, "step": 15385 }, { "epoch": 366.33432835820895, "grad_norm": 27.969884872436523, "learning_rate": 9.835839598997494e-06, "loss": 31.6364, "step": 15386 }, { "epoch": 366.35820895522386, "grad_norm": 25.337657928466797, "learning_rate": 9.835213032581454e-06, "loss": 32.8782, "step": 15387 }, { "epoch": 366.3820895522388, "grad_norm": 18.363353729248047, "learning_rate": 9.834586466165415e-06, "loss": 30.9665, "step": 15388 }, { "epoch": 366.40597014925373, "grad_norm": 25.76776123046875, "learning_rate": 9.833959899749375e-06, "loss": 32.6728, "step": 15389 }, { "epoch": 366.42985074626864, "grad_norm": 23.761690139770508, "learning_rate": 9.833333333333333e-06, "loss": 31.9841, "step": 15390 }, { "epoch": 366.4537313432836, "grad_norm": 17.141319274902344, "learning_rate": 9.832706766917294e-06, "loss": 32.5077, "step": 15391 }, { "epoch": 366.4776119402985, "grad_norm": 25.57672691345215, "learning_rate": 9.832080200501254e-06, "loss": 31.7279, "step": 15392 }, { "epoch": 366.5014925373134, "grad_norm": 22.10552406311035, "learning_rate": 9.831453634085214e-06, "loss": 32.2644, "step": 15393 }, { "epoch": 366.52537313432833, "grad_norm": 16.392301559448242, "learning_rate": 9.830827067669174e-06, "loss": 31.6418, "step": 15394 }, { "epoch": 366.5492537313433, "grad_norm": 22.449726104736328, "learning_rate": 9.830200501253134e-06, "loss": 32.3267, "step": 15395 }, { "epoch": 366.5731343283582, "grad_norm": 22.13176727294922, "learning_rate": 9.829573934837093e-06, "loss": 32.8179, "step": 15396 }, { "epoch": 366.5970149253731, "grad_norm": 18.91019058227539, "learning_rate": 9.828947368421053e-06, "loss": 31.3633, "step": 15397 }, { "epoch": 366.6208955223881, "grad_norm": 15.951644897460938, "learning_rate": 9.828320802005013e-06, "loss": 31.5082, "step": 15398 }, { "epoch": 366.644776119403, "grad_norm": 28.609750747680664, "learning_rate": 9.827694235588975e-06, "loss": 32.2829, "step": 15399 }, { "epoch": 366.6686567164179, "grad_norm": 17.81928062438965, "learning_rate": 9.827067669172933e-06, "loss": 32.403, "step": 15400 }, { "epoch": 366.6925373134328, "grad_norm": 22.5119571685791, "learning_rate": 9.826441102756892e-06, "loss": 32.0073, "step": 15401 }, { "epoch": 366.7164179104478, "grad_norm": 26.724044799804688, "learning_rate": 9.825814536340852e-06, "loss": 31.6893, "step": 15402 }, { "epoch": 366.7402985074627, "grad_norm": 18.280824661254883, "learning_rate": 9.825187969924814e-06, "loss": 32.9523, "step": 15403 }, { "epoch": 366.7641791044776, "grad_norm": 29.23700523376465, "learning_rate": 9.824561403508772e-06, "loss": 32.6632, "step": 15404 }, { "epoch": 366.78805970149256, "grad_norm": 22.60460090637207, "learning_rate": 9.823934837092732e-06, "loss": 31.6254, "step": 15405 }, { "epoch": 366.81194029850747, "grad_norm": 24.471580505371094, "learning_rate": 9.823308270676693e-06, "loss": 32.4686, "step": 15406 }, { "epoch": 366.8358208955224, "grad_norm": 23.822912216186523, "learning_rate": 9.822681704260653e-06, "loss": 30.3543, "step": 15407 }, { "epoch": 366.85970149253734, "grad_norm": 20.79631805419922, "learning_rate": 9.822055137844613e-06, "loss": 30.3284, "step": 15408 }, { "epoch": 366.88358208955225, "grad_norm": 15.284931182861328, "learning_rate": 9.821428571428573e-06, "loss": 32.3226, "step": 15409 }, { "epoch": 366.90746268656716, "grad_norm": 17.51460838317871, "learning_rate": 9.820802005012532e-06, "loss": 32.3922, "step": 15410 }, { "epoch": 366.93134328358207, "grad_norm": 20.007862091064453, "learning_rate": 9.820175438596492e-06, "loss": 31.3975, "step": 15411 }, { "epoch": 366.95522388059703, "grad_norm": 19.774919509887695, "learning_rate": 9.819548872180452e-06, "loss": 31.7776, "step": 15412 }, { "epoch": 366.97910447761194, "grad_norm": 21.901992797851562, "learning_rate": 9.818922305764412e-06, "loss": 32.1234, "step": 15413 }, { "epoch": 367.0, "grad_norm": 16.642581939697266, "learning_rate": 9.818295739348372e-06, "loss": 26.9963, "step": 15414 }, { "epoch": 367.0238805970149, "grad_norm": 17.27568817138672, "learning_rate": 9.817669172932331e-06, "loss": 32.547, "step": 15415 }, { "epoch": 367.0477611940299, "grad_norm": 15.555951118469238, "learning_rate": 9.817042606516291e-06, "loss": 31.6967, "step": 15416 }, { "epoch": 367.0716417910448, "grad_norm": 15.493464469909668, "learning_rate": 9.816416040100251e-06, "loss": 31.5815, "step": 15417 }, { "epoch": 367.0955223880597, "grad_norm": 17.905778884887695, "learning_rate": 9.815789473684212e-06, "loss": 31.9997, "step": 15418 }, { "epoch": 367.1194029850746, "grad_norm": 18.846046447753906, "learning_rate": 9.81516290726817e-06, "loss": 31.3564, "step": 15419 }, { "epoch": 367.14328358208957, "grad_norm": 16.605934143066406, "learning_rate": 9.81453634085213e-06, "loss": 31.3784, "step": 15420 }, { "epoch": 367.1671641791045, "grad_norm": 17.084476470947266, "learning_rate": 9.813909774436092e-06, "loss": 31.1145, "step": 15421 }, { "epoch": 367.1910447761194, "grad_norm": 17.43899154663086, "learning_rate": 9.813283208020052e-06, "loss": 32.7294, "step": 15422 }, { "epoch": 367.21492537313435, "grad_norm": 24.862119674682617, "learning_rate": 9.81265664160401e-06, "loss": 30.8308, "step": 15423 }, { "epoch": 367.23880597014926, "grad_norm": 17.063512802124023, "learning_rate": 9.812030075187971e-06, "loss": 31.9816, "step": 15424 }, { "epoch": 367.26268656716417, "grad_norm": 17.12827491760254, "learning_rate": 9.811403508771931e-06, "loss": 32.8559, "step": 15425 }, { "epoch": 367.28656716417913, "grad_norm": 23.62639808654785, "learning_rate": 9.81077694235589e-06, "loss": 31.6583, "step": 15426 }, { "epoch": 367.31044776119404, "grad_norm": 22.921585083007812, "learning_rate": 9.81015037593985e-06, "loss": 31.7304, "step": 15427 }, { "epoch": 367.33432835820895, "grad_norm": 17.045209884643555, "learning_rate": 9.80952380952381e-06, "loss": 32.3467, "step": 15428 }, { "epoch": 367.35820895522386, "grad_norm": 29.46330451965332, "learning_rate": 9.80889724310777e-06, "loss": 31.5553, "step": 15429 }, { "epoch": 367.3820895522388, "grad_norm": 21.51797866821289, "learning_rate": 9.80827067669173e-06, "loss": 31.6182, "step": 15430 }, { "epoch": 367.40597014925373, "grad_norm": 18.44357681274414, "learning_rate": 9.80764411027569e-06, "loss": 31.3722, "step": 15431 }, { "epoch": 367.42985074626864, "grad_norm": 30.695262908935547, "learning_rate": 9.80701754385965e-06, "loss": 32.5535, "step": 15432 }, { "epoch": 367.4537313432836, "grad_norm": 17.565475463867188, "learning_rate": 9.80639097744361e-06, "loss": 32.4373, "step": 15433 }, { "epoch": 367.4776119402985, "grad_norm": 25.367204666137695, "learning_rate": 9.80576441102757e-06, "loss": 30.8352, "step": 15434 }, { "epoch": 367.5014925373134, "grad_norm": 23.94452667236328, "learning_rate": 9.805137844611529e-06, "loss": 32.6506, "step": 15435 }, { "epoch": 367.52537313432833, "grad_norm": 18.084810256958008, "learning_rate": 9.80451127819549e-06, "loss": 32.1855, "step": 15436 }, { "epoch": 367.5492537313433, "grad_norm": 31.32501792907715, "learning_rate": 9.803884711779449e-06, "loss": 32.3171, "step": 15437 }, { "epoch": 367.5731343283582, "grad_norm": 20.915185928344727, "learning_rate": 9.803258145363408e-06, "loss": 31.6824, "step": 15438 }, { "epoch": 367.5970149253731, "grad_norm": 21.990827560424805, "learning_rate": 9.80263157894737e-06, "loss": 32.0792, "step": 15439 }, { "epoch": 367.6208955223881, "grad_norm": 30.322816848754883, "learning_rate": 9.80200501253133e-06, "loss": 31.6604, "step": 15440 }, { "epoch": 367.644776119403, "grad_norm": 19.280437469482422, "learning_rate": 9.80137844611529e-06, "loss": 31.5484, "step": 15441 }, { "epoch": 367.6686567164179, "grad_norm": 30.336301803588867, "learning_rate": 9.80075187969925e-06, "loss": 31.835, "step": 15442 }, { "epoch": 367.6925373134328, "grad_norm": 25.579570770263672, "learning_rate": 9.800125313283209e-06, "loss": 31.8172, "step": 15443 }, { "epoch": 367.7164179104478, "grad_norm": 24.215070724487305, "learning_rate": 9.799498746867169e-06, "loss": 32.6645, "step": 15444 }, { "epoch": 367.7402985074627, "grad_norm": 32.025482177734375, "learning_rate": 9.798872180451129e-06, "loss": 32.8574, "step": 15445 }, { "epoch": 367.7641791044776, "grad_norm": 21.791353225708008, "learning_rate": 9.798245614035088e-06, "loss": 32.7522, "step": 15446 }, { "epoch": 367.78805970149256, "grad_norm": 41.8706169128418, "learning_rate": 9.797619047619048e-06, "loss": 32.2187, "step": 15447 }, { "epoch": 367.81194029850747, "grad_norm": 29.62680435180664, "learning_rate": 9.796992481203008e-06, "loss": 31.8115, "step": 15448 }, { "epoch": 367.8358208955224, "grad_norm": 44.39241409301758, "learning_rate": 9.796365914786968e-06, "loss": 32.425, "step": 15449 }, { "epoch": 367.85970149253734, "grad_norm": 40.109642028808594, "learning_rate": 9.795739348370928e-06, "loss": 32.9681, "step": 15450 }, { "epoch": 367.88358208955225, "grad_norm": 37.884368896484375, "learning_rate": 9.79511278195489e-06, "loss": 32.4673, "step": 15451 }, { "epoch": 367.90746268656716, "grad_norm": 36.89011001586914, "learning_rate": 9.794486215538847e-06, "loss": 32.4061, "step": 15452 }, { "epoch": 367.93134328358207, "grad_norm": 32.9327278137207, "learning_rate": 9.793859649122807e-06, "loss": 32.0519, "step": 15453 }, { "epoch": 367.95522388059703, "grad_norm": 26.91158676147461, "learning_rate": 9.793233082706769e-06, "loss": 32.754, "step": 15454 }, { "epoch": 367.97910447761194, "grad_norm": 42.379486083984375, "learning_rate": 9.792606516290728e-06, "loss": 31.3521, "step": 15455 }, { "epoch": 368.0, "grad_norm": 30.096933364868164, "learning_rate": 9.791979949874686e-06, "loss": 28.0914, "step": 15456 }, { "epoch": 368.0238805970149, "grad_norm": 36.74195861816406, "learning_rate": 9.791353383458648e-06, "loss": 30.9952, "step": 15457 }, { "epoch": 368.0477611940299, "grad_norm": 35.80557632446289, "learning_rate": 9.790726817042608e-06, "loss": 32.0456, "step": 15458 }, { "epoch": 368.0716417910448, "grad_norm": 30.618431091308594, "learning_rate": 9.790100250626568e-06, "loss": 32.3686, "step": 15459 }, { "epoch": 368.0955223880597, "grad_norm": 29.104036331176758, "learning_rate": 9.789473684210527e-06, "loss": 31.9376, "step": 15460 }, { "epoch": 368.1194029850746, "grad_norm": 34.558807373046875, "learning_rate": 9.788847117794487e-06, "loss": 30.0659, "step": 15461 }, { "epoch": 368.14328358208957, "grad_norm": 28.875043869018555, "learning_rate": 9.788220551378447e-06, "loss": 30.8547, "step": 15462 }, { "epoch": 368.1671641791045, "grad_norm": 38.92327117919922, "learning_rate": 9.787593984962407e-06, "loss": 31.9357, "step": 15463 }, { "epoch": 368.1910447761194, "grad_norm": 34.628334045410156, "learning_rate": 9.786967418546367e-06, "loss": 31.4637, "step": 15464 }, { "epoch": 368.21492537313435, "grad_norm": 36.315650939941406, "learning_rate": 9.786340852130326e-06, "loss": 31.6243, "step": 15465 }, { "epoch": 368.23880597014926, "grad_norm": 35.161319732666016, "learning_rate": 9.785714285714286e-06, "loss": 32.0192, "step": 15466 }, { "epoch": 368.26268656716417, "grad_norm": 31.11237335205078, "learning_rate": 9.785087719298246e-06, "loss": 32.99, "step": 15467 }, { "epoch": 368.28656716417913, "grad_norm": 29.740867614746094, "learning_rate": 9.784461152882206e-06, "loss": 31.46, "step": 15468 }, { "epoch": 368.31044776119404, "grad_norm": 35.33416748046875, "learning_rate": 9.783834586466167e-06, "loss": 32.2135, "step": 15469 }, { "epoch": 368.33432835820895, "grad_norm": 32.053707122802734, "learning_rate": 9.783208020050125e-06, "loss": 32.5681, "step": 15470 }, { "epoch": 368.35820895522386, "grad_norm": 32.8370361328125, "learning_rate": 9.782581453634085e-06, "loss": 32.3504, "step": 15471 }, { "epoch": 368.3820895522388, "grad_norm": 29.674190521240234, "learning_rate": 9.781954887218047e-06, "loss": 32.2585, "step": 15472 }, { "epoch": 368.40597014925373, "grad_norm": 36.9009895324707, "learning_rate": 9.781328320802006e-06, "loss": 32.2817, "step": 15473 }, { "epoch": 368.42985074626864, "grad_norm": 30.275714874267578, "learning_rate": 9.780701754385966e-06, "loss": 32.0449, "step": 15474 }, { "epoch": 368.4537313432836, "grad_norm": 34.580535888671875, "learning_rate": 9.780075187969924e-06, "loss": 32.2336, "step": 15475 }, { "epoch": 368.4776119402985, "grad_norm": 31.394521713256836, "learning_rate": 9.779448621553886e-06, "loss": 33.726, "step": 15476 }, { "epoch": 368.5014925373134, "grad_norm": 36.203285217285156, "learning_rate": 9.778822055137846e-06, "loss": 31.1039, "step": 15477 }, { "epoch": 368.52537313432833, "grad_norm": 29.48996353149414, "learning_rate": 9.778195488721805e-06, "loss": 31.3228, "step": 15478 }, { "epoch": 368.5492537313433, "grad_norm": 37.20621109008789, "learning_rate": 9.777568922305765e-06, "loss": 32.4839, "step": 15479 }, { "epoch": 368.5731343283582, "grad_norm": 31.56957244873047, "learning_rate": 9.776942355889725e-06, "loss": 31.7537, "step": 15480 }, { "epoch": 368.5970149253731, "grad_norm": 33.576663970947266, "learning_rate": 9.776315789473685e-06, "loss": 32.0297, "step": 15481 }, { "epoch": 368.6208955223881, "grad_norm": 30.444557189941406, "learning_rate": 9.775689223057645e-06, "loss": 31.9534, "step": 15482 }, { "epoch": 368.644776119403, "grad_norm": 34.19621658325195, "learning_rate": 9.775062656641604e-06, "loss": 31.6328, "step": 15483 }, { "epoch": 368.6686567164179, "grad_norm": 33.4950065612793, "learning_rate": 9.774436090225564e-06, "loss": 32.7578, "step": 15484 }, { "epoch": 368.6925373134328, "grad_norm": 31.00712776184082, "learning_rate": 9.773809523809524e-06, "loss": 32.7168, "step": 15485 }, { "epoch": 368.7164179104478, "grad_norm": 30.438560485839844, "learning_rate": 9.773182957393484e-06, "loss": 32.4116, "step": 15486 }, { "epoch": 368.7402985074627, "grad_norm": 35.17342758178711, "learning_rate": 9.772556390977445e-06, "loss": 32.0912, "step": 15487 }, { "epoch": 368.7641791044776, "grad_norm": 28.074779510498047, "learning_rate": 9.771929824561405e-06, "loss": 32.1994, "step": 15488 }, { "epoch": 368.78805970149256, "grad_norm": 35.372432708740234, "learning_rate": 9.771303258145363e-06, "loss": 31.5893, "step": 15489 }, { "epoch": 368.81194029850747, "grad_norm": 30.71903419494629, "learning_rate": 9.770676691729323e-06, "loss": 31.6006, "step": 15490 }, { "epoch": 368.8358208955224, "grad_norm": 33.70318603515625, "learning_rate": 9.770050125313285e-06, "loss": 31.2515, "step": 15491 }, { "epoch": 368.85970149253734, "grad_norm": 30.457311630249023, "learning_rate": 9.769423558897244e-06, "loss": 31.5946, "step": 15492 }, { "epoch": 368.88358208955225, "grad_norm": 34.40906524658203, "learning_rate": 9.768796992481204e-06, "loss": 32.6181, "step": 15493 }, { "epoch": 368.90746268656716, "grad_norm": 31.876022338867188, "learning_rate": 9.768170426065164e-06, "loss": 31.9773, "step": 15494 }, { "epoch": 368.93134328358207, "grad_norm": 34.63753128051758, "learning_rate": 9.767543859649124e-06, "loss": 32.5075, "step": 15495 }, { "epoch": 368.95522388059703, "grad_norm": 28.70196533203125, "learning_rate": 9.766917293233084e-06, "loss": 32.4539, "step": 15496 }, { "epoch": 368.97910447761194, "grad_norm": 33.3589973449707, "learning_rate": 9.766290726817043e-06, "loss": 30.9086, "step": 15497 }, { "epoch": 369.0, "grad_norm": 25.43604850769043, "learning_rate": 9.765664160401003e-06, "loss": 28.0329, "step": 15498 }, { "epoch": 369.0238805970149, "grad_norm": 35.03908920288086, "learning_rate": 9.765037593984963e-06, "loss": 33.3028, "step": 15499 }, { "epoch": 369.0477611940299, "grad_norm": 31.213130950927734, "learning_rate": 9.764411027568923e-06, "loss": 32.0712, "step": 15500 }, { "epoch": 369.0716417910448, "grad_norm": 34.93855285644531, "learning_rate": 9.763784461152883e-06, "loss": 33.2838, "step": 15501 }, { "epoch": 369.0955223880597, "grad_norm": 30.033199310302734, "learning_rate": 9.763157894736844e-06, "loss": 31.3056, "step": 15502 }, { "epoch": 369.1194029850746, "grad_norm": 31.440475463867188, "learning_rate": 9.762531328320802e-06, "loss": 32.4002, "step": 15503 }, { "epoch": 369.14328358208957, "grad_norm": 24.40007781982422, "learning_rate": 9.761904761904762e-06, "loss": 31.126, "step": 15504 }, { "epoch": 369.1671641791045, "grad_norm": 35.60890579223633, "learning_rate": 9.761278195488722e-06, "loss": 31.9125, "step": 15505 }, { "epoch": 369.1910447761194, "grad_norm": 30.02541160583496, "learning_rate": 9.760651629072683e-06, "loss": 31.2605, "step": 15506 }, { "epoch": 369.21492537313435, "grad_norm": 32.43452453613281, "learning_rate": 9.760025062656643e-06, "loss": 31.4892, "step": 15507 }, { "epoch": 369.23880597014926, "grad_norm": 31.58152198791504, "learning_rate": 9.759398496240601e-06, "loss": 31.267, "step": 15508 }, { "epoch": 369.26268656716417, "grad_norm": 31.37113380432129, "learning_rate": 9.758771929824563e-06, "loss": 32.1276, "step": 15509 }, { "epoch": 369.28656716417913, "grad_norm": 26.584365844726562, "learning_rate": 9.758145363408522e-06, "loss": 32.9294, "step": 15510 }, { "epoch": 369.31044776119404, "grad_norm": 30.561748504638672, "learning_rate": 9.757518796992482e-06, "loss": 32.3237, "step": 15511 }, { "epoch": 369.33432835820895, "grad_norm": 25.796714782714844, "learning_rate": 9.756892230576442e-06, "loss": 33.5997, "step": 15512 }, { "epoch": 369.35820895522386, "grad_norm": 35.041847229003906, "learning_rate": 9.756265664160402e-06, "loss": 31.7871, "step": 15513 }, { "epoch": 369.3820895522388, "grad_norm": 29.74791717529297, "learning_rate": 9.755639097744362e-06, "loss": 32.6398, "step": 15514 }, { "epoch": 369.40597014925373, "grad_norm": 34.51224899291992, "learning_rate": 9.755012531328321e-06, "loss": 31.3808, "step": 15515 }, { "epoch": 369.42985074626864, "grad_norm": 30.90576171875, "learning_rate": 9.754385964912281e-06, "loss": 32.1477, "step": 15516 }, { "epoch": 369.4537313432836, "grad_norm": 32.87046813964844, "learning_rate": 9.753759398496241e-06, "loss": 30.7815, "step": 15517 }, { "epoch": 369.4776119402985, "grad_norm": 31.34151268005371, "learning_rate": 9.7531328320802e-06, "loss": 32.4918, "step": 15518 }, { "epoch": 369.5014925373134, "grad_norm": 33.1551513671875, "learning_rate": 9.75250626566416e-06, "loss": 31.5781, "step": 15519 }, { "epoch": 369.52537313432833, "grad_norm": 30.585973739624023, "learning_rate": 9.751879699248122e-06, "loss": 31.218, "step": 15520 }, { "epoch": 369.5492537313433, "grad_norm": NaN, "learning_rate": 9.751253132832082e-06, "loss": 45.2538, "step": 15521 }, { "epoch": 369.5731343283582, "grad_norm": 30.716432571411133, "learning_rate": 9.751253132832082e-06, "loss": 31.3152, "step": 15522 }, { "epoch": 369.5970149253731, "grad_norm": 26.349685668945312, "learning_rate": 9.75062656641604e-06, "loss": 32.5287, "step": 15523 }, { "epoch": 369.6208955223881, "grad_norm": 35.84343338012695, "learning_rate": 9.75e-06, "loss": 33.6801, "step": 15524 }, { "epoch": 369.644776119403, "grad_norm": 29.796785354614258, "learning_rate": 9.749373433583961e-06, "loss": 31.9983, "step": 15525 }, { "epoch": 369.6686567164179, "grad_norm": 36.817138671875, "learning_rate": 9.748746867167921e-06, "loss": 31.5356, "step": 15526 }, { "epoch": 369.6925373134328, "grad_norm": 33.89388656616211, "learning_rate": 9.748120300751881e-06, "loss": 30.51, "step": 15527 }, { "epoch": 369.7164179104478, "grad_norm": 29.24090576171875, "learning_rate": 9.74749373433584e-06, "loss": 31.9835, "step": 15528 }, { "epoch": 369.7402985074627, "grad_norm": 30.145151138305664, "learning_rate": 9.7468671679198e-06, "loss": 31.6624, "step": 15529 }, { "epoch": 369.7641791044776, "grad_norm": 30.085168838500977, "learning_rate": 9.74624060150376e-06, "loss": 31.5058, "step": 15530 }, { "epoch": 369.78805970149256, "grad_norm": 25.119464874267578, "learning_rate": 9.74561403508772e-06, "loss": 31.6317, "step": 15531 }, { "epoch": 369.81194029850747, "grad_norm": 38.49943923950195, "learning_rate": 9.74498746867168e-06, "loss": 31.6026, "step": 15532 }, { "epoch": 369.8358208955224, "grad_norm": 30.145709991455078, "learning_rate": 9.74436090225564e-06, "loss": 30.7972, "step": 15533 }, { "epoch": 369.85970149253734, "grad_norm": 32.580440521240234, "learning_rate": 9.7437343358396e-06, "loss": 32.9526, "step": 15534 }, { "epoch": 369.88358208955225, "grad_norm": 32.929630279541016, "learning_rate": 9.74310776942356e-06, "loss": 31.8029, "step": 15535 }, { "epoch": 369.90746268656716, "grad_norm": 32.2010612487793, "learning_rate": 9.74248120300752e-06, "loss": 31.5956, "step": 15536 }, { "epoch": 369.93134328358207, "grad_norm": 26.953540802001953, "learning_rate": 9.741854636591479e-06, "loss": 32.5965, "step": 15537 }, { "epoch": 369.95522388059703, "grad_norm": 35.538597106933594, "learning_rate": 9.741228070175439e-06, "loss": 31.7904, "step": 15538 }, { "epoch": 369.97910447761194, "grad_norm": 30.791345596313477, "learning_rate": 9.740601503759399e-06, "loss": 31.886, "step": 15539 }, { "epoch": 370.0, "grad_norm": 26.10861587524414, "learning_rate": 9.73997493734336e-06, "loss": 26.5258, "step": 15540 }, { "epoch": 370.0238805970149, "grad_norm": 25.21748924255371, "learning_rate": 9.73934837092732e-06, "loss": 32.4107, "step": 15541 }, { "epoch": 370.0477611940299, "grad_norm": 33.55727767944336, "learning_rate": 9.738721804511278e-06, "loss": 31.2604, "step": 15542 }, { "epoch": 370.0716417910448, "grad_norm": 27.492733001708984, "learning_rate": 9.73809523809524e-06, "loss": 31.3192, "step": 15543 }, { "epoch": 370.0955223880597, "grad_norm": 35.26737976074219, "learning_rate": 9.7374686716792e-06, "loss": 32.7499, "step": 15544 }, { "epoch": 370.1194029850746, "grad_norm": 29.664501190185547, "learning_rate": 9.736842105263159e-06, "loss": 30.9097, "step": 15545 }, { "epoch": 370.14328358208957, "grad_norm": 29.96634292602539, "learning_rate": 9.736215538847119e-06, "loss": 32.626, "step": 15546 }, { "epoch": 370.1671641791045, "grad_norm": 26.555383682250977, "learning_rate": 9.735588972431079e-06, "loss": 31.5584, "step": 15547 }, { "epoch": 370.1910447761194, "grad_norm": 31.332529067993164, "learning_rate": 9.734962406015038e-06, "loss": 32.3188, "step": 15548 }, { "epoch": 370.21492537313435, "grad_norm": 24.305049896240234, "learning_rate": 9.734335839598998e-06, "loss": 31.6605, "step": 15549 }, { "epoch": 370.23880597014926, "grad_norm": 30.789113998413086, "learning_rate": 9.733709273182958e-06, "loss": 30.8001, "step": 15550 }, { "epoch": 370.26268656716417, "grad_norm": 25.05321502685547, "learning_rate": 9.733082706766918e-06, "loss": 32.8161, "step": 15551 }, { "epoch": 370.28656716417913, "grad_norm": 33.553836822509766, "learning_rate": 9.732456140350878e-06, "loss": 32.5293, "step": 15552 }, { "epoch": 370.31044776119404, "grad_norm": 30.329069137573242, "learning_rate": 9.731829573934837e-06, "loss": 32.8475, "step": 15553 }, { "epoch": 370.33432835820895, "grad_norm": 32.331119537353516, "learning_rate": 9.731203007518797e-06, "loss": 31.3911, "step": 15554 }, { "epoch": 370.35820895522386, "grad_norm": 26.98367691040039, "learning_rate": 9.730576441102759e-06, "loss": 31.6221, "step": 15555 }, { "epoch": 370.3820895522388, "grad_norm": 30.56206703186035, "learning_rate": 9.729949874686717e-06, "loss": 31.9551, "step": 15556 }, { "epoch": 370.40597014925373, "grad_norm": 27.096773147583008, "learning_rate": 9.729323308270677e-06, "loss": 31.6267, "step": 15557 }, { "epoch": 370.42985074626864, "grad_norm": 30.748807907104492, "learning_rate": 9.728696741854638e-06, "loss": 31.1861, "step": 15558 }, { "epoch": 370.4537313432836, "grad_norm": 27.02344512939453, "learning_rate": 9.728070175438598e-06, "loss": 31.6813, "step": 15559 }, { "epoch": 370.4776119402985, "grad_norm": 33.22273254394531, "learning_rate": 9.727443609022558e-06, "loss": 32.8792, "step": 15560 }, { "epoch": 370.5014925373134, "grad_norm": 27.815967559814453, "learning_rate": 9.726817042606517e-06, "loss": 31.3247, "step": 15561 }, { "epoch": 370.52537313432833, "grad_norm": 28.645957946777344, "learning_rate": 9.726190476190477e-06, "loss": 31.9764, "step": 15562 }, { "epoch": 370.5492537313433, "grad_norm": 22.48628807067871, "learning_rate": 9.725563909774437e-06, "loss": 30.3754, "step": 15563 }, { "epoch": 370.5731343283582, "grad_norm": 26.176103591918945, "learning_rate": 9.724937343358397e-06, "loss": 32.7292, "step": 15564 }, { "epoch": 370.5970149253731, "grad_norm": 22.917192459106445, "learning_rate": 9.724310776942357e-06, "loss": 31.6366, "step": 15565 }, { "epoch": 370.6208955223881, "grad_norm": 30.325681686401367, "learning_rate": 9.723684210526316e-06, "loss": 31.0988, "step": 15566 }, { "epoch": 370.644776119403, "grad_norm": 25.483360290527344, "learning_rate": 9.723057644110276e-06, "loss": 32.2486, "step": 15567 }, { "epoch": 370.6686567164179, "grad_norm": 29.788089752197266, "learning_rate": 9.722431077694236e-06, "loss": 32.6861, "step": 15568 }, { "epoch": 370.6925373134328, "grad_norm": 27.898733139038086, "learning_rate": 9.721804511278196e-06, "loss": 32.5099, "step": 15569 }, { "epoch": 370.7164179104478, "grad_norm": 23.3122615814209, "learning_rate": 9.721177944862156e-06, "loss": 30.6325, "step": 15570 }, { "epoch": 370.7402985074627, "grad_norm": 25.64780616760254, "learning_rate": 9.720551378446115e-06, "loss": 32.0149, "step": 15571 }, { "epoch": 370.7641791044776, "grad_norm": 24.018203735351562, "learning_rate": 9.719924812030075e-06, "loss": 31.7357, "step": 15572 }, { "epoch": 370.78805970149256, "grad_norm": 21.4426212310791, "learning_rate": 9.719298245614037e-06, "loss": 32.3928, "step": 15573 }, { "epoch": 370.81194029850747, "grad_norm": 22.026609420776367, "learning_rate": 9.718671679197997e-06, "loss": 31.4606, "step": 15574 }, { "epoch": 370.8358208955224, "grad_norm": 19.70415687561035, "learning_rate": 9.718045112781955e-06, "loss": 31.9559, "step": 15575 }, { "epoch": 370.85970149253734, "grad_norm": 19.977237701416016, "learning_rate": 9.717418546365916e-06, "loss": 32.4238, "step": 15576 }, { "epoch": 370.88358208955225, "grad_norm": 20.55643653869629, "learning_rate": 9.716791979949876e-06, "loss": 31.3513, "step": 15577 }, { "epoch": 370.90746268656716, "grad_norm": 18.112289428710938, "learning_rate": 9.716165413533836e-06, "loss": 33.0936, "step": 15578 }, { "epoch": 370.93134328358207, "grad_norm": 19.38970184326172, "learning_rate": 9.715538847117796e-06, "loss": 31.9432, "step": 15579 }, { "epoch": 370.95522388059703, "grad_norm": 19.477205276489258, "learning_rate": 9.714912280701755e-06, "loss": 32.2867, "step": 15580 }, { "epoch": 370.97910447761194, "grad_norm": 18.255844116210938, "learning_rate": 9.714285714285715e-06, "loss": 32.1032, "step": 15581 }, { "epoch": 371.0, "grad_norm": 17.171831130981445, "learning_rate": 9.713659147869675e-06, "loss": 28.3181, "step": 15582 }, { "epoch": 371.0238805970149, "grad_norm": 20.054550170898438, "learning_rate": 9.713032581453635e-06, "loss": 31.2504, "step": 15583 }, { "epoch": 371.0477611940299, "grad_norm": 16.24350357055664, "learning_rate": 9.712406015037595e-06, "loss": 30.9002, "step": 15584 }, { "epoch": 371.0716417910448, "grad_norm": 22.360483169555664, "learning_rate": 9.711779448621554e-06, "loss": 31.9409, "step": 15585 }, { "epoch": 371.0955223880597, "grad_norm": 15.936921119689941, "learning_rate": 9.711152882205514e-06, "loss": 32.561, "step": 15586 }, { "epoch": 371.1194029850746, "grad_norm": 24.830108642578125, "learning_rate": 9.710526315789474e-06, "loss": 32.1191, "step": 15587 }, { "epoch": 371.14328358208957, "grad_norm": 19.574199676513672, "learning_rate": 9.709899749373435e-06, "loss": 32.28, "step": 15588 }, { "epoch": 371.1671641791045, "grad_norm": 23.13572120666504, "learning_rate": 9.709273182957394e-06, "loss": 31.4571, "step": 15589 }, { "epoch": 371.1910447761194, "grad_norm": 18.860767364501953, "learning_rate": 9.708646616541353e-06, "loss": 32.0729, "step": 15590 }, { "epoch": 371.21492537313435, "grad_norm": 23.69676399230957, "learning_rate": 9.708020050125315e-06, "loss": 33.1218, "step": 15591 }, { "epoch": 371.23880597014926, "grad_norm": 23.62067222595215, "learning_rate": 9.707393483709275e-06, "loss": 31.9943, "step": 15592 }, { "epoch": 371.26268656716417, "grad_norm": 17.43523406982422, "learning_rate": 9.706766917293234e-06, "loss": 31.6582, "step": 15593 }, { "epoch": 371.28656716417913, "grad_norm": 23.645328521728516, "learning_rate": 9.706140350877193e-06, "loss": 32.9624, "step": 15594 }, { "epoch": 371.31044776119404, "grad_norm": 21.034313201904297, "learning_rate": 9.705513784461154e-06, "loss": 32.4597, "step": 15595 }, { "epoch": 371.33432835820895, "grad_norm": 21.051218032836914, "learning_rate": 9.704887218045114e-06, "loss": 31.1774, "step": 15596 }, { "epoch": 371.35820895522386, "grad_norm": 19.982168197631836, "learning_rate": 9.704260651629074e-06, "loss": 31.3354, "step": 15597 }, { "epoch": 371.3820895522388, "grad_norm": 18.409671783447266, "learning_rate": 9.703634085213033e-06, "loss": 31.4788, "step": 15598 }, { "epoch": 371.40597014925373, "grad_norm": 22.119007110595703, "learning_rate": 9.703007518796993e-06, "loss": 33.4, "step": 15599 }, { "epoch": 371.42985074626864, "grad_norm": 18.121231079101562, "learning_rate": 9.702380952380953e-06, "loss": 32.2274, "step": 15600 }, { "epoch": 371.4537313432836, "grad_norm": 20.418014526367188, "learning_rate": 9.701754385964913e-06, "loss": 31.1862, "step": 15601 }, { "epoch": 371.4776119402985, "grad_norm": 20.588905334472656, "learning_rate": 9.701127819548873e-06, "loss": 32.4219, "step": 15602 }, { "epoch": 371.5014925373134, "grad_norm": 19.312116622924805, "learning_rate": 9.700501253132832e-06, "loss": 32.1805, "step": 15603 }, { "epoch": 371.52537313432833, "grad_norm": 30.35874366760254, "learning_rate": 9.699874686716792e-06, "loss": 31.7602, "step": 15604 }, { "epoch": 371.5492537313433, "grad_norm": 20.58683204650879, "learning_rate": 9.699248120300752e-06, "loss": 32.6349, "step": 15605 }, { "epoch": 371.5731343283582, "grad_norm": 27.020854949951172, "learning_rate": 9.698621553884714e-06, "loss": 31.0244, "step": 15606 }, { "epoch": 371.5970149253731, "grad_norm": 19.93199348449707, "learning_rate": 9.697994987468673e-06, "loss": 31.4117, "step": 15607 }, { "epoch": 371.6208955223881, "grad_norm": 26.27665901184082, "learning_rate": 9.697368421052631e-06, "loss": 32.1331, "step": 15608 }, { "epoch": 371.644776119403, "grad_norm": 23.66201400756836, "learning_rate": 9.696741854636593e-06, "loss": 32.7146, "step": 15609 }, { "epoch": 371.6686567164179, "grad_norm": 22.81130027770996, "learning_rate": 9.696115288220553e-06, "loss": 31.8624, "step": 15610 }, { "epoch": 371.6925373134328, "grad_norm": 19.37236976623535, "learning_rate": 9.695488721804513e-06, "loss": 32.7146, "step": 15611 }, { "epoch": 371.7164179104478, "grad_norm": 25.079851150512695, "learning_rate": 9.69486215538847e-06, "loss": 31.806, "step": 15612 }, { "epoch": 371.7402985074627, "grad_norm": 21.69959831237793, "learning_rate": 9.694235588972432e-06, "loss": 31.6021, "step": 15613 }, { "epoch": 371.7641791044776, "grad_norm": 20.25904083251953, "learning_rate": 9.693609022556392e-06, "loss": 30.7198, "step": 15614 }, { "epoch": 371.78805970149256, "grad_norm": 20.798961639404297, "learning_rate": 9.692982456140352e-06, "loss": 32.8119, "step": 15615 }, { "epoch": 371.81194029850747, "grad_norm": 19.60314178466797, "learning_rate": 9.692355889724312e-06, "loss": 31.6579, "step": 15616 }, { "epoch": 371.8358208955224, "grad_norm": 18.63255500793457, "learning_rate": 9.691729323308271e-06, "loss": 31.6115, "step": 15617 }, { "epoch": 371.85970149253734, "grad_norm": 22.781856536865234, "learning_rate": 9.691102756892231e-06, "loss": 31.7108, "step": 15618 }, { "epoch": 371.88358208955225, "grad_norm": 18.63038444519043, "learning_rate": 9.690476190476191e-06, "loss": 30.5746, "step": 15619 }, { "epoch": 371.90746268656716, "grad_norm": 18.407224655151367, "learning_rate": 9.68984962406015e-06, "loss": 32.8008, "step": 15620 }, { "epoch": 371.93134328358207, "grad_norm": 15.794146537780762, "learning_rate": 9.689223057644112e-06, "loss": 31.8143, "step": 15621 }, { "epoch": 371.95522388059703, "grad_norm": 15.639073371887207, "learning_rate": 9.68859649122807e-06, "loss": 31.0757, "step": 15622 }, { "epoch": 371.97910447761194, "grad_norm": 18.33678436279297, "learning_rate": 9.68796992481203e-06, "loss": 32.9441, "step": 15623 }, { "epoch": 372.0, "grad_norm": 14.055055618286133, "learning_rate": 9.687343358395992e-06, "loss": 27.1119, "step": 15624 }, { "epoch": 372.0238805970149, "grad_norm": 20.265544891357422, "learning_rate": 9.686716791979951e-06, "loss": 32.5663, "step": 15625 }, { "epoch": 372.0477611940299, "grad_norm": 16.922901153564453, "learning_rate": 9.686090225563911e-06, "loss": 32.1715, "step": 15626 }, { "epoch": 372.0716417910448, "grad_norm": 17.308961868286133, "learning_rate": 9.68546365914787e-06, "loss": 31.0583, "step": 15627 }, { "epoch": 372.0955223880597, "grad_norm": 17.07660675048828, "learning_rate": 9.68483709273183e-06, "loss": 32.5213, "step": 15628 }, { "epoch": 372.1194029850746, "grad_norm": 19.201412200927734, "learning_rate": 9.68421052631579e-06, "loss": 32.5643, "step": 15629 }, { "epoch": 372.14328358208957, "grad_norm": 17.836727142333984, "learning_rate": 9.68358395989975e-06, "loss": 31.0902, "step": 15630 }, { "epoch": 372.1671641791045, "grad_norm": 18.333969116210938, "learning_rate": 9.68295739348371e-06, "loss": 31.5131, "step": 15631 }, { "epoch": 372.1910447761194, "grad_norm": 19.234643936157227, "learning_rate": 9.68233082706767e-06, "loss": 31.4236, "step": 15632 }, { "epoch": 372.21492537313435, "grad_norm": 19.713449478149414, "learning_rate": 9.68170426065163e-06, "loss": 31.9958, "step": 15633 }, { "epoch": 372.23880597014926, "grad_norm": 16.030332565307617, "learning_rate": 9.68107769423559e-06, "loss": 31.6064, "step": 15634 }, { "epoch": 372.26268656716417, "grad_norm": 19.08253288269043, "learning_rate": 9.68045112781955e-06, "loss": 32.1442, "step": 15635 }, { "epoch": 372.28656716417913, "grad_norm": 17.089033126831055, "learning_rate": 9.67982456140351e-06, "loss": 31.3209, "step": 15636 }, { "epoch": 372.31044776119404, "grad_norm": 20.829944610595703, "learning_rate": 9.679197994987469e-06, "loss": 31.4231, "step": 15637 }, { "epoch": 372.33432835820895, "grad_norm": 20.799938201904297, "learning_rate": 9.678571428571429e-06, "loss": 31.0764, "step": 15638 }, { "epoch": 372.35820895522386, "grad_norm": 16.779449462890625, "learning_rate": 9.67794486215539e-06, "loss": 31.2131, "step": 15639 }, { "epoch": 372.3820895522388, "grad_norm": 20.484376907348633, "learning_rate": 9.67731829573935e-06, "loss": 32.8235, "step": 15640 }, { "epoch": 372.40597014925373, "grad_norm": 17.515758514404297, "learning_rate": 9.676691729323308e-06, "loss": 31.6492, "step": 15641 }, { "epoch": 372.42985074626864, "grad_norm": 22.78826332092285, "learning_rate": 9.676065162907268e-06, "loss": 31.8365, "step": 15642 }, { "epoch": 372.4537313432836, "grad_norm": 19.350875854492188, "learning_rate": 9.67543859649123e-06, "loss": 31.5171, "step": 15643 }, { "epoch": 372.4776119402985, "grad_norm": 23.918296813964844, "learning_rate": 9.67481203007519e-06, "loss": 31.1798, "step": 15644 }, { "epoch": 372.5014925373134, "grad_norm": 18.345638275146484, "learning_rate": 9.674185463659147e-06, "loss": 33.647, "step": 15645 }, { "epoch": 372.52537313432833, "grad_norm": 17.2088565826416, "learning_rate": 9.673558897243109e-06, "loss": 30.8432, "step": 15646 }, { "epoch": 372.5492537313433, "grad_norm": 17.569700241088867, "learning_rate": 9.672932330827069e-06, "loss": 30.4097, "step": 15647 }, { "epoch": 372.5731343283582, "grad_norm": 19.014482498168945, "learning_rate": 9.672305764411029e-06, "loss": 31.684, "step": 15648 }, { "epoch": 372.5970149253731, "grad_norm": 18.570009231567383, "learning_rate": 9.671679197994988e-06, "loss": 32.3407, "step": 15649 }, { "epoch": 372.6208955223881, "grad_norm": 18.89525032043457, "learning_rate": 9.671052631578948e-06, "loss": 31.7127, "step": 15650 }, { "epoch": 372.644776119403, "grad_norm": 18.356260299682617, "learning_rate": 9.670426065162908e-06, "loss": 31.7904, "step": 15651 }, { "epoch": 372.6686567164179, "grad_norm": 17.954633712768555, "learning_rate": 9.669799498746868e-06, "loss": 31.2474, "step": 15652 }, { "epoch": 372.6925373134328, "grad_norm": 20.029273986816406, "learning_rate": 9.669172932330828e-06, "loss": 32.123, "step": 15653 }, { "epoch": 372.7164179104478, "grad_norm": 24.08721351623535, "learning_rate": 9.668546365914789e-06, "loss": 31.1802, "step": 15654 }, { "epoch": 372.7402985074627, "grad_norm": 19.90455436706543, "learning_rate": 9.667919799498747e-06, "loss": 32.2033, "step": 15655 }, { "epoch": 372.7641791044776, "grad_norm": 16.073408126831055, "learning_rate": 9.667293233082707e-06, "loss": 32.0278, "step": 15656 }, { "epoch": 372.78805970149256, "grad_norm": 19.517078399658203, "learning_rate": 9.666666666666667e-06, "loss": 31.9246, "step": 15657 }, { "epoch": 372.81194029850747, "grad_norm": 19.3916072845459, "learning_rate": 9.666040100250628e-06, "loss": 32.5814, "step": 15658 }, { "epoch": 372.8358208955224, "grad_norm": 19.744491577148438, "learning_rate": 9.665413533834588e-06, "loss": 32.2292, "step": 15659 }, { "epoch": 372.85970149253734, "grad_norm": 17.65901756286621, "learning_rate": 9.664786967418546e-06, "loss": 33.2274, "step": 15660 }, { "epoch": 372.88358208955225, "grad_norm": 16.525672912597656, "learning_rate": 9.664160401002508e-06, "loss": 32.1232, "step": 15661 }, { "epoch": 372.90746268656716, "grad_norm": 19.38490867614746, "learning_rate": 9.663533834586467e-06, "loss": 31.7979, "step": 15662 }, { "epoch": 372.93134328358207, "grad_norm": 19.699527740478516, "learning_rate": 9.662907268170427e-06, "loss": 31.924, "step": 15663 }, { "epoch": 372.95522388059703, "grad_norm": 16.93585777282715, "learning_rate": 9.662280701754387e-06, "loss": 33.2152, "step": 15664 }, { "epoch": 372.97910447761194, "grad_norm": 19.587158203125, "learning_rate": 9.661654135338347e-06, "loss": 31.9852, "step": 15665 }, { "epoch": 373.0, "grad_norm": 15.894246101379395, "learning_rate": 9.661027568922307e-06, "loss": 27.6835, "step": 15666 }, { "epoch": 373.0238805970149, "grad_norm": 21.86368751525879, "learning_rate": 9.660401002506266e-06, "loss": 32.8987, "step": 15667 }, { "epoch": 373.0477611940299, "grad_norm": 19.782215118408203, "learning_rate": 9.659774436090226e-06, "loss": 32.9165, "step": 15668 }, { "epoch": 373.0716417910448, "grad_norm": 21.27768325805664, "learning_rate": 9.659147869674186e-06, "loss": 32.9157, "step": 15669 }, { "epoch": 373.0955223880597, "grad_norm": 17.359445571899414, "learning_rate": 9.658521303258146e-06, "loss": 32.5595, "step": 15670 }, { "epoch": 373.1194029850746, "grad_norm": 15.899066925048828, "learning_rate": 9.657894736842106e-06, "loss": 31.5267, "step": 15671 }, { "epoch": 373.14328358208957, "grad_norm": 18.509159088134766, "learning_rate": 9.657268170426065e-06, "loss": 32.7882, "step": 15672 }, { "epoch": 373.1671641791045, "grad_norm": 17.6567440032959, "learning_rate": 9.656641604010027e-06, "loss": 29.9962, "step": 15673 }, { "epoch": 373.1910447761194, "grad_norm": 22.238590240478516, "learning_rate": 9.656015037593985e-06, "loss": 30.9683, "step": 15674 }, { "epoch": 373.21492537313435, "grad_norm": 18.666589736938477, "learning_rate": 9.655388471177945e-06, "loss": 31.52, "step": 15675 }, { "epoch": 373.23880597014926, "grad_norm": 15.589720726013184, "learning_rate": 9.654761904761906e-06, "loss": 32.5969, "step": 15676 }, { "epoch": 373.26268656716417, "grad_norm": 17.04974365234375, "learning_rate": 9.654135338345866e-06, "loss": 32.6573, "step": 15677 }, { "epoch": 373.28656716417913, "grad_norm": 17.244007110595703, "learning_rate": 9.653508771929824e-06, "loss": 31.636, "step": 15678 }, { "epoch": 373.31044776119404, "grad_norm": 20.437562942504883, "learning_rate": 9.652882205513786e-06, "loss": 31.147, "step": 15679 }, { "epoch": 373.33432835820895, "grad_norm": 17.9154109954834, "learning_rate": 9.652255639097746e-06, "loss": 32.4387, "step": 15680 }, { "epoch": 373.35820895522386, "grad_norm": 13.466055870056152, "learning_rate": 9.651629072681705e-06, "loss": 31.4355, "step": 15681 }, { "epoch": 373.3820895522388, "grad_norm": 19.356807708740234, "learning_rate": 9.651002506265665e-06, "loss": 33.0777, "step": 15682 }, { "epoch": 373.40597014925373, "grad_norm": 17.105945587158203, "learning_rate": 9.650375939849625e-06, "loss": 32.6633, "step": 15683 }, { "epoch": 373.42985074626864, "grad_norm": 23.539051055908203, "learning_rate": 9.649749373433585e-06, "loss": 31.5375, "step": 15684 }, { "epoch": 373.4537313432836, "grad_norm": 17.529346466064453, "learning_rate": 9.649122807017545e-06, "loss": 30.6411, "step": 15685 }, { "epoch": 373.4776119402985, "grad_norm": 22.33460235595703, "learning_rate": 9.648496240601504e-06, "loss": 32.7147, "step": 15686 }, { "epoch": 373.5014925373134, "grad_norm": 18.314443588256836, "learning_rate": 9.647869674185464e-06, "loss": 32.3927, "step": 15687 }, { "epoch": 373.52537313432833, "grad_norm": 23.103107452392578, "learning_rate": 9.647243107769424e-06, "loss": 31.2902, "step": 15688 }, { "epoch": 373.5492537313433, "grad_norm": 18.87906837463379, "learning_rate": 9.646616541353384e-06, "loss": 31.5671, "step": 15689 }, { "epoch": 373.5731343283582, "grad_norm": 20.747264862060547, "learning_rate": 9.645989974937343e-06, "loss": 31.3365, "step": 15690 }, { "epoch": 373.5970149253731, "grad_norm": 18.24740982055664, "learning_rate": 9.645363408521305e-06, "loss": 32.4551, "step": 15691 }, { "epoch": 373.6208955223881, "grad_norm": 25.681413650512695, "learning_rate": 9.644736842105263e-06, "loss": 32.2727, "step": 15692 }, { "epoch": 373.644776119403, "grad_norm": 19.273601531982422, "learning_rate": 9.644110275689223e-06, "loss": 30.9468, "step": 15693 }, { "epoch": 373.6686567164179, "grad_norm": 20.211137771606445, "learning_rate": 9.643483709273184e-06, "loss": 31.6142, "step": 15694 }, { "epoch": 373.6925373134328, "grad_norm": 21.14042091369629, "learning_rate": 9.642857142857144e-06, "loss": 32.7588, "step": 15695 }, { "epoch": 373.7164179104478, "grad_norm": 23.442846298217773, "learning_rate": 9.642230576441104e-06, "loss": 31.6502, "step": 15696 }, { "epoch": 373.7402985074627, "grad_norm": 18.872739791870117, "learning_rate": 9.641604010025064e-06, "loss": 31.2705, "step": 15697 }, { "epoch": 373.7641791044776, "grad_norm": 23.400981903076172, "learning_rate": 9.640977443609024e-06, "loss": 30.9387, "step": 15698 }, { "epoch": 373.78805970149256, "grad_norm": 25.57870101928711, "learning_rate": 9.640350877192983e-06, "loss": 32.485, "step": 15699 }, { "epoch": 373.81194029850747, "grad_norm": 17.87088394165039, "learning_rate": 9.639724310776943e-06, "loss": 32.1647, "step": 15700 }, { "epoch": 373.8358208955224, "grad_norm": 21.626007080078125, "learning_rate": 9.639097744360903e-06, "loss": 31.8758, "step": 15701 }, { "epoch": 373.85970149253734, "grad_norm": 21.196897506713867, "learning_rate": 9.638471177944863e-06, "loss": 32.1598, "step": 15702 }, { "epoch": 373.88358208955225, "grad_norm": 20.520910263061523, "learning_rate": 9.637844611528823e-06, "loss": 32.0218, "step": 15703 }, { "epoch": 373.90746268656716, "grad_norm": 17.130661010742188, "learning_rate": 9.637218045112782e-06, "loss": 31.28, "step": 15704 }, { "epoch": 373.93134328358207, "grad_norm": 22.795211791992188, "learning_rate": 9.636591478696742e-06, "loss": 30.8157, "step": 15705 }, { "epoch": 373.95522388059703, "grad_norm": 17.75351333618164, "learning_rate": 9.635964912280704e-06, "loss": 31.2262, "step": 15706 }, { "epoch": 373.97910447761194, "grad_norm": 19.189939498901367, "learning_rate": 9.635338345864662e-06, "loss": 30.2862, "step": 15707 }, { "epoch": 374.0, "grad_norm": 18.197975158691406, "learning_rate": 9.634711779448622e-06, "loss": 27.8654, "step": 15708 }, { "epoch": 374.0238805970149, "grad_norm": 20.827064514160156, "learning_rate": 9.634085213032583e-06, "loss": 32.659, "step": 15709 }, { "epoch": 374.0477611940299, "grad_norm": 16.823833465576172, "learning_rate": 9.633458646616543e-06, "loss": 31.2229, "step": 15710 }, { "epoch": 374.0716417910448, "grad_norm": 15.454418182373047, "learning_rate": 9.632832080200501e-06, "loss": 32.8586, "step": 15711 }, { "epoch": 374.0955223880597, "grad_norm": 15.673126220703125, "learning_rate": 9.632205513784462e-06, "loss": 31.9886, "step": 15712 }, { "epoch": 374.1194029850746, "grad_norm": 20.704084396362305, "learning_rate": 9.631578947368422e-06, "loss": 30.682, "step": 15713 }, { "epoch": 374.14328358208957, "grad_norm": 19.827491760253906, "learning_rate": 9.630952380952382e-06, "loss": 31.6092, "step": 15714 }, { "epoch": 374.1671641791045, "grad_norm": 20.26668930053711, "learning_rate": 9.630325814536342e-06, "loss": 30.9277, "step": 15715 }, { "epoch": 374.1910447761194, "grad_norm": 16.02630615234375, "learning_rate": 9.629699248120302e-06, "loss": 32.166, "step": 15716 }, { "epoch": 374.21492537313435, "grad_norm": 17.158720016479492, "learning_rate": 9.629072681704261e-06, "loss": 30.8832, "step": 15717 }, { "epoch": 374.23880597014926, "grad_norm": 18.231409072875977, "learning_rate": 9.628446115288221e-06, "loss": 31.5202, "step": 15718 }, { "epoch": 374.26268656716417, "grad_norm": 16.47313117980957, "learning_rate": 9.627819548872181e-06, "loss": 31.6077, "step": 15719 }, { "epoch": 374.28656716417913, "grad_norm": 19.122621536254883, "learning_rate": 9.627192982456141e-06, "loss": 32.1889, "step": 15720 }, { "epoch": 374.31044776119404, "grad_norm": 16.968183517456055, "learning_rate": 9.6265664160401e-06, "loss": 30.3898, "step": 15721 }, { "epoch": 374.33432835820895, "grad_norm": 17.954517364501953, "learning_rate": 9.62593984962406e-06, "loss": 30.8276, "step": 15722 }, { "epoch": 374.35820895522386, "grad_norm": 16.388755798339844, "learning_rate": 9.62531328320802e-06, "loss": 32.2938, "step": 15723 }, { "epoch": 374.3820895522388, "grad_norm": 16.530471801757812, "learning_rate": 9.624686716791982e-06, "loss": 31.9841, "step": 15724 }, { "epoch": 374.40597014925373, "grad_norm": 19.26321792602539, "learning_rate": 9.62406015037594e-06, "loss": 32.2286, "step": 15725 }, { "epoch": 374.42985074626864, "grad_norm": 15.312089920043945, "learning_rate": 9.6234335839599e-06, "loss": 31.0212, "step": 15726 }, { "epoch": 374.4537313432836, "grad_norm": 17.374874114990234, "learning_rate": 9.622807017543861e-06, "loss": 31.0968, "step": 15727 }, { "epoch": 374.4776119402985, "grad_norm": 17.588796615600586, "learning_rate": 9.622180451127821e-06, "loss": 32.8473, "step": 15728 }, { "epoch": 374.5014925373134, "grad_norm": 22.50474739074707, "learning_rate": 9.62155388471178e-06, "loss": 32.6445, "step": 15729 }, { "epoch": 374.52537313432833, "grad_norm": 20.12819480895996, "learning_rate": 9.620927318295739e-06, "loss": 32.2587, "step": 15730 }, { "epoch": 374.5492537313433, "grad_norm": 18.700468063354492, "learning_rate": 9.6203007518797e-06, "loss": 30.9637, "step": 15731 }, { "epoch": 374.5731343283582, "grad_norm": 16.99765396118164, "learning_rate": 9.61967418546366e-06, "loss": 33.1013, "step": 15732 }, { "epoch": 374.5970149253731, "grad_norm": 24.254283905029297, "learning_rate": 9.61904761904762e-06, "loss": 31.9349, "step": 15733 }, { "epoch": 374.6208955223881, "grad_norm": 22.096355438232422, "learning_rate": 9.61842105263158e-06, "loss": 31.8806, "step": 15734 }, { "epoch": 374.644776119403, "grad_norm": 15.354461669921875, "learning_rate": 9.61779448621554e-06, "loss": 32.1886, "step": 15735 }, { "epoch": 374.6686567164179, "grad_norm": 18.554244995117188, "learning_rate": 9.6171679197995e-06, "loss": 31.6642, "step": 15736 }, { "epoch": 374.6925373134328, "grad_norm": 25.11833381652832, "learning_rate": 9.61654135338346e-06, "loss": 31.1263, "step": 15737 }, { "epoch": 374.7164179104478, "grad_norm": 19.309633255004883, "learning_rate": 9.615914786967419e-06, "loss": 32.7638, "step": 15738 }, { "epoch": 374.7402985074627, "grad_norm": 17.912731170654297, "learning_rate": 9.61528822055138e-06, "loss": 32.5479, "step": 15739 }, { "epoch": 374.7641791044776, "grad_norm": 22.228496551513672, "learning_rate": 9.614661654135339e-06, "loss": 32.5027, "step": 15740 }, { "epoch": 374.78805970149256, "grad_norm": 14.644289016723633, "learning_rate": 9.614035087719298e-06, "loss": 31.0092, "step": 15741 }, { "epoch": 374.81194029850747, "grad_norm": 19.665285110473633, "learning_rate": 9.61340852130326e-06, "loss": 31.5028, "step": 15742 }, { "epoch": 374.8358208955224, "grad_norm": 17.751829147338867, "learning_rate": 9.61278195488722e-06, "loss": 32.241, "step": 15743 }, { "epoch": 374.85970149253734, "grad_norm": 21.286407470703125, "learning_rate": 9.612155388471178e-06, "loss": 31.8838, "step": 15744 }, { "epoch": 374.88358208955225, "grad_norm": 21.53445816040039, "learning_rate": 9.611528822055138e-06, "loss": 31.1677, "step": 15745 }, { "epoch": 374.90746268656716, "grad_norm": 22.49662971496582, "learning_rate": 9.610902255639099e-06, "loss": 33.3431, "step": 15746 }, { "epoch": 374.93134328358207, "grad_norm": 16.694021224975586, "learning_rate": 9.610275689223059e-06, "loss": 32.0352, "step": 15747 }, { "epoch": 374.95522388059703, "grad_norm": 31.050386428833008, "learning_rate": 9.609649122807019e-06, "loss": 30.8092, "step": 15748 }, { "epoch": 374.97910447761194, "grad_norm": 21.933225631713867, "learning_rate": 9.609022556390978e-06, "loss": 31.636, "step": 15749 }, { "epoch": 375.0, "grad_norm": 29.1340274810791, "learning_rate": 9.608395989974938e-06, "loss": 27.5906, "step": 15750 }, { "epoch": 375.0238805970149, "grad_norm": 25.01865577697754, "learning_rate": 9.607769423558898e-06, "loss": 32.5635, "step": 15751 }, { "epoch": 375.0477611940299, "grad_norm": 25.84816551208496, "learning_rate": 9.607142857142858e-06, "loss": 31.8677, "step": 15752 }, { "epoch": 375.0716417910448, "grad_norm": 24.823163986206055, "learning_rate": 9.606516290726818e-06, "loss": 31.0163, "step": 15753 }, { "epoch": 375.0955223880597, "grad_norm": 18.789514541625977, "learning_rate": 9.605889724310777e-06, "loss": 31.1502, "step": 15754 }, { "epoch": 375.1194029850746, "grad_norm": 32.61874771118164, "learning_rate": 9.605263157894737e-06, "loss": 32.6863, "step": 15755 }, { "epoch": 375.14328358208957, "grad_norm": 21.44687271118164, "learning_rate": 9.604636591478697e-06, "loss": 32.0578, "step": 15756 }, { "epoch": 375.1671641791045, "grad_norm": 32.22695541381836, "learning_rate": 9.604010025062659e-06, "loss": 33.2771, "step": 15757 }, { "epoch": 375.1910447761194, "grad_norm": 23.191083908081055, "learning_rate": 9.603383458646617e-06, "loss": 32.1562, "step": 15758 }, { "epoch": 375.21492537313435, "grad_norm": 30.16989517211914, "learning_rate": 9.602756892230576e-06, "loss": 31.4468, "step": 15759 }, { "epoch": 375.23880597014926, "grad_norm": 25.701231002807617, "learning_rate": 9.602130325814536e-06, "loss": 30.8878, "step": 15760 }, { "epoch": 375.26268656716417, "grad_norm": 20.336566925048828, "learning_rate": 9.601503759398498e-06, "loss": 30.8714, "step": 15761 }, { "epoch": 375.28656716417913, "grad_norm": 34.1949577331543, "learning_rate": 9.600877192982458e-06, "loss": 30.8927, "step": 15762 }, { "epoch": 375.31044776119404, "grad_norm": 24.111207962036133, "learning_rate": 9.600250626566416e-06, "loss": 31.5991, "step": 15763 }, { "epoch": 375.33432835820895, "grad_norm": 37.756351470947266, "learning_rate": 9.599624060150377e-06, "loss": 31.1229, "step": 15764 }, { "epoch": 375.35820895522386, "grad_norm": 24.177133560180664, "learning_rate": 9.598997493734337e-06, "loss": 31.7933, "step": 15765 }, { "epoch": 375.3820895522388, "grad_norm": 36.99074935913086, "learning_rate": 9.598370927318297e-06, "loss": 30.9657, "step": 15766 }, { "epoch": 375.40597014925373, "grad_norm": 27.523313522338867, "learning_rate": 9.597744360902257e-06, "loss": 32.164, "step": 15767 }, { "epoch": 375.42985074626864, "grad_norm": 46.68133544921875, "learning_rate": 9.597117794486216e-06, "loss": 30.1314, "step": 15768 }, { "epoch": 375.4537313432836, "grad_norm": 39.167694091796875, "learning_rate": 9.596491228070176e-06, "loss": 31.8979, "step": 15769 }, { "epoch": 375.4776119402985, "grad_norm": 36.873687744140625, "learning_rate": 9.595864661654136e-06, "loss": 31.4334, "step": 15770 }, { "epoch": 375.5014925373134, "grad_norm": 34.393184661865234, "learning_rate": 9.595238095238096e-06, "loss": 31.9177, "step": 15771 }, { "epoch": 375.52537313432833, "grad_norm": 32.181156158447266, "learning_rate": 9.594611528822056e-06, "loss": 32.4528, "step": 15772 }, { "epoch": 375.5492537313433, "grad_norm": 25.708959579467773, "learning_rate": 9.593984962406015e-06, "loss": 32.719, "step": 15773 }, { "epoch": 375.5731343283582, "grad_norm": 37.73577880859375, "learning_rate": 9.593358395989975e-06, "loss": 32.6842, "step": 15774 }, { "epoch": 375.5970149253731, "grad_norm": 30.247201919555664, "learning_rate": 9.592731829573937e-06, "loss": 31.812, "step": 15775 }, { "epoch": 375.6208955223881, "grad_norm": 40.35224533081055, "learning_rate": 9.592105263157896e-06, "loss": 31.7453, "step": 15776 }, { "epoch": 375.644776119403, "grad_norm": 35.667171478271484, "learning_rate": 9.591478696741855e-06, "loss": 32.4635, "step": 15777 }, { "epoch": 375.6686567164179, "grad_norm": 32.05931091308594, "learning_rate": 9.590852130325814e-06, "loss": 30.7586, "step": 15778 }, { "epoch": 375.6925373134328, "grad_norm": 29.923818588256836, "learning_rate": 9.590225563909776e-06, "loss": 31.6638, "step": 15779 }, { "epoch": 375.7164179104478, "grad_norm": 35.31935501098633, "learning_rate": 9.589598997493736e-06, "loss": 32.3195, "step": 15780 }, { "epoch": 375.7402985074627, "grad_norm": 28.098405838012695, "learning_rate": 9.588972431077695e-06, "loss": 32.5611, "step": 15781 }, { "epoch": 375.7641791044776, "grad_norm": 35.44413375854492, "learning_rate": 9.588345864661655e-06, "loss": 32.3807, "step": 15782 }, { "epoch": 375.78805970149256, "grad_norm": 35.95784378051758, "learning_rate": 9.587719298245615e-06, "loss": 31.6648, "step": 15783 }, { "epoch": 375.81194029850747, "grad_norm": 33.91968536376953, "learning_rate": 9.587092731829575e-06, "loss": 32.5861, "step": 15784 }, { "epoch": 375.8358208955224, "grad_norm": 34.18654251098633, "learning_rate": 9.586466165413535e-06, "loss": 31.0572, "step": 15785 }, { "epoch": 375.85970149253734, "grad_norm": 29.548994064331055, "learning_rate": 9.585839598997494e-06, "loss": 31.0244, "step": 15786 }, { "epoch": 375.88358208955225, "grad_norm": 26.251121520996094, "learning_rate": 9.585213032581454e-06, "loss": 31.1787, "step": 15787 }, { "epoch": 375.90746268656716, "grad_norm": 32.742435455322266, "learning_rate": 9.584586466165414e-06, "loss": 33.0015, "step": 15788 }, { "epoch": 375.93134328358207, "grad_norm": 30.169097900390625, "learning_rate": 9.583959899749374e-06, "loss": 32.0246, "step": 15789 }, { "epoch": 375.95522388059703, "grad_norm": 37.93808364868164, "learning_rate": 9.583333333333335e-06, "loss": 30.7534, "step": 15790 }, { "epoch": 375.97910447761194, "grad_norm": 33.832611083984375, "learning_rate": 9.582706766917293e-06, "loss": 31.6429, "step": 15791 }, { "epoch": 376.0, "grad_norm": 26.81100845336914, "learning_rate": 9.582080200501253e-06, "loss": 27.1944, "step": 15792 }, { "epoch": 376.0238805970149, "grad_norm": 31.282583236694336, "learning_rate": 9.581453634085213e-06, "loss": 31.937, "step": 15793 }, { "epoch": 376.0477611940299, "grad_norm": 31.36951446533203, "learning_rate": 9.580827067669175e-06, "loss": 31.5685, "step": 15794 }, { "epoch": 376.0716417910448, "grad_norm": 29.539073944091797, "learning_rate": 9.580200501253134e-06, "loss": 31.4499, "step": 15795 }, { "epoch": 376.0955223880597, "grad_norm": 35.193016052246094, "learning_rate": 9.579573934837092e-06, "loss": 32.0183, "step": 15796 }, { "epoch": 376.1194029850746, "grad_norm": 29.613739013671875, "learning_rate": 9.578947368421054e-06, "loss": 32.0695, "step": 15797 }, { "epoch": 376.14328358208957, "grad_norm": 34.028934478759766, "learning_rate": 9.578320802005014e-06, "loss": 32.6362, "step": 15798 }, { "epoch": 376.1671641791045, "grad_norm": 26.947418212890625, "learning_rate": 9.577694235588974e-06, "loss": 31.9747, "step": 15799 }, { "epoch": 376.1910447761194, "grad_norm": 34.34019088745117, "learning_rate": 9.577067669172933e-06, "loss": 31.2205, "step": 15800 }, { "epoch": 376.21492537313435, "grad_norm": 32.156585693359375, "learning_rate": 9.576441102756893e-06, "loss": 31.2934, "step": 15801 }, { "epoch": 376.23880597014926, "grad_norm": 32.523345947265625, "learning_rate": 9.575814536340853e-06, "loss": 30.6958, "step": 15802 }, { "epoch": 376.26268656716417, "grad_norm": 28.242023468017578, "learning_rate": 9.575187969924813e-06, "loss": 31.0181, "step": 15803 }, { "epoch": 376.28656716417913, "grad_norm": 34.437557220458984, "learning_rate": 9.574561403508773e-06, "loss": 31.7272, "step": 15804 }, { "epoch": 376.31044776119404, "grad_norm": 29.275766372680664, "learning_rate": 9.573934837092732e-06, "loss": 32.7677, "step": 15805 }, { "epoch": 376.33432835820895, "grad_norm": 32.32860565185547, "learning_rate": 9.573308270676692e-06, "loss": 31.3942, "step": 15806 }, { "epoch": 376.35820895522386, "grad_norm": 32.09244918823242, "learning_rate": 9.572681704260652e-06, "loss": 31.0439, "step": 15807 }, { "epoch": 376.3820895522388, "grad_norm": 34.416072845458984, "learning_rate": 9.572055137844612e-06, "loss": 31.642, "step": 15808 }, { "epoch": 376.40597014925373, "grad_norm": 28.986270904541016, "learning_rate": 9.571428571428573e-06, "loss": 31.3785, "step": 15809 }, { "epoch": 376.42985074626864, "grad_norm": 36.026180267333984, "learning_rate": 9.570802005012531e-06, "loss": 32.0038, "step": 15810 }, { "epoch": 376.4537313432836, "grad_norm": 30.704662322998047, "learning_rate": 9.570175438596491e-06, "loss": 32.4572, "step": 15811 }, { "epoch": 376.4776119402985, "grad_norm": 35.821624755859375, "learning_rate": 9.569548872180453e-06, "loss": 31.7397, "step": 15812 }, { "epoch": 376.5014925373134, "grad_norm": 30.665729522705078, "learning_rate": 9.568922305764412e-06, "loss": 32.1376, "step": 15813 }, { "epoch": 376.52537313432833, "grad_norm": 30.9582576751709, "learning_rate": 9.568295739348372e-06, "loss": 32.7472, "step": 15814 }, { "epoch": 376.5492537313433, "grad_norm": 27.143753051757812, "learning_rate": 9.567669172932332e-06, "loss": 31.5181, "step": 15815 }, { "epoch": 376.5731343283582, "grad_norm": 34.9139289855957, "learning_rate": 9.567042606516292e-06, "loss": 31.7119, "step": 15816 }, { "epoch": 376.5970149253731, "grad_norm": 30.557527542114258, "learning_rate": 9.566416040100252e-06, "loss": 32.5282, "step": 15817 }, { "epoch": 376.6208955223881, "grad_norm": 32.080833435058594, "learning_rate": 9.565789473684211e-06, "loss": 32.4353, "step": 15818 }, { "epoch": 376.644776119403, "grad_norm": 28.52341079711914, "learning_rate": 9.565162907268171e-06, "loss": 31.566, "step": 15819 }, { "epoch": 376.6686567164179, "grad_norm": 34.749046325683594, "learning_rate": 9.564536340852131e-06, "loss": 31.0211, "step": 15820 }, { "epoch": 376.6925373134328, "grad_norm": 28.574386596679688, "learning_rate": 9.56390977443609e-06, "loss": 31.212, "step": 15821 }, { "epoch": 376.7164179104478, "grad_norm": 31.630035400390625, "learning_rate": 9.56328320802005e-06, "loss": 31.7167, "step": 15822 }, { "epoch": 376.7402985074627, "grad_norm": 25.646991729736328, "learning_rate": 9.56265664160401e-06, "loss": 29.6201, "step": 15823 }, { "epoch": 376.7641791044776, "grad_norm": 32.602535247802734, "learning_rate": 9.56203007518797e-06, "loss": 32.3173, "step": 15824 }, { "epoch": 376.78805970149256, "grad_norm": 26.45676040649414, "learning_rate": 9.56140350877193e-06, "loss": 32.0254, "step": 15825 }, { "epoch": 376.81194029850747, "grad_norm": 33.60383987426758, "learning_rate": 9.56077694235589e-06, "loss": 31.7161, "step": 15826 }, { "epoch": 376.8358208955224, "grad_norm": 29.133054733276367, "learning_rate": 9.560150375939851e-06, "loss": 30.4912, "step": 15827 }, { "epoch": 376.85970149253734, "grad_norm": 34.06317901611328, "learning_rate": 9.559523809523811e-06, "loss": 32.2731, "step": 15828 }, { "epoch": 376.88358208955225, "grad_norm": 29.86116600036621, "learning_rate": 9.55889724310777e-06, "loss": 31.0667, "step": 15829 }, { "epoch": 376.90746268656716, "grad_norm": 28.151329040527344, "learning_rate": 9.55827067669173e-06, "loss": 31.4379, "step": 15830 }, { "epoch": 376.93134328358207, "grad_norm": 29.177358627319336, "learning_rate": 9.55764411027569e-06, "loss": 32.7566, "step": 15831 }, { "epoch": 376.95522388059703, "grad_norm": 30.21954917907715, "learning_rate": 9.55701754385965e-06, "loss": 31.9125, "step": 15832 }, { "epoch": 376.97910447761194, "grad_norm": 23.19196319580078, "learning_rate": 9.55639097744361e-06, "loss": 31.6665, "step": 15833 }, { "epoch": 377.0, "grad_norm": 29.176729202270508, "learning_rate": 9.55576441102757e-06, "loss": 27.8317, "step": 15834 }, { "epoch": 377.0238805970149, "grad_norm": 27.250619888305664, "learning_rate": 9.55513784461153e-06, "loss": 31.0447, "step": 15835 }, { "epoch": 377.0477611940299, "grad_norm": 32.7923698425293, "learning_rate": 9.55451127819549e-06, "loss": 30.9549, "step": 15836 }, { "epoch": 377.0716417910448, "grad_norm": 30.054567337036133, "learning_rate": 9.55388471177945e-06, "loss": 31.3272, "step": 15837 }, { "epoch": 377.0955223880597, "grad_norm": 30.509685516357422, "learning_rate": 9.553258145363409e-06, "loss": 31.5636, "step": 15838 }, { "epoch": 377.1194029850746, "grad_norm": 27.59921646118164, "learning_rate": 9.552631578947369e-06, "loss": 31.7532, "step": 15839 }, { "epoch": 377.14328358208957, "grad_norm": 26.052112579345703, "learning_rate": 9.552005012531329e-06, "loss": 31.6361, "step": 15840 }, { "epoch": 377.1671641791045, "grad_norm": 24.729148864746094, "learning_rate": 9.551378446115288e-06, "loss": 32.0229, "step": 15841 }, { "epoch": 377.1910447761194, "grad_norm": 25.934402465820312, "learning_rate": 9.55075187969925e-06, "loss": 31.8813, "step": 15842 }, { "epoch": 377.21492537313435, "grad_norm": 22.117822647094727, "learning_rate": 9.550125313283208e-06, "loss": 32.8609, "step": 15843 }, { "epoch": 377.23880597014926, "grad_norm": 30.511245727539062, "learning_rate": 9.549498746867168e-06, "loss": 31.8948, "step": 15844 }, { "epoch": 377.26268656716417, "grad_norm": 21.63555145263672, "learning_rate": 9.54887218045113e-06, "loss": 31.1702, "step": 15845 }, { "epoch": 377.28656716417913, "grad_norm": 29.63176155090332, "learning_rate": 9.54824561403509e-06, "loss": 31.411, "step": 15846 }, { "epoch": 377.31044776119404, "grad_norm": 22.744958877563477, "learning_rate": 9.547619047619049e-06, "loss": 31.5629, "step": 15847 }, { "epoch": 377.33432835820895, "grad_norm": 30.34880828857422, "learning_rate": 9.546992481203007e-06, "loss": 30.4462, "step": 15848 }, { "epoch": 377.35820895522386, "grad_norm": 26.772872924804688, "learning_rate": 9.546365914786969e-06, "loss": 31.9283, "step": 15849 }, { "epoch": 377.3820895522388, "grad_norm": 28.795146942138672, "learning_rate": 9.545739348370928e-06, "loss": 31.2553, "step": 15850 }, { "epoch": 377.40597014925373, "grad_norm": 26.94765281677246, "learning_rate": 9.545112781954888e-06, "loss": 32.3547, "step": 15851 }, { "epoch": 377.42985074626864, "grad_norm": 28.66550636291504, "learning_rate": 9.544486215538848e-06, "loss": 31.7639, "step": 15852 }, { "epoch": 377.4537313432836, "grad_norm": 23.641977310180664, "learning_rate": 9.543859649122808e-06, "loss": 31.9693, "step": 15853 }, { "epoch": 377.4776119402985, "grad_norm": 30.01308250427246, "learning_rate": 9.543233082706768e-06, "loss": 32.1116, "step": 15854 }, { "epoch": 377.5014925373134, "grad_norm": 22.378013610839844, "learning_rate": 9.542606516290727e-06, "loss": 32.2518, "step": 15855 }, { "epoch": 377.52537313432833, "grad_norm": 26.711362838745117, "learning_rate": 9.541979949874687e-06, "loss": 32.4148, "step": 15856 }, { "epoch": 377.5492537313433, "grad_norm": 22.41970443725586, "learning_rate": 9.541353383458647e-06, "loss": 30.8866, "step": 15857 }, { "epoch": 377.5731343283582, "grad_norm": 25.54366111755371, "learning_rate": 9.540726817042607e-06, "loss": 31.7993, "step": 15858 }, { "epoch": 377.5970149253731, "grad_norm": 22.378780364990234, "learning_rate": 9.540100250626567e-06, "loss": 32.0723, "step": 15859 }, { "epoch": 377.6208955223881, "grad_norm": 23.338041305541992, "learning_rate": 9.539473684210528e-06, "loss": 32.2472, "step": 15860 }, { "epoch": 377.644776119403, "grad_norm": 22.04779624938965, "learning_rate": 9.538847117794488e-06, "loss": 31.6033, "step": 15861 }, { "epoch": 377.6686567164179, "grad_norm": 23.686084747314453, "learning_rate": 9.538220551378446e-06, "loss": 31.2253, "step": 15862 }, { "epoch": 377.6925373134328, "grad_norm": 18.225317001342773, "learning_rate": 9.537593984962407e-06, "loss": 32.2873, "step": 15863 }, { "epoch": 377.7164179104478, "grad_norm": 19.244224548339844, "learning_rate": 9.536967418546367e-06, "loss": 31.3335, "step": 15864 }, { "epoch": 377.7402985074627, "grad_norm": 20.40607261657715, "learning_rate": 9.536340852130327e-06, "loss": 32.3113, "step": 15865 }, { "epoch": 377.7641791044776, "grad_norm": 17.419404983520508, "learning_rate": 9.535714285714287e-06, "loss": 32.3475, "step": 15866 }, { "epoch": 377.78805970149256, "grad_norm": 17.925275802612305, "learning_rate": 9.535087719298247e-06, "loss": 31.5005, "step": 15867 }, { "epoch": 377.81194029850747, "grad_norm": 17.21285057067871, "learning_rate": 9.534461152882206e-06, "loss": 32.8105, "step": 15868 }, { "epoch": 377.8358208955224, "grad_norm": NaN, "learning_rate": 9.533834586466166e-06, "loss": 50.8566, "step": 15869 }, { "epoch": 377.85970149253734, "grad_norm": 18.049938201904297, "learning_rate": 9.533834586466166e-06, "loss": 30.4199, "step": 15870 }, { "epoch": 377.88358208955225, "grad_norm": 17.153806686401367, "learning_rate": 9.533208020050126e-06, "loss": 31.5917, "step": 15871 }, { "epoch": 377.90746268656716, "grad_norm": 19.73189353942871, "learning_rate": 9.532581453634086e-06, "loss": 31.6223, "step": 15872 }, { "epoch": 377.93134328358207, "grad_norm": 15.848172187805176, "learning_rate": 9.531954887218046e-06, "loss": 31.8707, "step": 15873 }, { "epoch": 377.95522388059703, "grad_norm": 19.68431282043457, "learning_rate": 9.531328320802005e-06, "loss": 31.1429, "step": 15874 }, { "epoch": 377.97910447761194, "grad_norm": 16.735843658447266, "learning_rate": 9.530701754385965e-06, "loss": 31.9342, "step": 15875 }, { "epoch": 378.0, "grad_norm": NaN, "learning_rate": 9.530075187969927e-06, "loss": 35.8194, "step": 15876 }, { "epoch": 378.0238805970149, "grad_norm": 19.21783447265625, "learning_rate": 9.530075187969927e-06, "loss": 31.7106, "step": 15877 }, { "epoch": 378.0477611940299, "grad_norm": 17.19239044189453, "learning_rate": 9.529448621553885e-06, "loss": 31.4753, "step": 15878 }, { "epoch": 378.0716417910448, "grad_norm": 15.164022445678711, "learning_rate": 9.528822055137845e-06, "loss": 30.4925, "step": 15879 }, { "epoch": 378.0955223880597, "grad_norm": 18.01201820373535, "learning_rate": 9.528195488721806e-06, "loss": 31.7913, "step": 15880 }, { "epoch": 378.1194029850746, "grad_norm": 20.186861038208008, "learning_rate": 9.527568922305766e-06, "loss": 32.5956, "step": 15881 }, { "epoch": 378.14328358208957, "grad_norm": 20.592960357666016, "learning_rate": 9.526942355889726e-06, "loss": 32.0871, "step": 15882 }, { "epoch": 378.1671641791045, "grad_norm": 16.473604202270508, "learning_rate": 9.526315789473684e-06, "loss": 31.6023, "step": 15883 }, { "epoch": 378.1910447761194, "grad_norm": 22.3590087890625, "learning_rate": 9.525689223057645e-06, "loss": 33.0083, "step": 15884 }, { "epoch": 378.21492537313435, "grad_norm": 19.668336868286133, "learning_rate": 9.525062656641605e-06, "loss": 31.3263, "step": 15885 }, { "epoch": 378.23880597014926, "grad_norm": 19.205453872680664, "learning_rate": 9.524436090225565e-06, "loss": 31.2491, "step": 15886 }, { "epoch": 378.26268656716417, "grad_norm": 19.854339599609375, "learning_rate": 9.523809523809525e-06, "loss": 31.0131, "step": 15887 }, { "epoch": 378.28656716417913, "grad_norm": 27.01983070373535, "learning_rate": 9.523182957393485e-06, "loss": 30.894, "step": 15888 }, { "epoch": 378.31044776119404, "grad_norm": 19.966522216796875, "learning_rate": 9.522556390977444e-06, "loss": 31.726, "step": 15889 }, { "epoch": 378.33432835820895, "grad_norm": 19.188745498657227, "learning_rate": 9.521929824561404e-06, "loss": 31.0292, "step": 15890 }, { "epoch": 378.35820895522386, "grad_norm": 19.201169967651367, "learning_rate": 9.521303258145364e-06, "loss": 30.6026, "step": 15891 }, { "epoch": 378.3820895522388, "grad_norm": 22.15947723388672, "learning_rate": 9.520676691729324e-06, "loss": 32.1625, "step": 15892 }, { "epoch": 378.40597014925373, "grad_norm": 17.25479507446289, "learning_rate": 9.520050125313284e-06, "loss": 32.7679, "step": 15893 }, { "epoch": 378.42985074626864, "grad_norm": 25.67967987060547, "learning_rate": 9.519423558897243e-06, "loss": 32.2053, "step": 15894 }, { "epoch": 378.4537313432836, "grad_norm": 20.512603759765625, "learning_rate": 9.518796992481205e-06, "loss": 32.23, "step": 15895 }, { "epoch": 378.4776119402985, "grad_norm": 16.905094146728516, "learning_rate": 9.518170426065165e-06, "loss": 31.393, "step": 15896 }, { "epoch": 378.5014925373134, "grad_norm": 23.89948272705078, "learning_rate": 9.517543859649123e-06, "loss": 31.3879, "step": 15897 }, { "epoch": 378.52537313432833, "grad_norm": 17.382038116455078, "learning_rate": 9.516917293233083e-06, "loss": 31.3227, "step": 15898 }, { "epoch": 378.5492537313433, "grad_norm": 20.53512191772461, "learning_rate": 9.516290726817044e-06, "loss": 31.5636, "step": 15899 }, { "epoch": 378.5731343283582, "grad_norm": 17.52760124206543, "learning_rate": 9.515664160401004e-06, "loss": 31.7658, "step": 15900 }, { "epoch": 378.5970149253731, "grad_norm": 20.51746940612793, "learning_rate": 9.515037593984964e-06, "loss": 31.2039, "step": 15901 }, { "epoch": 378.6208955223881, "grad_norm": 18.75714874267578, "learning_rate": 9.514411027568923e-06, "loss": 32.9847, "step": 15902 }, { "epoch": 378.644776119403, "grad_norm": 17.246376037597656, "learning_rate": 9.513784461152883e-06, "loss": 31.9951, "step": 15903 }, { "epoch": 378.6686567164179, "grad_norm": 25.295597076416016, "learning_rate": 9.513157894736843e-06, "loss": 31.5805, "step": 15904 }, { "epoch": 378.6925373134328, "grad_norm": 18.569110870361328, "learning_rate": 9.512531328320803e-06, "loss": 31.6563, "step": 15905 }, { "epoch": 378.7164179104478, "grad_norm": 19.77447509765625, "learning_rate": 9.511904761904763e-06, "loss": 31.7076, "step": 15906 }, { "epoch": 378.7402985074627, "grad_norm": 22.064210891723633, "learning_rate": 9.511278195488722e-06, "loss": 31.7247, "step": 15907 }, { "epoch": 378.7641791044776, "grad_norm": 20.426063537597656, "learning_rate": 9.510651629072682e-06, "loss": 32.5137, "step": 15908 }, { "epoch": 378.78805970149256, "grad_norm": 16.537830352783203, "learning_rate": 9.510025062656642e-06, "loss": 31.0183, "step": 15909 }, { "epoch": 378.81194029850747, "grad_norm": 17.598913192749023, "learning_rate": 9.509398496240604e-06, "loss": 31.5591, "step": 15910 }, { "epoch": 378.8358208955224, "grad_norm": 18.24433135986328, "learning_rate": 9.508771929824562e-06, "loss": 30.3728, "step": 15911 }, { "epoch": 378.85970149253734, "grad_norm": 17.156044006347656, "learning_rate": 9.508145363408521e-06, "loss": 31.8933, "step": 15912 }, { "epoch": 378.88358208955225, "grad_norm": 19.96763801574707, "learning_rate": 9.507518796992481e-06, "loss": 31.8627, "step": 15913 }, { "epoch": 378.90746268656716, "grad_norm": 17.492136001586914, "learning_rate": 9.506892230576443e-06, "loss": 32.9634, "step": 15914 }, { "epoch": 378.93134328358207, "grad_norm": NaN, "learning_rate": 9.506265664160403e-06, "loss": 31.3351, "step": 15915 }, { "epoch": 378.95522388059703, "grad_norm": 22.429630279541016, "learning_rate": 9.506265664160403e-06, "loss": 32.1638, "step": 15916 }, { "epoch": 378.97910447761194, "grad_norm": 19.357730865478516, "learning_rate": 9.50563909774436e-06, "loss": 31.4918, "step": 15917 }, { "epoch": 379.0, "grad_norm": 17.785320281982422, "learning_rate": 9.505012531328322e-06, "loss": 27.6541, "step": 15918 }, { "epoch": 379.0238805970149, "grad_norm": 19.500259399414062, "learning_rate": 9.504385964912282e-06, "loss": 31.3774, "step": 15919 }, { "epoch": 379.0477611940299, "grad_norm": 21.011730194091797, "learning_rate": 9.503759398496242e-06, "loss": 31.8218, "step": 15920 }, { "epoch": 379.0716417910448, "grad_norm": 22.962175369262695, "learning_rate": 9.503132832080202e-06, "loss": 32.6586, "step": 15921 }, { "epoch": 379.0955223880597, "grad_norm": 19.155452728271484, "learning_rate": 9.502506265664161e-06, "loss": 32.2327, "step": 15922 }, { "epoch": 379.1194029850746, "grad_norm": 23.785982131958008, "learning_rate": 9.501879699248121e-06, "loss": 32.3278, "step": 15923 }, { "epoch": 379.14328358208957, "grad_norm": 24.060657501220703, "learning_rate": 9.501253132832081e-06, "loss": 31.0592, "step": 15924 }, { "epoch": 379.1671641791045, "grad_norm": 19.281307220458984, "learning_rate": 9.50062656641604e-06, "loss": 30.9093, "step": 15925 }, { "epoch": 379.1910447761194, "grad_norm": 21.47970199584961, "learning_rate": 9.5e-06, "loss": 30.323, "step": 15926 }, { "epoch": 379.21492537313435, "grad_norm": 18.192237854003906, "learning_rate": 9.49937343358396e-06, "loss": 30.895, "step": 15927 }, { "epoch": 379.23880597014926, "grad_norm": 24.31977653503418, "learning_rate": 9.49874686716792e-06, "loss": 29.9196, "step": 15928 }, { "epoch": 379.26268656716417, "grad_norm": 16.276206970214844, "learning_rate": 9.49812030075188e-06, "loss": 30.6136, "step": 15929 }, { "epoch": 379.28656716417913, "grad_norm": 23.11754035949707, "learning_rate": 9.497493734335841e-06, "loss": 31.0312, "step": 15930 }, { "epoch": 379.31044776119404, "grad_norm": 17.16490364074707, "learning_rate": 9.4968671679198e-06, "loss": 30.3436, "step": 15931 }, { "epoch": 379.33432835820895, "grad_norm": 20.933879852294922, "learning_rate": 9.49624060150376e-06, "loss": 31.749, "step": 15932 }, { "epoch": 379.35820895522386, "grad_norm": 22.423006057739258, "learning_rate": 9.49561403508772e-06, "loss": 32.1347, "step": 15933 }, { "epoch": 379.3820895522388, "grad_norm": 18.124557495117188, "learning_rate": 9.49498746867168e-06, "loss": 31.5066, "step": 15934 }, { "epoch": 379.40597014925373, "grad_norm": 22.877532958984375, "learning_rate": 9.494360902255639e-06, "loss": 31.8723, "step": 15935 }, { "epoch": 379.42985074626864, "grad_norm": 20.344417572021484, "learning_rate": 9.4937343358396e-06, "loss": 31.1676, "step": 15936 }, { "epoch": 379.4537313432836, "grad_norm": 26.261857986450195, "learning_rate": 9.49310776942356e-06, "loss": 31.6491, "step": 15937 }, { "epoch": 379.4776119402985, "grad_norm": 18.061038970947266, "learning_rate": 9.49248120300752e-06, "loss": 31.3321, "step": 15938 }, { "epoch": 379.5014925373134, "grad_norm": 23.36351203918457, "learning_rate": 9.49185463659148e-06, "loss": 31.2883, "step": 15939 }, { "epoch": 379.52537313432833, "grad_norm": 21.14858055114746, "learning_rate": 9.49122807017544e-06, "loss": 31.9338, "step": 15940 }, { "epoch": 379.5492537313433, "grad_norm": 21.407957077026367, "learning_rate": 9.4906015037594e-06, "loss": 31.6206, "step": 15941 }, { "epoch": 379.5731343283582, "grad_norm": 19.971158981323242, "learning_rate": 9.489974937343359e-06, "loss": 31.3695, "step": 15942 }, { "epoch": 379.5970149253731, "grad_norm": 23.457340240478516, "learning_rate": 9.489348370927319e-06, "loss": 30.0718, "step": 15943 }, { "epoch": 379.6208955223881, "grad_norm": 18.409650802612305, "learning_rate": 9.488721804511279e-06, "loss": 32.2795, "step": 15944 }, { "epoch": 379.644776119403, "grad_norm": 22.336862564086914, "learning_rate": 9.488095238095238e-06, "loss": 32.4552, "step": 15945 }, { "epoch": 379.6686567164179, "grad_norm": 21.455549240112305, "learning_rate": 9.487468671679198e-06, "loss": 32.0294, "step": 15946 }, { "epoch": 379.6925373134328, "grad_norm": 17.936628341674805, "learning_rate": 9.486842105263158e-06, "loss": 31.2746, "step": 15947 }, { "epoch": 379.7164179104478, "grad_norm": 16.496122360229492, "learning_rate": 9.48621553884712e-06, "loss": 32.2504, "step": 15948 }, { "epoch": 379.7402985074627, "grad_norm": 20.644977569580078, "learning_rate": 9.48558897243108e-06, "loss": 32.3653, "step": 15949 }, { "epoch": 379.7641791044776, "grad_norm": 20.545482635498047, "learning_rate": 9.484962406015037e-06, "loss": 32.4636, "step": 15950 }, { "epoch": 379.78805970149256, "grad_norm": 17.463809967041016, "learning_rate": 9.484335839598999e-06, "loss": 32.6064, "step": 15951 }, { "epoch": 379.81194029850747, "grad_norm": 14.48058032989502, "learning_rate": 9.483709273182959e-06, "loss": 30.6907, "step": 15952 }, { "epoch": 379.8358208955224, "grad_norm": 18.902481079101562, "learning_rate": 9.483082706766919e-06, "loss": 32.8878, "step": 15953 }, { "epoch": 379.85970149253734, "grad_norm": 15.660988807678223, "learning_rate": 9.482456140350878e-06, "loss": 32.0592, "step": 15954 }, { "epoch": 379.88358208955225, "grad_norm": 18.97564125061035, "learning_rate": 9.481829573934838e-06, "loss": 32.7558, "step": 15955 }, { "epoch": 379.90746268656716, "grad_norm": 20.05362892150879, "learning_rate": 9.481203007518798e-06, "loss": 32.3255, "step": 15956 }, { "epoch": 379.93134328358207, "grad_norm": 17.90984344482422, "learning_rate": 9.480576441102758e-06, "loss": 32.6887, "step": 15957 }, { "epoch": 379.95522388059703, "grad_norm": 18.937055587768555, "learning_rate": 9.479949874686717e-06, "loss": 31.7227, "step": 15958 }, { "epoch": 379.97910447761194, "grad_norm": 15.068392753601074, "learning_rate": 9.479323308270677e-06, "loss": 31.5952, "step": 15959 }, { "epoch": 380.0, "grad_norm": 17.841472625732422, "learning_rate": 9.478696741854637e-06, "loss": 27.6074, "step": 15960 }, { "epoch": 380.0, "step": 15960, "total_flos": 7.845630018318678e+17, "train_loss": 1.6809812497973142, "train_runtime": 25596.4735, "train_samples_per_second": 79.455, "train_steps_per_second": 0.624 }, { "epoch": 380.0238805970149, "grad_norm": 19.81171226501465, "learning_rate": 1e-05, "loss": 31.8237, "step": 15961 }, { "epoch": 380.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999404761904763e-06, "loss": 37.0622, "step": 15962 }, { "epoch": 380.0716417910448, "grad_norm": 271.6819152832031, "learning_rate": 9.999404761904763e-06, "loss": 37.5461, "step": 15963 }, { "epoch": 380.0955223880597, "grad_norm": 141.90977478027344, "learning_rate": 9.998809523809524e-06, "loss": 34.3859, "step": 15964 }, { "epoch": 380.1194029850746, "grad_norm": 83.8337173461914, "learning_rate": 9.998214285714286e-06, "loss": 32.6125, "step": 15965 }, { "epoch": 380.14328358208957, "grad_norm": 77.03105163574219, "learning_rate": 9.997619047619048e-06, "loss": 33.4331, "step": 15966 }, { "epoch": 380.1671641791045, "grad_norm": 44.83564376831055, "learning_rate": 9.99702380952381e-06, "loss": 32.8591, "step": 15967 }, { "epoch": 380.1910447761194, "grad_norm": 65.24427032470703, "learning_rate": 9.996428571428572e-06, "loss": 32.9025, "step": 15968 }, { "epoch": 380.21492537313435, "grad_norm": 42.944244384765625, "learning_rate": 9.995833333333335e-06, "loss": 32.5031, "step": 15969 }, { "epoch": 380.23880597014926, "grad_norm": 47.43098831176758, "learning_rate": 9.995238095238095e-06, "loss": 32.2138, "step": 15970 }, { "epoch": 380.26268656716417, "grad_norm": 39.91004943847656, "learning_rate": 9.994642857142857e-06, "loss": 32.6998, "step": 15971 }, { "epoch": 380.28656716417913, "grad_norm": 29.325395584106445, "learning_rate": 9.99404761904762e-06, "loss": 32.2484, "step": 15972 }, { "epoch": 380.31044776119404, "grad_norm": 30.982845306396484, "learning_rate": 9.993452380952382e-06, "loss": 31.9461, "step": 15973 }, { "epoch": 380.33432835820895, "grad_norm": 30.943849563598633, "learning_rate": 9.992857142857144e-06, "loss": 31.8489, "step": 15974 }, { "epoch": 380.35820895522386, "grad_norm": 25.19125747680664, "learning_rate": 9.992261904761904e-06, "loss": 32.146, "step": 15975 }, { "epoch": 380.3820895522388, "grad_norm": 24.344026565551758, "learning_rate": 9.991666666666668e-06, "loss": 32.2715, "step": 15976 }, { "epoch": 380.40597014925373, "grad_norm": 26.513288497924805, "learning_rate": 9.99107142857143e-06, "loss": 32.0756, "step": 15977 }, { "epoch": 380.42985074626864, "grad_norm": 21.315874099731445, "learning_rate": 9.990476190476191e-06, "loss": 31.8548, "step": 15978 }, { "epoch": 380.4537313432836, "grad_norm": 20.785608291625977, "learning_rate": 9.989880952380953e-06, "loss": 31.4687, "step": 15979 }, { "epoch": 380.4776119402985, "grad_norm": 26.352123260498047, "learning_rate": 9.989285714285715e-06, "loss": 31.7443, "step": 15980 }, { "epoch": 380.5014925373134, "grad_norm": 21.69326400756836, "learning_rate": 9.988690476190477e-06, "loss": 31.3887, "step": 15981 }, { "epoch": 380.52537313432833, "grad_norm": 22.32582664489746, "learning_rate": 9.988095238095239e-06, "loss": 32.7526, "step": 15982 }, { "epoch": 380.5492537313433, "grad_norm": 18.500606536865234, "learning_rate": 9.9875e-06, "loss": 31.9896, "step": 15983 }, { "epoch": 380.5731343283582, "grad_norm": 23.360305786132812, "learning_rate": 9.986904761904764e-06, "loss": 31.7416, "step": 15984 }, { "epoch": 380.5970149253731, "grad_norm": 19.942039489746094, "learning_rate": 9.986309523809524e-06, "loss": 30.7637, "step": 15985 }, { "epoch": 380.6208955223881, "grad_norm": 17.783775329589844, "learning_rate": 9.985714285714286e-06, "loss": 32.0459, "step": 15986 }, { "epoch": 380.644776119403, "grad_norm": 17.822635650634766, "learning_rate": 9.985119047619048e-06, "loss": 31.7512, "step": 15987 }, { "epoch": 380.6686567164179, "grad_norm": 22.624650955200195, "learning_rate": 9.984523809523811e-06, "loss": 31.7233, "step": 15988 }, { "epoch": 380.6925373134328, "grad_norm": 20.165393829345703, "learning_rate": 9.983928571428573e-06, "loss": 32.0389, "step": 15989 }, { "epoch": 380.7164179104478, "grad_norm": 17.92303466796875, "learning_rate": 9.983333333333333e-06, "loss": 31.3312, "step": 15990 }, { "epoch": 380.7402985074627, "grad_norm": 16.85744285583496, "learning_rate": 9.982738095238097e-06, "loss": 30.7716, "step": 15991 }, { "epoch": 380.7641791044776, "grad_norm": 16.1578369140625, "learning_rate": 9.982142857142858e-06, "loss": 32.3187, "step": 15992 }, { "epoch": 380.78805970149256, "grad_norm": 15.582061767578125, "learning_rate": 9.98154761904762e-06, "loss": 31.0295, "step": 15993 }, { "epoch": 380.81194029850747, "grad_norm": 21.836551666259766, "learning_rate": 9.980952380952382e-06, "loss": 31.7255, "step": 15994 }, { "epoch": 380.8358208955224, "grad_norm": 16.40296173095703, "learning_rate": 9.980357142857144e-06, "loss": 30.8564, "step": 15995 }, { "epoch": 380.85970149253734, "grad_norm": 16.774675369262695, "learning_rate": 9.979761904761906e-06, "loss": 32.031, "step": 15996 }, { "epoch": 380.88358208955225, "grad_norm": 21.959762573242188, "learning_rate": 9.979166666666668e-06, "loss": 31.8713, "step": 15997 }, { "epoch": 380.90746268656716, "grad_norm": 16.705278396606445, "learning_rate": 9.97857142857143e-06, "loss": 31.5967, "step": 15998 }, { "epoch": 380.93134328358207, "grad_norm": 18.2200927734375, "learning_rate": 9.977976190476191e-06, "loss": 32.4937, "step": 15999 }, { "epoch": 380.95522388059703, "grad_norm": 19.012588500976562, "learning_rate": 9.977380952380953e-06, "loss": 30.3078, "step": 16000 }, { "epoch": 380.97910447761194, "grad_norm": 21.131380081176758, "learning_rate": 9.976785714285715e-06, "loss": 31.5742, "step": 16001 }, { "epoch": 381.0, "grad_norm": 17.37620735168457, "learning_rate": 9.976190476190477e-06, "loss": 27.9876, "step": 16002 }, { "epoch": 381.0238805970149, "grad_norm": NaN, "learning_rate": 9.975595238095238e-06, "loss": 39.6585, "step": 16003 }, { "epoch": 381.0477611940299, "grad_norm": 17.03713607788086, "learning_rate": 9.975595238095238e-06, "loss": 32.1147, "step": 16004 }, { "epoch": 381.0716417910448, "grad_norm": 20.864765167236328, "learning_rate": 9.975000000000002e-06, "loss": 31.7377, "step": 16005 }, { "epoch": 381.0955223880597, "grad_norm": 17.91866683959961, "learning_rate": 9.974404761904762e-06, "loss": 31.8208, "step": 16006 }, { "epoch": 381.1194029850746, "grad_norm": 19.627910614013672, "learning_rate": 9.973809523809524e-06, "loss": 31.6915, "step": 16007 }, { "epoch": 381.14328358208957, "grad_norm": 16.735336303710938, "learning_rate": 9.973214285714287e-06, "loss": 30.6379, "step": 16008 }, { "epoch": 381.1671641791045, "grad_norm": 18.15534782409668, "learning_rate": 9.972619047619049e-06, "loss": 32.1459, "step": 16009 }, { "epoch": 381.1910447761194, "grad_norm": 17.133621215820312, "learning_rate": 9.972023809523811e-06, "loss": 31.3118, "step": 16010 }, { "epoch": 381.21492537313435, "grad_norm": 18.383630752563477, "learning_rate": 9.971428571428571e-06, "loss": 31.7941, "step": 16011 }, { "epoch": 381.23880597014926, "grad_norm": 24.900360107421875, "learning_rate": 9.970833333333335e-06, "loss": 32.1381, "step": 16012 }, { "epoch": 381.26268656716417, "grad_norm": 17.420650482177734, "learning_rate": 9.970238095238096e-06, "loss": 30.8753, "step": 16013 }, { "epoch": 381.28656716417913, "grad_norm": 16.8143310546875, "learning_rate": 9.969642857142858e-06, "loss": 30.8699, "step": 16014 }, { "epoch": 381.31044776119404, "grad_norm": 19.68670082092285, "learning_rate": 9.96904761904762e-06, "loss": 31.2582, "step": 16015 }, { "epoch": 381.33432835820895, "grad_norm": 21.80328369140625, "learning_rate": 9.968452380952382e-06, "loss": 31.3006, "step": 16016 }, { "epoch": 381.35820895522386, "grad_norm": 17.671180725097656, "learning_rate": 9.967857142857144e-06, "loss": 31.4751, "step": 16017 }, { "epoch": 381.3820895522388, "grad_norm": 20.94354820251465, "learning_rate": 9.967261904761905e-06, "loss": 32.2476, "step": 16018 }, { "epoch": 381.40597014925373, "grad_norm": 20.885517120361328, "learning_rate": 9.966666666666667e-06, "loss": 31.1364, "step": 16019 }, { "epoch": 381.42985074626864, "grad_norm": 22.11631965637207, "learning_rate": 9.966071428571429e-06, "loss": 32.5009, "step": 16020 }, { "epoch": 381.4537313432836, "grad_norm": 19.251211166381836, "learning_rate": 9.965476190476191e-06, "loss": 31.3807, "step": 16021 }, { "epoch": 381.4776119402985, "grad_norm": 20.826810836791992, "learning_rate": 9.964880952380953e-06, "loss": 32.882, "step": 16022 }, { "epoch": 381.5014925373134, "grad_norm": 16.566261291503906, "learning_rate": 9.964285714285714e-06, "loss": 30.5622, "step": 16023 }, { "epoch": 381.52537313432833, "grad_norm": 19.458803176879883, "learning_rate": 9.963690476190478e-06, "loss": 31.2937, "step": 16024 }, { "epoch": 381.5492537313433, "grad_norm": 17.66868019104004, "learning_rate": 9.963095238095238e-06, "loss": 32.559, "step": 16025 }, { "epoch": 381.5731343283582, "grad_norm": 21.109437942504883, "learning_rate": 9.9625e-06, "loss": 32.6014, "step": 16026 }, { "epoch": 381.5970149253731, "grad_norm": 17.954509735107422, "learning_rate": 9.961904761904763e-06, "loss": 32.064, "step": 16027 }, { "epoch": 381.6208955223881, "grad_norm": 17.789915084838867, "learning_rate": 9.961309523809525e-06, "loss": 30.7316, "step": 16028 }, { "epoch": 381.644776119403, "grad_norm": 16.219350814819336, "learning_rate": 9.960714285714287e-06, "loss": 31.7867, "step": 16029 }, { "epoch": 381.6686567164179, "grad_norm": 20.78518295288086, "learning_rate": 9.960119047619047e-06, "loss": 30.9799, "step": 16030 }, { "epoch": 381.6925373134328, "grad_norm": 19.862668991088867, "learning_rate": 9.95952380952381e-06, "loss": 32.1743, "step": 16031 }, { "epoch": 381.7164179104478, "grad_norm": 18.970291137695312, "learning_rate": 9.958928571428572e-06, "loss": 32.6788, "step": 16032 }, { "epoch": 381.7402985074627, "grad_norm": 20.33418083190918, "learning_rate": 9.958333333333334e-06, "loss": 32.6219, "step": 16033 }, { "epoch": 381.7641791044776, "grad_norm": 19.823413848876953, "learning_rate": 9.957738095238096e-06, "loss": 31.3587, "step": 16034 }, { "epoch": 381.78805970149256, "grad_norm": 22.553861618041992, "learning_rate": 9.957142857142858e-06, "loss": 31.6313, "step": 16035 }, { "epoch": 381.81194029850747, "grad_norm": 21.496826171875, "learning_rate": 9.95654761904762e-06, "loss": 31.3406, "step": 16036 }, { "epoch": 381.8358208955224, "grad_norm": 17.12688446044922, "learning_rate": 9.955952380952382e-06, "loss": 31.0774, "step": 16037 }, { "epoch": 381.85970149253734, "grad_norm": 18.4323673248291, "learning_rate": 9.955357142857143e-06, "loss": 30.6829, "step": 16038 }, { "epoch": 381.88358208955225, "grad_norm": 18.555824279785156, "learning_rate": 9.954761904761905e-06, "loss": 31.2206, "step": 16039 }, { "epoch": 381.90746268656716, "grad_norm": 22.945341110229492, "learning_rate": 9.954166666666667e-06, "loss": 30.9577, "step": 16040 }, { "epoch": 381.93134328358207, "grad_norm": 21.579174041748047, "learning_rate": 9.953571428571429e-06, "loss": 32.3625, "step": 16041 }, { "epoch": 381.95522388059703, "grad_norm": 17.47161293029785, "learning_rate": 9.95297619047619e-06, "loss": 31.8817, "step": 16042 }, { "epoch": 381.97910447761194, "grad_norm": 22.1416072845459, "learning_rate": 9.952380952380954e-06, "loss": 30.065, "step": 16043 }, { "epoch": 382.0, "grad_norm": 17.564512252807617, "learning_rate": 9.951785714285716e-06, "loss": 28.0914, "step": 16044 }, { "epoch": 382.0238805970149, "grad_norm": 30.5643310546875, "learning_rate": 9.951190476190476e-06, "loss": 31.4639, "step": 16045 }, { "epoch": 382.0477611940299, "grad_norm": 20.0091609954834, "learning_rate": 9.95059523809524e-06, "loss": 32.4623, "step": 16046 }, { "epoch": 382.0716417910448, "grad_norm": 30.843324661254883, "learning_rate": 9.950000000000001e-06, "loss": 31.474, "step": 16047 }, { "epoch": 382.0955223880597, "grad_norm": 23.881017684936523, "learning_rate": 9.949404761904763e-06, "loss": 30.2202, "step": 16048 }, { "epoch": 382.1194029850746, "grad_norm": 27.615142822265625, "learning_rate": 9.948809523809525e-06, "loss": 30.1956, "step": 16049 }, { "epoch": 382.14328358208957, "grad_norm": 27.60844612121582, "learning_rate": 9.948214285714287e-06, "loss": 32.6057, "step": 16050 }, { "epoch": 382.1671641791045, "grad_norm": 18.87521743774414, "learning_rate": 9.947619047619049e-06, "loss": 30.0185, "step": 16051 }, { "epoch": 382.1910447761194, "grad_norm": 28.86406707763672, "learning_rate": 9.94702380952381e-06, "loss": 31.3004, "step": 16052 }, { "epoch": 382.21492537313435, "grad_norm": 20.739280700683594, "learning_rate": 9.946428571428572e-06, "loss": 30.4528, "step": 16053 }, { "epoch": 382.23880597014926, "grad_norm": 21.37101936340332, "learning_rate": 9.945833333333334e-06, "loss": 31.3095, "step": 16054 }, { "epoch": 382.26268656716417, "grad_norm": 20.987924575805664, "learning_rate": 9.945238095238096e-06, "loss": 31.601, "step": 16055 }, { "epoch": 382.28656716417913, "grad_norm": 22.637855529785156, "learning_rate": 9.944642857142858e-06, "loss": 32.697, "step": 16056 }, { "epoch": 382.31044776119404, "grad_norm": 21.173524856567383, "learning_rate": 9.94404761904762e-06, "loss": 31.425, "step": 16057 }, { "epoch": 382.33432835820895, "grad_norm": 16.42003631591797, "learning_rate": 9.943452380952381e-06, "loss": 30.9219, "step": 16058 }, { "epoch": 382.35820895522386, "grad_norm": 29.418729782104492, "learning_rate": 9.942857142857145e-06, "loss": 31.1095, "step": 16059 }, { "epoch": 382.3820895522388, "grad_norm": 18.15989875793457, "learning_rate": 9.942261904761905e-06, "loss": 31.6159, "step": 16060 }, { "epoch": 382.40597014925373, "grad_norm": 25.514652252197266, "learning_rate": 9.941666666666667e-06, "loss": 30.9924, "step": 16061 }, { "epoch": 382.42985074626864, "grad_norm": 26.61665916442871, "learning_rate": 9.94107142857143e-06, "loss": 30.3087, "step": 16062 }, { "epoch": 382.4537313432836, "grad_norm": 21.26274299621582, "learning_rate": 9.940476190476192e-06, "loss": 32.2182, "step": 16063 }, { "epoch": 382.4776119402985, "grad_norm": 17.34431266784668, "learning_rate": 9.939880952380954e-06, "loss": 30.6975, "step": 16064 }, { "epoch": 382.5014925373134, "grad_norm": 29.654024124145508, "learning_rate": 9.939285714285714e-06, "loss": 31.4566, "step": 16065 }, { "epoch": 382.52537313432833, "grad_norm": 19.027162551879883, "learning_rate": 9.938690476190477e-06, "loss": 31.0827, "step": 16066 }, { "epoch": 382.5492537313433, "grad_norm": 41.36701965332031, "learning_rate": 9.93809523809524e-06, "loss": 32.6753, "step": 16067 }, { "epoch": 382.5731343283582, "grad_norm": 31.004261016845703, "learning_rate": 9.937500000000001e-06, "loss": 32.7999, "step": 16068 }, { "epoch": 382.5970149253731, "grad_norm": 37.49189376831055, "learning_rate": 9.936904761904763e-06, "loss": 32.5376, "step": 16069 }, { "epoch": 382.6208955223881, "grad_norm": 27.8145751953125, "learning_rate": 9.936309523809525e-06, "loss": 32.1167, "step": 16070 }, { "epoch": 382.644776119403, "grad_norm": 42.59127426147461, "learning_rate": 9.935714285714286e-06, "loss": 31.4012, "step": 16071 }, { "epoch": 382.6686567164179, "grad_norm": 28.887666702270508, "learning_rate": 9.935119047619048e-06, "loss": 31.6601, "step": 16072 }, { "epoch": 382.6925373134328, "grad_norm": 40.80937194824219, "learning_rate": 9.93452380952381e-06, "loss": 31.5855, "step": 16073 }, { "epoch": 382.7164179104478, "grad_norm": 34.60734939575195, "learning_rate": 9.933928571428574e-06, "loss": 31.6968, "step": 16074 }, { "epoch": 382.7402985074627, "grad_norm": 37.64997863769531, "learning_rate": 9.933333333333334e-06, "loss": 30.9189, "step": 16075 }, { "epoch": 382.7641791044776, "grad_norm": 36.916038513183594, "learning_rate": 9.932738095238095e-06, "loss": 32.6017, "step": 16076 }, { "epoch": 382.78805970149256, "grad_norm": 35.18901062011719, "learning_rate": 9.932142857142857e-06, "loss": 31.8938, "step": 16077 }, { "epoch": 382.81194029850747, "grad_norm": 31.393796920776367, "learning_rate": 9.93154761904762e-06, "loss": 31.788, "step": 16078 }, { "epoch": 382.8358208955224, "grad_norm": 40.976837158203125, "learning_rate": 9.930952380952383e-06, "loss": 31.6394, "step": 16079 }, { "epoch": 382.85970149253734, "grad_norm": 31.78545379638672, "learning_rate": 9.930357142857143e-06, "loss": 32.4881, "step": 16080 }, { "epoch": 382.88358208955225, "grad_norm": 39.9713020324707, "learning_rate": 9.929761904761906e-06, "loss": 32.4877, "step": 16081 }, { "epoch": 382.90746268656716, "grad_norm": 35.2440185546875, "learning_rate": 9.929166666666668e-06, "loss": 31.6711, "step": 16082 }, { "epoch": 382.93134328358207, "grad_norm": 35.897239685058594, "learning_rate": 9.92857142857143e-06, "loss": 31.0469, "step": 16083 }, { "epoch": 382.95522388059703, "grad_norm": 32.89672088623047, "learning_rate": 9.927976190476192e-06, "loss": 31.9327, "step": 16084 }, { "epoch": 382.97910447761194, "grad_norm": 31.14480972290039, "learning_rate": 9.927380952380953e-06, "loss": 32.2283, "step": 16085 }, { "epoch": 383.0, "grad_norm": 23.345359802246094, "learning_rate": 9.926785714285715e-06, "loss": 27.4965, "step": 16086 }, { "epoch": 383.0238805970149, "grad_norm": 36.6619987487793, "learning_rate": 9.926190476190477e-06, "loss": 31.1128, "step": 16087 }, { "epoch": 383.0477611940299, "grad_norm": 29.31681251525879, "learning_rate": 9.925595238095239e-06, "loss": 31.9634, "step": 16088 }, { "epoch": 383.0716417910448, "grad_norm": 36.53302001953125, "learning_rate": 9.925e-06, "loss": 31.8536, "step": 16089 }, { "epoch": 383.0955223880597, "grad_norm": 35.891845703125, "learning_rate": 9.924404761904762e-06, "loss": 30.4767, "step": 16090 }, { "epoch": 383.1194029850746, "grad_norm": 36.07611846923828, "learning_rate": 9.923809523809524e-06, "loss": 32.2482, "step": 16091 }, { "epoch": 383.14328358208957, "grad_norm": 30.02564811706543, "learning_rate": 9.923214285714286e-06, "loss": 31.355, "step": 16092 }, { "epoch": 383.1671641791045, "grad_norm": 32.45337677001953, "learning_rate": 9.922619047619048e-06, "loss": 31.4327, "step": 16093 }, { "epoch": 383.1910447761194, "grad_norm": 29.737163543701172, "learning_rate": 9.922023809523811e-06, "loss": 30.0838, "step": 16094 }, { "epoch": 383.21492537313435, "grad_norm": 36.99846267700195, "learning_rate": 9.921428571428572e-06, "loss": 32.8689, "step": 16095 }, { "epoch": 383.23880597014926, "grad_norm": 33.277320861816406, "learning_rate": 9.920833333333333e-06, "loss": 30.6591, "step": 16096 }, { "epoch": 383.26268656716417, "grad_norm": 35.308570861816406, "learning_rate": 9.920238095238097e-06, "loss": 31.7971, "step": 16097 }, { "epoch": 383.28656716417913, "grad_norm": 32.15773391723633, "learning_rate": 9.919642857142859e-06, "loss": 30.9182, "step": 16098 }, { "epoch": 383.31044776119404, "grad_norm": 35.116065979003906, "learning_rate": 9.91904761904762e-06, "loss": 31.0777, "step": 16099 }, { "epoch": 383.33432835820895, "grad_norm": 32.25497817993164, "learning_rate": 9.91845238095238e-06, "loss": 31.5772, "step": 16100 }, { "epoch": 383.35820895522386, "grad_norm": 35.16862106323242, "learning_rate": 9.917857142857144e-06, "loss": 30.7156, "step": 16101 }, { "epoch": 383.3820895522388, "grad_norm": 32.17951583862305, "learning_rate": 9.917261904761906e-06, "loss": 31.9786, "step": 16102 }, { "epoch": 383.40597014925373, "grad_norm": NaN, "learning_rate": 9.916666666666668e-06, "loss": 52.2399, "step": 16103 }, { "epoch": 383.42985074626864, "grad_norm": 34.20357131958008, "learning_rate": 9.916666666666668e-06, "loss": 32.0766, "step": 16104 }, { "epoch": 383.4537313432836, "grad_norm": 27.18743896484375, "learning_rate": 9.91607142857143e-06, "loss": 31.8092, "step": 16105 }, { "epoch": 383.4776119402985, "grad_norm": 36.65757369995117, "learning_rate": 9.915476190476191e-06, "loss": 31.9558, "step": 16106 }, { "epoch": 383.5014925373134, "grad_norm": 30.82261848449707, "learning_rate": 9.914880952380953e-06, "loss": 31.7545, "step": 16107 }, { "epoch": 383.52537313432833, "grad_norm": 37.73588562011719, "learning_rate": 9.914285714285715e-06, "loss": 30.3501, "step": 16108 }, { "epoch": 383.5492537313433, "grad_norm": 31.677982330322266, "learning_rate": 9.913690476190477e-06, "loss": 31.3569, "step": 16109 }, { "epoch": 383.5731343283582, "grad_norm": 32.6319694519043, "learning_rate": 9.91309523809524e-06, "loss": 31.7832, "step": 16110 }, { "epoch": 383.5970149253731, "grad_norm": 27.835472106933594, "learning_rate": 9.9125e-06, "loss": 31.5501, "step": 16111 }, { "epoch": 383.6208955223881, "grad_norm": 33.154388427734375, "learning_rate": 9.911904761904762e-06, "loss": 31.3517, "step": 16112 }, { "epoch": 383.644776119403, "grad_norm": 30.630441665649414, "learning_rate": 9.911309523809524e-06, "loss": 31.8968, "step": 16113 }, { "epoch": 383.6686567164179, "grad_norm": 42.44910430908203, "learning_rate": 9.910714285714288e-06, "loss": 32.3752, "step": 16114 }, { "epoch": 383.6925373134328, "grad_norm": 35.22804641723633, "learning_rate": 9.91011904761905e-06, "loss": 31.7221, "step": 16115 }, { "epoch": 383.7164179104478, "grad_norm": 31.43850326538086, "learning_rate": 9.90952380952381e-06, "loss": 32.5193, "step": 16116 }, { "epoch": 383.7402985074627, "grad_norm": 27.685434341430664, "learning_rate": 9.908928571428573e-06, "loss": 31.3131, "step": 16117 }, { "epoch": 383.7641791044776, "grad_norm": 31.522960662841797, "learning_rate": 9.908333333333335e-06, "loss": 31.9096, "step": 16118 }, { "epoch": 383.78805970149256, "grad_norm": 27.524206161499023, "learning_rate": 9.907738095238097e-06, "loss": 31.0031, "step": 16119 }, { "epoch": 383.81194029850747, "grad_norm": 38.7270393371582, "learning_rate": 9.907142857142858e-06, "loss": 30.5195, "step": 16120 }, { "epoch": 383.8358208955224, "grad_norm": 33.950130462646484, "learning_rate": 9.90654761904762e-06, "loss": 31.3107, "step": 16121 }, { "epoch": 383.85970149253734, "grad_norm": 31.93348503112793, "learning_rate": 9.905952380952382e-06, "loss": 31.3036, "step": 16122 }, { "epoch": 383.88358208955225, "grad_norm": 31.724279403686523, "learning_rate": 9.905357142857144e-06, "loss": 30.6898, "step": 16123 }, { "epoch": 383.90746268656716, "grad_norm": 29.71946144104004, "learning_rate": 9.904761904761906e-06, "loss": 31.6857, "step": 16124 }, { "epoch": 383.93134328358207, "grad_norm": 26.805870056152344, "learning_rate": 9.904166666666667e-06, "loss": 31.5954, "step": 16125 }, { "epoch": 383.95522388059703, "grad_norm": 32.27679443359375, "learning_rate": 9.90357142857143e-06, "loss": 31.3116, "step": 16126 }, { "epoch": 383.97910447761194, "grad_norm": 30.086898803710938, "learning_rate": 9.902976190476191e-06, "loss": 31.8662, "step": 16127 }, { "epoch": 384.0, "grad_norm": 31.092683792114258, "learning_rate": 9.902380952380953e-06, "loss": 28.2241, "step": 16128 }, { "epoch": 384.0238805970149, "grad_norm": 32.8645133972168, "learning_rate": 9.901785714285715e-06, "loss": 31.9904, "step": 16129 }, { "epoch": 384.0477611940299, "grad_norm": 32.35710525512695, "learning_rate": 9.901190476190476e-06, "loss": 31.4219, "step": 16130 }, { "epoch": 384.0716417910448, "grad_norm": 27.111024856567383, "learning_rate": 9.900595238095238e-06, "loss": 31.3478, "step": 16131 }, { "epoch": 384.0955223880597, "grad_norm": 31.46786880493164, "learning_rate": 9.9e-06, "loss": 32.2751, "step": 16132 }, { "epoch": 384.1194029850746, "grad_norm": 30.0049991607666, "learning_rate": 9.899404761904764e-06, "loss": 32.7822, "step": 16133 }, { "epoch": 384.14328358208957, "grad_norm": 33.938175201416016, "learning_rate": 9.898809523809525e-06, "loss": 30.7121, "step": 16134 }, { "epoch": 384.1671641791045, "grad_norm": 28.707305908203125, "learning_rate": 9.898214285714286e-06, "loss": 30.6729, "step": 16135 }, { "epoch": 384.1910447761194, "grad_norm": 32.50811767578125, "learning_rate": 9.897619047619047e-06, "loss": 30.0735, "step": 16136 }, { "epoch": 384.21492537313435, "grad_norm": 30.479076385498047, "learning_rate": 9.89702380952381e-06, "loss": 31.5396, "step": 16137 }, { "epoch": 384.23880597014926, "grad_norm": 33.949214935302734, "learning_rate": 9.896428571428573e-06, "loss": 31.8274, "step": 16138 }, { "epoch": 384.26268656716417, "grad_norm": 26.714616775512695, "learning_rate": 9.895833333333334e-06, "loss": 31.9519, "step": 16139 }, { "epoch": 384.28656716417913, "grad_norm": 38.129310607910156, "learning_rate": 9.895238095238096e-06, "loss": 31.742, "step": 16140 }, { "epoch": 384.31044776119404, "grad_norm": 35.66838836669922, "learning_rate": 9.894642857142858e-06, "loss": 31.2153, "step": 16141 }, { "epoch": 384.33432835820895, "grad_norm": 30.687631607055664, "learning_rate": 9.89404761904762e-06, "loss": 31.3691, "step": 16142 }, { "epoch": 384.35820895522386, "grad_norm": 29.71688461303711, "learning_rate": 9.893452380952382e-06, "loss": 32.3767, "step": 16143 }, { "epoch": 384.3820895522388, "grad_norm": NaN, "learning_rate": 9.892857142857143e-06, "loss": 50.6448, "step": 16144 }, { "epoch": 384.40597014925373, "grad_norm": 31.855520248413086, "learning_rate": 9.892857142857143e-06, "loss": 30.7422, "step": 16145 }, { "epoch": 384.42985074626864, "grad_norm": 25.546953201293945, "learning_rate": 9.892261904761905e-06, "loss": 32.2336, "step": 16146 }, { "epoch": 384.4537313432836, "grad_norm": 32.33867263793945, "learning_rate": 9.891666666666667e-06, "loss": 30.6471, "step": 16147 }, { "epoch": 384.4776119402985, "grad_norm": 27.39624786376953, "learning_rate": 9.891071428571429e-06, "loss": 31.9699, "step": 16148 }, { "epoch": 384.5014925373134, "grad_norm": 34.7828369140625, "learning_rate": 9.89047619047619e-06, "loss": 30.8026, "step": 16149 }, { "epoch": 384.52537313432833, "grad_norm": 28.863527297973633, "learning_rate": 9.889880952380954e-06, "loss": 31.0636, "step": 16150 }, { "epoch": 384.5492537313433, "grad_norm": 30.425647735595703, "learning_rate": 9.889285714285714e-06, "loss": 32.5964, "step": 16151 }, { "epoch": 384.5731343283582, "grad_norm": 25.711910247802734, "learning_rate": 9.888690476190476e-06, "loss": 31.3709, "step": 16152 }, { "epoch": 384.5970149253731, "grad_norm": 30.859392166137695, "learning_rate": 9.88809523809524e-06, "loss": 32.0903, "step": 16153 }, { "epoch": 384.6208955223881, "grad_norm": 27.224456787109375, "learning_rate": 9.887500000000001e-06, "loss": 30.7084, "step": 16154 }, { "epoch": 384.644776119403, "grad_norm": 34.85042953491211, "learning_rate": 9.886904761904763e-06, "loss": 31.8646, "step": 16155 }, { "epoch": 384.6686567164179, "grad_norm": 31.036788940429688, "learning_rate": 9.886309523809523e-06, "loss": 31.217, "step": 16156 }, { "epoch": 384.6925373134328, "grad_norm": 34.45530319213867, "learning_rate": 9.885714285714287e-06, "loss": 32.0169, "step": 16157 }, { "epoch": 384.7164179104478, "grad_norm": 29.6302547454834, "learning_rate": 9.885119047619049e-06, "loss": 32.0262, "step": 16158 }, { "epoch": 384.7402985074627, "grad_norm": 27.671533584594727, "learning_rate": 9.88452380952381e-06, "loss": 31.8737, "step": 16159 }, { "epoch": 384.7641791044776, "grad_norm": 24.455589294433594, "learning_rate": 9.883928571428572e-06, "loss": 31.1658, "step": 16160 }, { "epoch": 384.78805970149256, "grad_norm": 25.79328727722168, "learning_rate": 9.883333333333334e-06, "loss": 29.9689, "step": 16161 }, { "epoch": 384.81194029850747, "grad_norm": 21.47579574584961, "learning_rate": 9.882738095238096e-06, "loss": 31.6123, "step": 16162 }, { "epoch": 384.8358208955224, "grad_norm": 28.515636444091797, "learning_rate": 9.882142857142858e-06, "loss": 30.9889, "step": 16163 }, { "epoch": 384.85970149253734, "grad_norm": 23.242191314697266, "learning_rate": 9.88154761904762e-06, "loss": 31.6707, "step": 16164 }, { "epoch": 384.88358208955225, "grad_norm": 29.00843620300293, "learning_rate": 9.880952380952381e-06, "loss": 31.4587, "step": 16165 }, { "epoch": 384.90746268656716, "grad_norm": 22.492422103881836, "learning_rate": 9.880357142857143e-06, "loss": 31.6438, "step": 16166 }, { "epoch": 384.93134328358207, "grad_norm": 27.189645767211914, "learning_rate": 9.879761904761905e-06, "loss": 31.5126, "step": 16167 }, { "epoch": 384.95522388059703, "grad_norm": 23.21908950805664, "learning_rate": 9.879166666666667e-06, "loss": 32.2981, "step": 16168 }, { "epoch": 384.97910447761194, "grad_norm": 27.132030487060547, "learning_rate": 9.87857142857143e-06, "loss": 32.1087, "step": 16169 }, { "epoch": 385.0, "grad_norm": 22.102291107177734, "learning_rate": 9.877976190476192e-06, "loss": 27.1166, "step": 16170 }, { "epoch": 385.0238805970149, "grad_norm": 23.863162994384766, "learning_rate": 9.877380952380952e-06, "loss": 31.7717, "step": 16171 }, { "epoch": 385.0477611940299, "grad_norm": 20.544803619384766, "learning_rate": 9.876785714285714e-06, "loss": 31.0464, "step": 16172 }, { "epoch": 385.0716417910448, "grad_norm": 22.5712947845459, "learning_rate": 9.876190476190478e-06, "loss": 32.1594, "step": 16173 }, { "epoch": 385.0955223880597, "grad_norm": 20.91865348815918, "learning_rate": 9.87559523809524e-06, "loss": 33.1558, "step": 16174 }, { "epoch": 385.1194029850746, "grad_norm": 21.566856384277344, "learning_rate": 9.875000000000001e-06, "loss": 31.3823, "step": 16175 }, { "epoch": 385.14328358208957, "grad_norm": 18.019092559814453, "learning_rate": 9.874404761904763e-06, "loss": 30.0196, "step": 16176 }, { "epoch": 385.1671641791045, "grad_norm": 21.332061767578125, "learning_rate": 9.873809523809525e-06, "loss": 30.5859, "step": 16177 }, { "epoch": 385.1910447761194, "grad_norm": 18.231172561645508, "learning_rate": 9.873214285714287e-06, "loss": 30.5717, "step": 16178 }, { "epoch": 385.21492537313435, "grad_norm": 20.333765029907227, "learning_rate": 9.872619047619048e-06, "loss": 31.2717, "step": 16179 }, { "epoch": 385.23880597014926, "grad_norm": NaN, "learning_rate": 9.87202380952381e-06, "loss": 50.9608, "step": 16180 }, { "epoch": 385.26268656716417, "grad_norm": 20.32228660583496, "learning_rate": 9.87202380952381e-06, "loss": 30.3185, "step": 16181 }, { "epoch": 385.28656716417913, "grad_norm": 18.662813186645508, "learning_rate": 9.871428571428572e-06, "loss": 31.3118, "step": 16182 }, { "epoch": 385.31044776119404, "grad_norm": 19.963438034057617, "learning_rate": 9.870833333333334e-06, "loss": 32.0171, "step": 16183 }, { "epoch": 385.33432835820895, "grad_norm": 20.611982345581055, "learning_rate": 9.870238095238096e-06, "loss": 31.6538, "step": 16184 }, { "epoch": 385.35820895522386, "grad_norm": 18.705074310302734, "learning_rate": 9.869642857142857e-06, "loss": 31.8616, "step": 16185 }, { "epoch": 385.3820895522388, "grad_norm": 20.165292739868164, "learning_rate": 9.869047619047621e-06, "loss": 32.7904, "step": 16186 }, { "epoch": 385.40597014925373, "grad_norm": 16.604703903198242, "learning_rate": 9.868452380952381e-06, "loss": 32.0385, "step": 16187 }, { "epoch": 385.42985074626864, "grad_norm": 19.941293716430664, "learning_rate": 9.867857142857143e-06, "loss": 31.5136, "step": 16188 }, { "epoch": 385.4537313432836, "grad_norm": 16.559743881225586, "learning_rate": 9.867261904761906e-06, "loss": 30.3442, "step": 16189 }, { "epoch": 385.4776119402985, "grad_norm": 21.531822204589844, "learning_rate": 9.866666666666668e-06, "loss": 32.8664, "step": 16190 }, { "epoch": 385.5014925373134, "grad_norm": 18.922697067260742, "learning_rate": 9.86607142857143e-06, "loss": 31.7802, "step": 16191 }, { "epoch": 385.52537313432833, "grad_norm": 18.97344207763672, "learning_rate": 9.86547619047619e-06, "loss": 31.3054, "step": 16192 }, { "epoch": 385.5492537313433, "grad_norm": 22.07037925720215, "learning_rate": 9.864880952380954e-06, "loss": 30.7271, "step": 16193 }, { "epoch": 385.5731343283582, "grad_norm": 19.429729461669922, "learning_rate": 9.864285714285715e-06, "loss": 31.6555, "step": 16194 }, { "epoch": 385.5970149253731, "grad_norm": 22.308879852294922, "learning_rate": 9.863690476190477e-06, "loss": 32.0755, "step": 16195 }, { "epoch": 385.6208955223881, "grad_norm": 20.618770599365234, "learning_rate": 9.863095238095239e-06, "loss": 31.2475, "step": 16196 }, { "epoch": 385.644776119403, "grad_norm": 18.875965118408203, "learning_rate": 9.862500000000001e-06, "loss": 32.2849, "step": 16197 }, { "epoch": 385.6686567164179, "grad_norm": 20.972700119018555, "learning_rate": 9.861904761904763e-06, "loss": 31.9794, "step": 16198 }, { "epoch": 385.6925373134328, "grad_norm": 20.304363250732422, "learning_rate": 9.861309523809524e-06, "loss": 32.1243, "step": 16199 }, { "epoch": 385.7164179104478, "grad_norm": 21.480579376220703, "learning_rate": 9.860714285714286e-06, "loss": 31.4321, "step": 16200 }, { "epoch": 385.7402985074627, "grad_norm": 19.26569175720215, "learning_rate": 9.860119047619048e-06, "loss": 31.5931, "step": 16201 }, { "epoch": 385.7641791044776, "grad_norm": 17.374101638793945, "learning_rate": 9.85952380952381e-06, "loss": 32.2479, "step": 16202 }, { "epoch": 385.78805970149256, "grad_norm": 22.569284439086914, "learning_rate": 9.858928571428572e-06, "loss": 31.0977, "step": 16203 }, { "epoch": 385.81194029850747, "grad_norm": 18.83555793762207, "learning_rate": 9.858333333333334e-06, "loss": 31.0797, "step": 16204 }, { "epoch": 385.8358208955224, "grad_norm": 18.740907669067383, "learning_rate": 9.857738095238097e-06, "loss": 30.9604, "step": 16205 }, { "epoch": 385.85970149253734, "grad_norm": 18.844552993774414, "learning_rate": 9.857142857142859e-06, "loss": 30.92, "step": 16206 }, { "epoch": 385.88358208955225, "grad_norm": 19.82196044921875, "learning_rate": 9.856547619047619e-06, "loss": 30.9999, "step": 16207 }, { "epoch": 385.90746268656716, "grad_norm": 17.69489860534668, "learning_rate": 9.85595238095238e-06, "loss": 32.1222, "step": 16208 }, { "epoch": 385.93134328358207, "grad_norm": 20.43714141845703, "learning_rate": 9.855357142857144e-06, "loss": 30.7134, "step": 16209 }, { "epoch": 385.95522388059703, "grad_norm": 19.41302490234375, "learning_rate": 9.854761904761906e-06, "loss": 32.0069, "step": 16210 }, { "epoch": 385.97910447761194, "grad_norm": 22.72101402282715, "learning_rate": 9.854166666666668e-06, "loss": 30.8264, "step": 16211 }, { "epoch": 386.0, "grad_norm": 15.351430892944336, "learning_rate": 9.85357142857143e-06, "loss": 27.5503, "step": 16212 }, { "epoch": 386.0238805970149, "grad_norm": 21.436670303344727, "learning_rate": 9.852976190476192e-06, "loss": 32.3433, "step": 16213 }, { "epoch": 386.0477611940299, "grad_norm": 16.964204788208008, "learning_rate": 9.852380952380953e-06, "loss": 29.4095, "step": 16214 }, { "epoch": 386.0716417910448, "grad_norm": 23.388107299804688, "learning_rate": 9.851785714285715e-06, "loss": 31.237, "step": 16215 }, { "epoch": 386.0955223880597, "grad_norm": 16.503149032592773, "learning_rate": 9.851190476190477e-06, "loss": 31.8172, "step": 16216 }, { "epoch": 386.1194029850746, "grad_norm": 24.22135353088379, "learning_rate": 9.850595238095239e-06, "loss": 29.8872, "step": 16217 }, { "epoch": 386.14328358208957, "grad_norm": 21.456871032714844, "learning_rate": 9.85e-06, "loss": 32.8561, "step": 16218 }, { "epoch": 386.1671641791045, "grad_norm": 20.144268035888672, "learning_rate": 9.849404761904762e-06, "loss": 32.1675, "step": 16219 }, { "epoch": 386.1910447761194, "grad_norm": 20.185773849487305, "learning_rate": 9.848809523809524e-06, "loss": 30.7105, "step": 16220 }, { "epoch": 386.21492537313435, "grad_norm": 18.498830795288086, "learning_rate": 9.848214285714288e-06, "loss": 31.1193, "step": 16221 }, { "epoch": 386.23880597014926, "grad_norm": 22.099266052246094, "learning_rate": 9.847619047619048e-06, "loss": 31.4756, "step": 16222 }, { "epoch": 386.26268656716417, "grad_norm": 20.17631721496582, "learning_rate": 9.84702380952381e-06, "loss": 31.4381, "step": 16223 }, { "epoch": 386.28656716417913, "grad_norm": 18.7167911529541, "learning_rate": 9.846428571428573e-06, "loss": 31.3872, "step": 16224 }, { "epoch": 386.31044776119404, "grad_norm": 18.53969383239746, "learning_rate": 9.845833333333335e-06, "loss": 31.404, "step": 16225 }, { "epoch": 386.33432835820895, "grad_norm": 19.445709228515625, "learning_rate": 9.845238095238097e-06, "loss": 29.5865, "step": 16226 }, { "epoch": 386.35820895522386, "grad_norm": 20.605234146118164, "learning_rate": 9.844642857142857e-06, "loss": 31.987, "step": 16227 }, { "epoch": 386.3820895522388, "grad_norm": 20.468053817749023, "learning_rate": 9.84404761904762e-06, "loss": 31.9659, "step": 16228 }, { "epoch": 386.40597014925373, "grad_norm": 21.1354923248291, "learning_rate": 9.843452380952382e-06, "loss": 31.8625, "step": 16229 }, { "epoch": 386.42985074626864, "grad_norm": 20.171497344970703, "learning_rate": 9.842857142857144e-06, "loss": 31.3537, "step": 16230 }, { "epoch": 386.4537313432836, "grad_norm": 25.776453018188477, "learning_rate": 9.842261904761906e-06, "loss": 31.3447, "step": 16231 }, { "epoch": 386.4776119402985, "grad_norm": 19.82379722595215, "learning_rate": 9.841666666666668e-06, "loss": 31.3169, "step": 16232 }, { "epoch": 386.5014925373134, "grad_norm": 23.489431381225586, "learning_rate": 9.84107142857143e-06, "loss": 31.9554, "step": 16233 }, { "epoch": 386.52537313432833, "grad_norm": 30.809371948242188, "learning_rate": 9.840476190476191e-06, "loss": 31.2285, "step": 16234 }, { "epoch": 386.5492537313433, "grad_norm": 22.05644989013672, "learning_rate": 9.839880952380953e-06, "loss": 32.0698, "step": 16235 }, { "epoch": 386.5731343283582, "grad_norm": 33.66168212890625, "learning_rate": 9.839285714285715e-06, "loss": 30.8832, "step": 16236 }, { "epoch": 386.5970149253731, "grad_norm": 25.675928115844727, "learning_rate": 9.838690476190477e-06, "loss": 31.5365, "step": 16237 }, { "epoch": 386.6208955223881, "grad_norm": 34.02192687988281, "learning_rate": 9.838095238095238e-06, "loss": 31.6749, "step": 16238 }, { "epoch": 386.644776119403, "grad_norm": 22.996620178222656, "learning_rate": 9.8375e-06, "loss": 31.2574, "step": 16239 }, { "epoch": 386.6686567164179, "grad_norm": 38.00943374633789, "learning_rate": 9.836904761904764e-06, "loss": 31.8429, "step": 16240 }, { "epoch": 386.6925373134328, "grad_norm": 26.652149200439453, "learning_rate": 9.836309523809524e-06, "loss": 32.4164, "step": 16241 }, { "epoch": 386.7164179104478, "grad_norm": 40.26640319824219, "learning_rate": 9.835714285714286e-06, "loss": 32.0015, "step": 16242 }, { "epoch": 386.7402985074627, "grad_norm": 30.79282569885254, "learning_rate": 9.83511904761905e-06, "loss": 31.1453, "step": 16243 }, { "epoch": 386.7641791044776, "grad_norm": 35.20528793334961, "learning_rate": 9.834523809523811e-06, "loss": 32.5224, "step": 16244 }, { "epoch": 386.78805970149256, "grad_norm": 30.823259353637695, "learning_rate": 9.833928571428573e-06, "loss": 31.7743, "step": 16245 }, { "epoch": 386.81194029850747, "grad_norm": 39.33624267578125, "learning_rate": 9.833333333333333e-06, "loss": 30.8932, "step": 16246 }, { "epoch": 386.8358208955224, "grad_norm": 31.379451751708984, "learning_rate": 9.832738095238096e-06, "loss": 31.0847, "step": 16247 }, { "epoch": 386.85970149253734, "grad_norm": 38.356510162353516, "learning_rate": 9.832142857142858e-06, "loss": 31.1917, "step": 16248 }, { "epoch": 386.88358208955225, "grad_norm": 35.24120330810547, "learning_rate": 9.83154761904762e-06, "loss": 31.9965, "step": 16249 }, { "epoch": 386.90746268656716, "grad_norm": 30.99354362487793, "learning_rate": 9.830952380952382e-06, "loss": 31.9035, "step": 16250 }, { "epoch": 386.93134328358207, "grad_norm": 33.42157745361328, "learning_rate": 9.830357142857144e-06, "loss": 31.9036, "step": 16251 }, { "epoch": 386.95522388059703, "grad_norm": 34.370689392089844, "learning_rate": 9.829761904761905e-06, "loss": 31.5059, "step": 16252 }, { "epoch": 386.97910447761194, "grad_norm": 26.31012725830078, "learning_rate": 9.829166666666667e-06, "loss": 31.8671, "step": 16253 }, { "epoch": 387.0, "grad_norm": 35.351680755615234, "learning_rate": 9.828571428571429e-06, "loss": 26.3941, "step": 16254 }, { "epoch": 387.0238805970149, "grad_norm": 36.14506530761719, "learning_rate": 9.827976190476191e-06, "loss": 32.1555, "step": 16255 }, { "epoch": 387.0477611940299, "grad_norm": 32.076114654541016, "learning_rate": 9.827380952380953e-06, "loss": 31.2854, "step": 16256 }, { "epoch": 387.0716417910448, "grad_norm": NaN, "learning_rate": 9.826785714285715e-06, "loss": 32.6211, "step": 16257 }, { "epoch": 387.0955223880597, "grad_norm": 33.81434631347656, "learning_rate": 9.826785714285715e-06, "loss": 31.3886, "step": 16258 }, { "epoch": 387.1194029850746, "grad_norm": 32.51563262939453, "learning_rate": 9.826190476190476e-06, "loss": 31.6933, "step": 16259 }, { "epoch": 387.14328358208957, "grad_norm": 27.100536346435547, "learning_rate": 9.82559523809524e-06, "loss": 31.3381, "step": 16260 }, { "epoch": 387.1671641791045, "grad_norm": 32.19075012207031, "learning_rate": 9.825000000000002e-06, "loss": 31.0665, "step": 16261 }, { "epoch": 387.1910447761194, "grad_norm": 29.4110050201416, "learning_rate": 9.824404761904762e-06, "loss": 31.431, "step": 16262 }, { "epoch": 387.21492537313435, "grad_norm": 34.70882797241211, "learning_rate": 9.823809523809524e-06, "loss": 32.1861, "step": 16263 }, { "epoch": 387.23880597014926, "grad_norm": 30.694459915161133, "learning_rate": 9.823214285714287e-06, "loss": 31.6779, "step": 16264 }, { "epoch": 387.26268656716417, "grad_norm": 33.5014533996582, "learning_rate": 9.822619047619049e-06, "loss": 31.0733, "step": 16265 }, { "epoch": 387.28656716417913, "grad_norm": 29.649723052978516, "learning_rate": 9.82202380952381e-06, "loss": 31.7184, "step": 16266 }, { "epoch": 387.31044776119404, "grad_norm": 33.55296325683594, "learning_rate": 9.821428571428573e-06, "loss": 31.9725, "step": 16267 }, { "epoch": 387.33432835820895, "grad_norm": 32.031715393066406, "learning_rate": 9.820833333333334e-06, "loss": 31.194, "step": 16268 }, { "epoch": 387.35820895522386, "grad_norm": 38.761688232421875, "learning_rate": 9.820238095238096e-06, "loss": 31.4994, "step": 16269 }, { "epoch": 387.3820895522388, "grad_norm": 37.0084342956543, "learning_rate": 9.819642857142858e-06, "loss": 32.1369, "step": 16270 }, { "epoch": 387.40597014925373, "grad_norm": 30.358522415161133, "learning_rate": 9.81904761904762e-06, "loss": 30.7688, "step": 16271 }, { "epoch": 387.42985074626864, "grad_norm": 24.442169189453125, "learning_rate": 9.818452380952382e-06, "loss": 30.1754, "step": 16272 }, { "epoch": 387.4537313432836, "grad_norm": 35.01533508300781, "learning_rate": 9.817857142857143e-06, "loss": 30.5212, "step": 16273 }, { "epoch": 387.4776119402985, "grad_norm": 32.43381118774414, "learning_rate": 9.817261904761905e-06, "loss": 32.2911, "step": 16274 }, { "epoch": 387.5014925373134, "grad_norm": 35.44935607910156, "learning_rate": 9.816666666666667e-06, "loss": 31.2107, "step": 16275 }, { "epoch": 387.52537313432833, "grad_norm": 35.55957794189453, "learning_rate": 9.81607142857143e-06, "loss": 32.334, "step": 16276 }, { "epoch": 387.5492537313433, "grad_norm": 29.662111282348633, "learning_rate": 9.81547619047619e-06, "loss": 31.8393, "step": 16277 }, { "epoch": 387.5731343283582, "grad_norm": 24.669198989868164, "learning_rate": 9.814880952380952e-06, "loss": 30.5643, "step": 16278 }, { "epoch": 387.5970149253731, "grad_norm": 33.51001739501953, "learning_rate": 9.814285714285716e-06, "loss": 30.8368, "step": 16279 }, { "epoch": 387.6208955223881, "grad_norm": 26.814899444580078, "learning_rate": 9.813690476190478e-06, "loss": 31.9893, "step": 16280 }, { "epoch": 387.644776119403, "grad_norm": 34.983394622802734, "learning_rate": 9.81309523809524e-06, "loss": 30.6433, "step": 16281 }, { "epoch": 387.6686567164179, "grad_norm": 30.86996841430664, "learning_rate": 9.8125e-06, "loss": 30.4695, "step": 16282 }, { "epoch": 387.6925373134328, "grad_norm": 33.628150939941406, "learning_rate": 9.811904761904763e-06, "loss": 31.8074, "step": 16283 }, { "epoch": 387.7164179104478, "grad_norm": 29.482221603393555, "learning_rate": 9.811309523809525e-06, "loss": 30.9069, "step": 16284 }, { "epoch": 387.7402985074627, "grad_norm": 34.709224700927734, "learning_rate": 9.810714285714287e-06, "loss": 30.5928, "step": 16285 }, { "epoch": 387.7641791044776, "grad_norm": 34.80800247192383, "learning_rate": 9.810119047619049e-06, "loss": 31.21, "step": 16286 }, { "epoch": 387.78805970149256, "grad_norm": 31.94898223876953, "learning_rate": 9.80952380952381e-06, "loss": 31.9303, "step": 16287 }, { "epoch": 387.81194029850747, "grad_norm": 32.53268051147461, "learning_rate": 9.808928571428572e-06, "loss": 32.5506, "step": 16288 }, { "epoch": 387.8358208955224, "grad_norm": 28.11086082458496, "learning_rate": 9.808333333333334e-06, "loss": 31.3513, "step": 16289 }, { "epoch": 387.85970149253734, "grad_norm": 23.117406845092773, "learning_rate": 9.807738095238096e-06, "loss": 30.733, "step": 16290 }, { "epoch": 387.88358208955225, "grad_norm": 34.6082649230957, "learning_rate": 9.807142857142858e-06, "loss": 30.7887, "step": 16291 }, { "epoch": 387.90746268656716, "grad_norm": 27.700349807739258, "learning_rate": 9.80654761904762e-06, "loss": 31.7152, "step": 16292 }, { "epoch": 387.93134328358207, "grad_norm": 39.212520599365234, "learning_rate": 9.805952380952381e-06, "loss": 31.4239, "step": 16293 }, { "epoch": 387.95522388059703, "grad_norm": 38.54717254638672, "learning_rate": 9.805357142857143e-06, "loss": 31.5663, "step": 16294 }, { "epoch": 387.97910447761194, "grad_norm": 28.900754928588867, "learning_rate": 9.804761904761907e-06, "loss": 31.3817, "step": 16295 }, { "epoch": 388.0, "grad_norm": 25.779775619506836, "learning_rate": 9.804166666666668e-06, "loss": 26.816, "step": 16296 }, { "epoch": 388.0238805970149, "grad_norm": 31.25226402282715, "learning_rate": 9.803571428571428e-06, "loss": 31.8264, "step": 16297 }, { "epoch": 388.0477611940299, "grad_norm": 25.336042404174805, "learning_rate": 9.80297619047619e-06, "loss": 30.7403, "step": 16298 }, { "epoch": 388.0716417910448, "grad_norm": 36.018619537353516, "learning_rate": 9.802380952380954e-06, "loss": 30.9437, "step": 16299 }, { "epoch": 388.0955223880597, "grad_norm": 28.848451614379883, "learning_rate": 9.801785714285716e-06, "loss": 31.3893, "step": 16300 }, { "epoch": 388.1194029850746, "grad_norm": 33.30680465698242, "learning_rate": 9.801190476190477e-06, "loss": 30.7243, "step": 16301 }, { "epoch": 388.14328358208957, "grad_norm": 28.282001495361328, "learning_rate": 9.80059523809524e-06, "loss": 29.7735, "step": 16302 }, { "epoch": 388.1671641791045, "grad_norm": 32.32350540161133, "learning_rate": 9.800000000000001e-06, "loss": 30.4583, "step": 16303 }, { "epoch": 388.1910447761194, "grad_norm": 28.372379302978516, "learning_rate": 9.799404761904763e-06, "loss": 31.7407, "step": 16304 }, { "epoch": 388.21492537313435, "grad_norm": 33.158390045166016, "learning_rate": 9.798809523809525e-06, "loss": 32.5392, "step": 16305 }, { "epoch": 388.23880597014926, "grad_norm": 27.013521194458008, "learning_rate": 9.798214285714286e-06, "loss": 30.975, "step": 16306 }, { "epoch": 388.26268656716417, "grad_norm": 33.014801025390625, "learning_rate": 9.797619047619048e-06, "loss": 30.342, "step": 16307 }, { "epoch": 388.28656716417913, "grad_norm": 28.111276626586914, "learning_rate": 9.79702380952381e-06, "loss": 30.6587, "step": 16308 }, { "epoch": 388.31044776119404, "grad_norm": 29.60170555114746, "learning_rate": 9.796428571428572e-06, "loss": 30.9499, "step": 16309 }, { "epoch": 388.33432835820895, "grad_norm": 25.55117416381836, "learning_rate": 9.795833333333334e-06, "loss": 29.306, "step": 16310 }, { "epoch": 388.35820895522386, "grad_norm": 32.451480865478516, "learning_rate": 9.795238095238097e-06, "loss": 32.0626, "step": 16311 }, { "epoch": 388.3820895522388, "grad_norm": 25.698986053466797, "learning_rate": 9.794642857142857e-06, "loss": 30.864, "step": 16312 }, { "epoch": 388.40597014925373, "grad_norm": 32.51515197753906, "learning_rate": 9.794047619047619e-06, "loss": 31.2816, "step": 16313 }, { "epoch": 388.42985074626864, "grad_norm": 30.733959197998047, "learning_rate": 9.793452380952383e-06, "loss": 32.5379, "step": 16314 }, { "epoch": 388.4537313432836, "grad_norm": 34.93669128417969, "learning_rate": 9.792857142857144e-06, "loss": 32.5641, "step": 16315 }, { "epoch": 388.4776119402985, "grad_norm": 33.55181121826172, "learning_rate": 9.792261904761906e-06, "loss": 32.4263, "step": 16316 }, { "epoch": 388.5014925373134, "grad_norm": 30.82851219177246, "learning_rate": 9.791666666666666e-06, "loss": 31.0749, "step": 16317 }, { "epoch": 388.52537313432833, "grad_norm": 27.4837646484375, "learning_rate": 9.79107142857143e-06, "loss": 31.9993, "step": 16318 }, { "epoch": 388.5492537313433, "grad_norm": 28.8560791015625, "learning_rate": 9.790476190476192e-06, "loss": 32.1957, "step": 16319 }, { "epoch": 388.5731343283582, "grad_norm": 26.107858657836914, "learning_rate": 9.789880952380953e-06, "loss": 31.3664, "step": 16320 }, { "epoch": 388.5970149253731, "grad_norm": 28.55649757385254, "learning_rate": 9.789285714285715e-06, "loss": 31.81, "step": 16321 }, { "epoch": 388.6208955223881, "grad_norm": 23.187219619750977, "learning_rate": 9.788690476190477e-06, "loss": 31.0063, "step": 16322 }, { "epoch": 388.644776119403, "grad_norm": 33.165069580078125, "learning_rate": 9.788095238095239e-06, "loss": 31.5465, "step": 16323 }, { "epoch": 388.6686567164179, "grad_norm": 24.102766036987305, "learning_rate": 9.7875e-06, "loss": 30.2347, "step": 16324 }, { "epoch": 388.6925373134328, "grad_norm": 29.791168212890625, "learning_rate": 9.786904761904763e-06, "loss": 30.8982, "step": 16325 }, { "epoch": 388.7164179104478, "grad_norm": 24.17734146118164, "learning_rate": 9.786309523809524e-06, "loss": 31.4424, "step": 16326 }, { "epoch": 388.7402985074627, "grad_norm": 27.78852081298828, "learning_rate": 9.785714285714286e-06, "loss": 31.2909, "step": 16327 }, { "epoch": 388.7641791044776, "grad_norm": 25.768030166625977, "learning_rate": 9.785119047619048e-06, "loss": 32.6746, "step": 16328 }, { "epoch": 388.78805970149256, "grad_norm": 27.267898559570312, "learning_rate": 9.78452380952381e-06, "loss": 31.4876, "step": 16329 }, { "epoch": 388.81194029850747, "grad_norm": 19.101016998291016, "learning_rate": 9.783928571428573e-06, "loss": 32.2565, "step": 16330 }, { "epoch": 388.8358208955224, "grad_norm": 28.757253646850586, "learning_rate": 9.783333333333335e-06, "loss": 32.2536, "step": 16331 }, { "epoch": 388.85970149253734, "grad_norm": 22.96048927307129, "learning_rate": 9.782738095238095e-06, "loss": 31.7659, "step": 16332 }, { "epoch": 388.88358208955225, "grad_norm": 26.02690315246582, "learning_rate": 9.782142857142857e-06, "loss": 31.5862, "step": 16333 }, { "epoch": 388.90746268656716, "grad_norm": 21.96470832824707, "learning_rate": 9.78154761904762e-06, "loss": 31.8278, "step": 16334 }, { "epoch": 388.93134328358207, "grad_norm": 26.473949432373047, "learning_rate": 9.780952380952382e-06, "loss": 30.8082, "step": 16335 }, { "epoch": 388.95522388059703, "grad_norm": 24.845199584960938, "learning_rate": 9.780357142857142e-06, "loss": 31.9927, "step": 16336 }, { "epoch": 388.97910447761194, "grad_norm": 21.908987045288086, "learning_rate": 9.779761904761906e-06, "loss": 31.3995, "step": 16337 }, { "epoch": 389.0, "grad_norm": 21.260526657104492, "learning_rate": 9.779166666666668e-06, "loss": 27.1762, "step": 16338 }, { "epoch": 389.0238805970149, "grad_norm": 23.290285110473633, "learning_rate": 9.77857142857143e-06, "loss": 32.5599, "step": 16339 }, { "epoch": 389.0477611940299, "grad_norm": 20.371511459350586, "learning_rate": 9.777976190476191e-06, "loss": 31.3901, "step": 16340 }, { "epoch": 389.0716417910448, "grad_norm": 24.474977493286133, "learning_rate": 9.777380952380953e-06, "loss": 30.5494, "step": 16341 }, { "epoch": 389.0955223880597, "grad_norm": 23.1691951751709, "learning_rate": 9.776785714285715e-06, "loss": 30.411, "step": 16342 }, { "epoch": 389.1194029850746, "grad_norm": 18.204683303833008, "learning_rate": 9.776190476190477e-06, "loss": 31.3261, "step": 16343 }, { "epoch": 389.14328358208957, "grad_norm": 21.73653793334961, "learning_rate": 9.775595238095239e-06, "loss": 31.2931, "step": 16344 }, { "epoch": 389.1671641791045, "grad_norm": 19.842016220092773, "learning_rate": 9.775e-06, "loss": 31.1381, "step": 16345 }, { "epoch": 389.1910447761194, "grad_norm": NaN, "learning_rate": 9.774404761904762e-06, "loss": 49.7906, "step": 16346 }, { "epoch": 389.21492537313435, "grad_norm": 21.1848201751709, "learning_rate": 9.774404761904762e-06, "loss": 30.2326, "step": 16347 }, { "epoch": 389.23880597014926, "grad_norm": 19.22425079345703, "learning_rate": 9.773809523809524e-06, "loss": 31.0529, "step": 16348 }, { "epoch": 389.26268656716417, "grad_norm": 17.01042366027832, "learning_rate": 9.773214285714286e-06, "loss": 31.4719, "step": 16349 }, { "epoch": 389.28656716417913, "grad_norm": 29.845762252807617, "learning_rate": 9.77261904761905e-06, "loss": 30.655, "step": 16350 }, { "epoch": 389.31044776119404, "grad_norm": 22.23818588256836, "learning_rate": 9.772023809523811e-06, "loss": 31.7885, "step": 16351 }, { "epoch": 389.33432835820895, "grad_norm": 27.320762634277344, "learning_rate": 9.771428571428571e-06, "loss": 30.5316, "step": 16352 }, { "epoch": 389.35820895522386, "grad_norm": 26.989601135253906, "learning_rate": 9.770833333333333e-06, "loss": 31.1129, "step": 16353 }, { "epoch": 389.3820895522388, "grad_norm": 21.75162696838379, "learning_rate": 9.770238095238097e-06, "loss": 31.9033, "step": 16354 }, { "epoch": 389.40597014925373, "grad_norm": 25.920270919799805, "learning_rate": 9.769642857142858e-06, "loss": 33.0053, "step": 16355 }, { "epoch": 389.42985074626864, "grad_norm": 21.13802719116211, "learning_rate": 9.76904761904762e-06, "loss": 32.3808, "step": 16356 }, { "epoch": 389.4537313432836, "grad_norm": 20.37413215637207, "learning_rate": 9.768452380952382e-06, "loss": 30.5768, "step": 16357 }, { "epoch": 389.4776119402985, "grad_norm": 26.006126403808594, "learning_rate": 9.767857142857144e-06, "loss": 32.0527, "step": 16358 }, { "epoch": 389.5014925373134, "grad_norm": 19.146987915039062, "learning_rate": 9.767261904761906e-06, "loss": 30.2903, "step": 16359 }, { "epoch": 389.52537313432833, "grad_norm": 25.838464736938477, "learning_rate": 9.766666666666667e-06, "loss": 31.661, "step": 16360 }, { "epoch": 389.5492537313433, "grad_norm": 23.29790496826172, "learning_rate": 9.76607142857143e-06, "loss": 32.0736, "step": 16361 }, { "epoch": 389.5731343283582, "grad_norm": 17.556703567504883, "learning_rate": 9.765476190476191e-06, "loss": 32.0744, "step": 16362 }, { "epoch": 389.5970149253731, "grad_norm": 26.709442138671875, "learning_rate": 9.764880952380953e-06, "loss": 29.9062, "step": 16363 }, { "epoch": 389.6208955223881, "grad_norm": 20.65360450744629, "learning_rate": 9.764285714285715e-06, "loss": 32.6888, "step": 16364 }, { "epoch": 389.644776119403, "grad_norm": 21.492149353027344, "learning_rate": 9.763690476190477e-06, "loss": 31.8798, "step": 16365 }, { "epoch": 389.6686567164179, "grad_norm": 24.044824600219727, "learning_rate": 9.76309523809524e-06, "loss": 31.6606, "step": 16366 }, { "epoch": 389.6925373134328, "grad_norm": 21.279258728027344, "learning_rate": 9.7625e-06, "loss": 31.6572, "step": 16367 }, { "epoch": 389.7164179104478, "grad_norm": 19.301111221313477, "learning_rate": 9.761904761904762e-06, "loss": 30.2098, "step": 16368 }, { "epoch": 389.7402985074627, "grad_norm": 27.06990623474121, "learning_rate": 9.761309523809524e-06, "loss": 30.784, "step": 16369 }, { "epoch": 389.7641791044776, "grad_norm": 17.46279525756836, "learning_rate": 9.760714285714287e-06, "loss": 29.7866, "step": 16370 }, { "epoch": 389.78805970149256, "grad_norm": 35.74827575683594, "learning_rate": 9.760119047619049e-06, "loss": 31.9352, "step": 16371 }, { "epoch": 389.81194029850747, "grad_norm": 20.53690528869629, "learning_rate": 9.75952380952381e-06, "loss": 30.6821, "step": 16372 }, { "epoch": 389.8358208955224, "grad_norm": 34.603843688964844, "learning_rate": 9.758928571428573e-06, "loss": 31.6675, "step": 16373 }, { "epoch": 389.85970149253734, "grad_norm": 25.659683227539062, "learning_rate": 9.758333333333334e-06, "loss": 31.1865, "step": 16374 }, { "epoch": 389.88358208955225, "grad_norm": 37.89887619018555, "learning_rate": 9.757738095238096e-06, "loss": 31.7436, "step": 16375 }, { "epoch": 389.90746268656716, "grad_norm": 31.363710403442383, "learning_rate": 9.757142857142858e-06, "loss": 33.2901, "step": 16376 }, { "epoch": 389.93134328358207, "grad_norm": 33.875370025634766, "learning_rate": 9.75654761904762e-06, "loss": 31.0055, "step": 16377 }, { "epoch": 389.95522388059703, "grad_norm": 31.001718521118164, "learning_rate": 9.755952380952382e-06, "loss": 33.0547, "step": 16378 }, { "epoch": 389.97910447761194, "grad_norm": 35.636051177978516, "learning_rate": 9.755357142857144e-06, "loss": 31.671, "step": 16379 }, { "epoch": 390.0, "grad_norm": 22.893800735473633, "learning_rate": 9.754761904761905e-06, "loss": 26.8595, "step": 16380 }, { "epoch": 390.0238805970149, "grad_norm": 29.35207748413086, "learning_rate": 9.754166666666667e-06, "loss": 29.6748, "step": 16381 }, { "epoch": 390.0477611940299, "grad_norm": 25.894886016845703, "learning_rate": 9.753571428571429e-06, "loss": 32.133, "step": 16382 }, { "epoch": 390.0716417910448, "grad_norm": 33.5870475769043, "learning_rate": 9.75297619047619e-06, "loss": 31.5938, "step": 16383 }, { "epoch": 390.0955223880597, "grad_norm": 26.548860549926758, "learning_rate": 9.752380952380953e-06, "loss": 30.8034, "step": 16384 }, { "epoch": 390.1194029850746, "grad_norm": 35.63127899169922, "learning_rate": 9.751785714285716e-06, "loss": 31.7315, "step": 16385 }, { "epoch": 390.14328358208957, "grad_norm": 28.513118743896484, "learning_rate": 9.751190476190478e-06, "loss": 31.5308, "step": 16386 }, { "epoch": 390.1671641791045, "grad_norm": 35.640899658203125, "learning_rate": 9.750595238095238e-06, "loss": 31.6477, "step": 16387 }, { "epoch": 390.1910447761194, "grad_norm": 26.226648330688477, "learning_rate": 9.75e-06, "loss": 30.4413, "step": 16388 }, { "epoch": 390.21492537313435, "grad_norm": 37.20478820800781, "learning_rate": 9.749404761904763e-06, "loss": 31.3381, "step": 16389 }, { "epoch": 390.23880597014926, "grad_norm": 29.90780258178711, "learning_rate": 9.748809523809525e-06, "loss": 30.9246, "step": 16390 }, { "epoch": 390.26268656716417, "grad_norm": 32.69132614135742, "learning_rate": 9.748214285714287e-06, "loss": 31.653, "step": 16391 }, { "epoch": 390.28656716417913, "grad_norm": 27.816987991333008, "learning_rate": 9.747619047619049e-06, "loss": 31.411, "step": 16392 }, { "epoch": 390.31044776119404, "grad_norm": 29.577363967895508, "learning_rate": 9.74702380952381e-06, "loss": 31.654, "step": 16393 }, { "epoch": 390.33432835820895, "grad_norm": 22.633867263793945, "learning_rate": 9.746428571428572e-06, "loss": 31.4019, "step": 16394 }, { "epoch": 390.35820895522386, "grad_norm": 39.5258903503418, "learning_rate": 9.745833333333334e-06, "loss": 29.8451, "step": 16395 }, { "epoch": 390.3820895522388, "grad_norm": 32.41792297363281, "learning_rate": 9.745238095238096e-06, "loss": 30.2906, "step": 16396 }, { "epoch": 390.40597014925373, "grad_norm": 37.13993835449219, "learning_rate": 9.744642857142858e-06, "loss": 30.4198, "step": 16397 }, { "epoch": 390.42985074626864, "grad_norm": 38.53489685058594, "learning_rate": 9.74404761904762e-06, "loss": 31.6754, "step": 16398 }, { "epoch": 390.4537313432836, "grad_norm": 27.62884521484375, "learning_rate": 9.743452380952381e-06, "loss": 31.3364, "step": 16399 }, { "epoch": 390.4776119402985, "grad_norm": 31.210311889648438, "learning_rate": 9.742857142857143e-06, "loss": 31.2983, "step": 16400 }, { "epoch": 390.5014925373134, "grad_norm": 29.904752731323242, "learning_rate": 9.742261904761907e-06, "loss": 32.132, "step": 16401 }, { "epoch": 390.52537313432833, "grad_norm": 25.746906280517578, "learning_rate": 9.741666666666667e-06, "loss": 31.2874, "step": 16402 }, { "epoch": 390.5492537313433, "grad_norm": 30.04813575744629, "learning_rate": 9.741071428571429e-06, "loss": 30.7112, "step": 16403 }, { "epoch": 390.5731343283582, "grad_norm": 24.54204750061035, "learning_rate": 9.74047619047619e-06, "loss": 32.5849, "step": 16404 }, { "epoch": 390.5970149253731, "grad_norm": 33.865020751953125, "learning_rate": 9.739880952380954e-06, "loss": 31.1069, "step": 16405 }, { "epoch": 390.6208955223881, "grad_norm": 30.01352882385254, "learning_rate": 9.739285714285716e-06, "loss": 32.3905, "step": 16406 }, { "epoch": 390.644776119403, "grad_norm": 34.7811393737793, "learning_rate": 9.738690476190476e-06, "loss": 30.8243, "step": 16407 }, { "epoch": 390.6686567164179, "grad_norm": 29.043743133544922, "learning_rate": 9.73809523809524e-06, "loss": 32.4975, "step": 16408 }, { "epoch": 390.6925373134328, "grad_norm": 30.478422164916992, "learning_rate": 9.737500000000001e-06, "loss": 31.3406, "step": 16409 }, { "epoch": 390.7164179104478, "grad_norm": 25.178617477416992, "learning_rate": 9.736904761904763e-06, "loss": 30.8249, "step": 16410 }, { "epoch": 390.7402985074627, "grad_norm": 30.27109146118164, "learning_rate": 9.736309523809525e-06, "loss": 32.6285, "step": 16411 }, { "epoch": 390.7641791044776, "grad_norm": 27.961475372314453, "learning_rate": 9.735714285714287e-06, "loss": 31.5123, "step": 16412 }, { "epoch": 390.78805970149256, "grad_norm": 30.353574752807617, "learning_rate": 9.735119047619048e-06, "loss": 31.6004, "step": 16413 }, { "epoch": 390.81194029850747, "grad_norm": 27.45003318786621, "learning_rate": 9.73452380952381e-06, "loss": 31.9344, "step": 16414 }, { "epoch": 390.8358208955224, "grad_norm": 30.07430648803711, "learning_rate": 9.733928571428572e-06, "loss": 32.1091, "step": 16415 }, { "epoch": 390.85970149253734, "grad_norm": 26.37547492980957, "learning_rate": 9.733333333333334e-06, "loss": 31.8889, "step": 16416 }, { "epoch": 390.88358208955225, "grad_norm": 30.38176155090332, "learning_rate": 9.732738095238096e-06, "loss": 30.271, "step": 16417 }, { "epoch": 390.90746268656716, "grad_norm": 27.770626068115234, "learning_rate": 9.732142857142858e-06, "loss": 31.9441, "step": 16418 }, { "epoch": 390.93134328358207, "grad_norm": 32.194908142089844, "learning_rate": 9.73154761904762e-06, "loss": 31.8723, "step": 16419 }, { "epoch": 390.95522388059703, "grad_norm": 28.570674896240234, "learning_rate": 9.730952380952383e-06, "loss": 31.6136, "step": 16420 }, { "epoch": 390.97910447761194, "grad_norm": 26.941953659057617, "learning_rate": 9.730357142857145e-06, "loss": 30.484, "step": 16421 }, { "epoch": 391.0, "grad_norm": 23.557344436645508, "learning_rate": 9.729761904761905e-06, "loss": 25.403, "step": 16422 }, { "epoch": 391.0238805970149, "grad_norm": 29.310739517211914, "learning_rate": 9.729166666666667e-06, "loss": 31.4626, "step": 16423 }, { "epoch": 391.0477611940299, "grad_norm": 24.535303115844727, "learning_rate": 9.72857142857143e-06, "loss": 31.0161, "step": 16424 }, { "epoch": 391.0716417910448, "grad_norm": 27.39189338684082, "learning_rate": 9.727976190476192e-06, "loss": 32.2284, "step": 16425 }, { "epoch": 391.0955223880597, "grad_norm": 20.453712463378906, "learning_rate": 9.727380952380954e-06, "loss": 31.7956, "step": 16426 }, { "epoch": 391.1194029850746, "grad_norm": NaN, "learning_rate": 9.726785714285715e-06, "loss": 51.877, "step": 16427 }, { "epoch": 391.14328358208957, "grad_norm": 26.24725914001465, "learning_rate": 9.726785714285715e-06, "loss": 30.8665, "step": 16428 }, { "epoch": 391.1671641791045, "grad_norm": 21.113723754882812, "learning_rate": 9.726190476190477e-06, "loss": 30.9764, "step": 16429 }, { "epoch": 391.1910447761194, "grad_norm": 25.212806701660156, "learning_rate": 9.725595238095239e-06, "loss": 31.2019, "step": 16430 }, { "epoch": 391.21492537313435, "grad_norm": 22.409730911254883, "learning_rate": 9.725000000000001e-06, "loss": 31.8457, "step": 16431 }, { "epoch": 391.23880597014926, "grad_norm": 24.708051681518555, "learning_rate": 9.724404761904763e-06, "loss": 32.0696, "step": 16432 }, { "epoch": 391.26268656716417, "grad_norm": 22.0627384185791, "learning_rate": 9.723809523809525e-06, "loss": 31.355, "step": 16433 }, { "epoch": 391.28656716417913, "grad_norm": 24.677188873291016, "learning_rate": 9.723214285714286e-06, "loss": 29.9739, "step": 16434 }, { "epoch": 391.31044776119404, "grad_norm": 20.176437377929688, "learning_rate": 9.722619047619048e-06, "loss": 32.4173, "step": 16435 }, { "epoch": 391.33432835820895, "grad_norm": 23.677534103393555, "learning_rate": 9.72202380952381e-06, "loss": 31.065, "step": 16436 }, { "epoch": 391.35820895522386, "grad_norm": 21.294153213500977, "learning_rate": 9.721428571428573e-06, "loss": 32.0519, "step": 16437 }, { "epoch": 391.3820895522388, "grad_norm": 18.736831665039062, "learning_rate": 9.720833333333334e-06, "loss": 30.2835, "step": 16438 }, { "epoch": 391.40597014925373, "grad_norm": 18.900318145751953, "learning_rate": 9.720238095238095e-06, "loss": 31.8502, "step": 16439 }, { "epoch": 391.42985074626864, "grad_norm": 19.314485549926758, "learning_rate": 9.719642857142859e-06, "loss": 31.2654, "step": 16440 }, { "epoch": 391.4537313432836, "grad_norm": 17.646129608154297, "learning_rate": 9.71904761904762e-06, "loss": 30.9776, "step": 16441 }, { "epoch": 391.4776119402985, "grad_norm": 19.75379180908203, "learning_rate": 9.71845238095238e-06, "loss": 30.3464, "step": 16442 }, { "epoch": 391.5014925373134, "grad_norm": 17.314546585083008, "learning_rate": 9.717857142857143e-06, "loss": 31.5072, "step": 16443 }, { "epoch": 391.52537313432833, "grad_norm": 24.70691680908203, "learning_rate": 9.717261904761906e-06, "loss": 30.76, "step": 16444 }, { "epoch": 391.5492537313433, "grad_norm": 21.464452743530273, "learning_rate": 9.716666666666668e-06, "loss": 31.7995, "step": 16445 }, { "epoch": 391.5731343283582, "grad_norm": 18.588397979736328, "learning_rate": 9.71607142857143e-06, "loss": 31.1106, "step": 16446 }, { "epoch": 391.5970149253731, "grad_norm": 20.397750854492188, "learning_rate": 9.715476190476192e-06, "loss": 31.4806, "step": 16447 }, { "epoch": 391.6208955223881, "grad_norm": 18.86182403564453, "learning_rate": 9.714880952380953e-06, "loss": 31.6908, "step": 16448 }, { "epoch": 391.644776119403, "grad_norm": 21.76464080810547, "learning_rate": 9.714285714285715e-06, "loss": 31.6445, "step": 16449 }, { "epoch": 391.6686567164179, "grad_norm": 17.58875274658203, "learning_rate": 9.713690476190477e-06, "loss": 32.1033, "step": 16450 }, { "epoch": 391.6925373134328, "grad_norm": 19.376646041870117, "learning_rate": 9.713095238095239e-06, "loss": 30.7335, "step": 16451 }, { "epoch": 391.7164179104478, "grad_norm": 25.674732208251953, "learning_rate": 9.7125e-06, "loss": 30.7509, "step": 16452 }, { "epoch": 391.7402985074627, "grad_norm": 18.902231216430664, "learning_rate": 9.711904761904762e-06, "loss": 32.1143, "step": 16453 }, { "epoch": 391.7641791044776, "grad_norm": 17.41876220703125, "learning_rate": 9.711309523809524e-06, "loss": 30.5472, "step": 16454 }, { "epoch": 391.78805970149256, "grad_norm": 18.64491844177246, "learning_rate": 9.710714285714286e-06, "loss": 31.4101, "step": 16455 }, { "epoch": 391.81194029850747, "grad_norm": 20.355897903442383, "learning_rate": 9.71011904761905e-06, "loss": 30.8667, "step": 16456 }, { "epoch": 391.8358208955224, "grad_norm": 17.163890838623047, "learning_rate": 9.70952380952381e-06, "loss": 30.9724, "step": 16457 }, { "epoch": 391.85970149253734, "grad_norm": 20.216176986694336, "learning_rate": 9.708928571428571e-06, "loss": 31.3025, "step": 16458 }, { "epoch": 391.88358208955225, "grad_norm": 20.08291244506836, "learning_rate": 9.708333333333333e-06, "loss": 33.5975, "step": 16459 }, { "epoch": 391.90746268656716, "grad_norm": 17.577011108398438, "learning_rate": 9.707738095238097e-06, "loss": 29.7382, "step": 16460 }, { "epoch": 391.93134328358207, "grad_norm": 17.22465705871582, "learning_rate": 9.707142857142859e-06, "loss": 31.1636, "step": 16461 }, { "epoch": 391.95522388059703, "grad_norm": 18.496328353881836, "learning_rate": 9.706547619047619e-06, "loss": 30.2448, "step": 16462 }, { "epoch": 391.97910447761194, "grad_norm": 17.75228500366211, "learning_rate": 9.705952380952382e-06, "loss": 30.703, "step": 16463 }, { "epoch": 392.0, "grad_norm": 18.882986068725586, "learning_rate": 9.705357142857144e-06, "loss": 28.359, "step": 16464 }, { "epoch": 392.0238805970149, "grad_norm": 17.181751251220703, "learning_rate": 9.704761904761906e-06, "loss": 31.6314, "step": 16465 }, { "epoch": 392.0477611940299, "grad_norm": 19.29618263244629, "learning_rate": 9.704166666666668e-06, "loss": 32.1336, "step": 16466 }, { "epoch": 392.0716417910448, "grad_norm": 19.281169891357422, "learning_rate": 9.70357142857143e-06, "loss": 31.0754, "step": 16467 }, { "epoch": 392.0955223880597, "grad_norm": 22.606828689575195, "learning_rate": 9.702976190476191e-06, "loss": 31.5669, "step": 16468 }, { "epoch": 392.1194029850746, "grad_norm": 19.79329490661621, "learning_rate": 9.702380952380953e-06, "loss": 31.5799, "step": 16469 }, { "epoch": 392.14328358208957, "grad_norm": 18.98135757446289, "learning_rate": 9.701785714285715e-06, "loss": 30.4859, "step": 16470 }, { "epoch": 392.1671641791045, "grad_norm": 20.63869285583496, "learning_rate": 9.701190476190477e-06, "loss": 32.6835, "step": 16471 }, { "epoch": 392.1910447761194, "grad_norm": 17.536327362060547, "learning_rate": 9.700595238095238e-06, "loss": 31.5062, "step": 16472 }, { "epoch": 392.21492537313435, "grad_norm": 18.95645523071289, "learning_rate": 9.7e-06, "loss": 31.2597, "step": 16473 }, { "epoch": 392.23880597014926, "grad_norm": 18.115530014038086, "learning_rate": 9.699404761904762e-06, "loss": 31.0092, "step": 16474 }, { "epoch": 392.26268656716417, "grad_norm": 17.346193313598633, "learning_rate": 9.698809523809526e-06, "loss": 31.391, "step": 16475 }, { "epoch": 392.28656716417913, "grad_norm": 20.828800201416016, "learning_rate": 9.698214285714287e-06, "loss": 31.3353, "step": 16476 }, { "epoch": 392.31044776119404, "grad_norm": 17.44601058959961, "learning_rate": 9.697619047619048e-06, "loss": 31.3625, "step": 16477 }, { "epoch": 392.33432835820895, "grad_norm": 18.767868041992188, "learning_rate": 9.69702380952381e-06, "loss": 31.5475, "step": 16478 }, { "epoch": 392.35820895522386, "grad_norm": 15.861065864562988, "learning_rate": 9.696428571428573e-06, "loss": 31.8539, "step": 16479 }, { "epoch": 392.3820895522388, "grad_norm": 21.5611515045166, "learning_rate": 9.695833333333335e-06, "loss": 32.9212, "step": 16480 }, { "epoch": 392.40597014925373, "grad_norm": 21.332988739013672, "learning_rate": 9.695238095238096e-06, "loss": 32.4479, "step": 16481 }, { "epoch": 392.42985074626864, "grad_norm": 23.342430114746094, "learning_rate": 9.694642857142858e-06, "loss": 31.6261, "step": 16482 }, { "epoch": 392.4537313432836, "grad_norm": 19.13523292541504, "learning_rate": 9.69404761904762e-06, "loss": 30.7945, "step": 16483 }, { "epoch": 392.4776119402985, "grad_norm": 16.57522201538086, "learning_rate": 9.693452380952382e-06, "loss": 30.6693, "step": 16484 }, { "epoch": 392.5014925373134, "grad_norm": 17.404447555541992, "learning_rate": 9.692857142857144e-06, "loss": 30.272, "step": 16485 }, { "epoch": 392.52537313432833, "grad_norm": 16.987762451171875, "learning_rate": 9.692261904761906e-06, "loss": 31.2822, "step": 16486 }, { "epoch": 392.5492537313433, "grad_norm": 20.205286026000977, "learning_rate": 9.691666666666667e-06, "loss": 31.4389, "step": 16487 }, { "epoch": 392.5731343283582, "grad_norm": 25.10284423828125, "learning_rate": 9.691071428571429e-06, "loss": 31.4368, "step": 16488 }, { "epoch": 392.5970149253731, "grad_norm": 20.7637939453125, "learning_rate": 9.690476190476191e-06, "loss": 32.2761, "step": 16489 }, { "epoch": 392.6208955223881, "grad_norm": 19.260000228881836, "learning_rate": 9.689880952380953e-06, "loss": 31.633, "step": 16490 }, { "epoch": 392.644776119403, "grad_norm": 30.960908889770508, "learning_rate": 9.689285714285716e-06, "loss": 30.5168, "step": 16491 }, { "epoch": 392.6686567164179, "grad_norm": 18.382041931152344, "learning_rate": 9.688690476190476e-06, "loss": 30.6955, "step": 16492 }, { "epoch": 392.6925373134328, "grad_norm": 33.018348693847656, "learning_rate": 9.688095238095238e-06, "loss": 30.4928, "step": 16493 }, { "epoch": 392.7164179104478, "grad_norm": 22.341228485107422, "learning_rate": 9.6875e-06, "loss": 30.7801, "step": 16494 }, { "epoch": 392.7402985074627, "grad_norm": 32.300472259521484, "learning_rate": 9.686904761904764e-06, "loss": 30.6573, "step": 16495 }, { "epoch": 392.7641791044776, "grad_norm": 25.2115421295166, "learning_rate": 9.686309523809525e-06, "loss": 31.3364, "step": 16496 }, { "epoch": 392.78805970149256, "grad_norm": 28.91110610961914, "learning_rate": 9.685714285714285e-06, "loss": 30.7299, "step": 16497 }, { "epoch": 392.81194029850747, "grad_norm": 24.175539016723633, "learning_rate": 9.685119047619049e-06, "loss": 30.2812, "step": 16498 }, { "epoch": 392.8358208955224, "grad_norm": 28.419218063354492, "learning_rate": 9.68452380952381e-06, "loss": 31.3165, "step": 16499 }, { "epoch": 392.85970149253734, "grad_norm": 26.446331024169922, "learning_rate": 9.683928571428573e-06, "loss": 32.2847, "step": 16500 }, { "epoch": 392.88358208955225, "grad_norm": 19.74005889892578, "learning_rate": 9.683333333333334e-06, "loss": 29.7685, "step": 16501 }, { "epoch": 392.90746268656716, "grad_norm": 24.966724395751953, "learning_rate": 9.682738095238096e-06, "loss": 30.26, "step": 16502 }, { "epoch": 392.93134328358207, "grad_norm": 19.56442642211914, "learning_rate": 9.682142857142858e-06, "loss": 31.7183, "step": 16503 }, { "epoch": 392.95522388059703, "grad_norm": 23.34406280517578, "learning_rate": 9.68154761904762e-06, "loss": 31.2273, "step": 16504 }, { "epoch": 392.97910447761194, "grad_norm": 20.585844039916992, "learning_rate": 9.680952380952382e-06, "loss": 31.4356, "step": 16505 }, { "epoch": 393.0, "grad_norm": 17.45985984802246, "learning_rate": 9.680357142857143e-06, "loss": 27.6711, "step": 16506 }, { "epoch": 393.0238805970149, "grad_norm": 18.485904693603516, "learning_rate": 9.679761904761905e-06, "loss": 30.6437, "step": 16507 }, { "epoch": 393.0477611940299, "grad_norm": 25.823047637939453, "learning_rate": 9.679166666666667e-06, "loss": 31.4364, "step": 16508 }, { "epoch": 393.0716417910448, "grad_norm": 19.41379737854004, "learning_rate": 9.678571428571429e-06, "loss": 31.3302, "step": 16509 }, { "epoch": 393.0955223880597, "grad_norm": 17.947589874267578, "learning_rate": 9.677976190476192e-06, "loss": 32.2393, "step": 16510 }, { "epoch": 393.1194029850746, "grad_norm": 21.385234832763672, "learning_rate": 9.677380952380954e-06, "loss": 30.9784, "step": 16511 }, { "epoch": 393.14328358208957, "grad_norm": 17.71302604675293, "learning_rate": 9.676785714285714e-06, "loss": 30.7161, "step": 16512 }, { "epoch": 393.1671641791045, "grad_norm": 23.4676456451416, "learning_rate": 9.676190476190476e-06, "loss": 30.3894, "step": 16513 }, { "epoch": 393.1910447761194, "grad_norm": 19.868331909179688, "learning_rate": 9.67559523809524e-06, "loss": 30.8656, "step": 16514 }, { "epoch": 393.21492537313435, "grad_norm": 22.355960845947266, "learning_rate": 9.675000000000001e-06, "loss": 31.1684, "step": 16515 }, { "epoch": 393.23880597014926, "grad_norm": 17.21105194091797, "learning_rate": 9.674404761904763e-06, "loss": 31.6164, "step": 16516 }, { "epoch": 393.26268656716417, "grad_norm": 21.77312469482422, "learning_rate": 9.673809523809525e-06, "loss": 31.5075, "step": 16517 }, { "epoch": 393.28656716417913, "grad_norm": 21.612089157104492, "learning_rate": 9.673214285714287e-06, "loss": 32.278, "step": 16518 }, { "epoch": 393.31044776119404, "grad_norm": 20.267621994018555, "learning_rate": 9.672619047619049e-06, "loss": 31.1718, "step": 16519 }, { "epoch": 393.33432835820895, "grad_norm": 18.022846221923828, "learning_rate": 9.67202380952381e-06, "loss": 31.1893, "step": 16520 }, { "epoch": 393.35820895522386, "grad_norm": 20.32938575744629, "learning_rate": 9.671428571428572e-06, "loss": 30.6629, "step": 16521 }, { "epoch": 393.3820895522388, "grad_norm": 21.79494857788086, "learning_rate": 9.670833333333334e-06, "loss": 31.9163, "step": 16522 }, { "epoch": 393.40597014925373, "grad_norm": 18.37645721435547, "learning_rate": 9.670238095238096e-06, "loss": 31.9426, "step": 16523 }, { "epoch": 393.42985074626864, "grad_norm": 20.292821884155273, "learning_rate": 9.669642857142858e-06, "loss": 31.664, "step": 16524 }, { "epoch": 393.4537313432836, "grad_norm": 21.310020446777344, "learning_rate": 9.66904761904762e-06, "loss": 31.7991, "step": 16525 }, { "epoch": 393.4776119402985, "grad_norm": 23.35830307006836, "learning_rate": 9.668452380952383e-06, "loss": 32.1127, "step": 16526 }, { "epoch": 393.5014925373134, "grad_norm": 20.169979095458984, "learning_rate": 9.667857142857143e-06, "loss": 30.6385, "step": 16527 }, { "epoch": 393.52537313432833, "grad_norm": 15.472123146057129, "learning_rate": 9.667261904761905e-06, "loss": 31.5138, "step": 16528 }, { "epoch": 393.5492537313433, "grad_norm": 19.554691314697266, "learning_rate": 9.666666666666667e-06, "loss": 30.9746, "step": 16529 }, { "epoch": 393.5731343283582, "grad_norm": 23.469707489013672, "learning_rate": 9.66607142857143e-06, "loss": 31.5301, "step": 16530 }, { "epoch": 393.5970149253731, "grad_norm": 21.12062644958496, "learning_rate": 9.665476190476192e-06, "loss": 29.9563, "step": 16531 }, { "epoch": 393.6208955223881, "grad_norm": 16.849655151367188, "learning_rate": 9.664880952380952e-06, "loss": 29.8341, "step": 16532 }, { "epoch": 393.644776119403, "grad_norm": 16.26079750061035, "learning_rate": 9.664285714285716e-06, "loss": 30.2833, "step": 16533 }, { "epoch": 393.6686567164179, "grad_norm": 17.466516494750977, "learning_rate": 9.663690476190477e-06, "loss": 31.4941, "step": 16534 }, { "epoch": 393.6925373134328, "grad_norm": 21.156137466430664, "learning_rate": 9.66309523809524e-06, "loss": 31.4996, "step": 16535 }, { "epoch": 393.7164179104478, "grad_norm": 21.75018882751465, "learning_rate": 9.662500000000001e-06, "loss": 30.7024, "step": 16536 }, { "epoch": 393.7402985074627, "grad_norm": 25.015518188476562, "learning_rate": 9.661904761904763e-06, "loss": 31.4117, "step": 16537 }, { "epoch": 393.7641791044776, "grad_norm": 16.228118896484375, "learning_rate": 9.661309523809525e-06, "loss": 32.1889, "step": 16538 }, { "epoch": 393.78805970149256, "grad_norm": 27.60285186767578, "learning_rate": 9.660714285714287e-06, "loss": 31.7577, "step": 16539 }, { "epoch": 393.81194029850747, "grad_norm": 20.638507843017578, "learning_rate": 9.660119047619048e-06, "loss": 31.9245, "step": 16540 }, { "epoch": 393.8358208955224, "grad_norm": 22.617639541625977, "learning_rate": 9.65952380952381e-06, "loss": 31.4598, "step": 16541 }, { "epoch": 393.85970149253734, "grad_norm": 24.75657081604004, "learning_rate": 9.658928571428572e-06, "loss": 30.8452, "step": 16542 }, { "epoch": 393.88358208955225, "grad_norm": 21.58110237121582, "learning_rate": 9.658333333333334e-06, "loss": 30.4439, "step": 16543 }, { "epoch": 393.90746268656716, "grad_norm": 18.69927978515625, "learning_rate": 9.657738095238096e-06, "loss": 30.6121, "step": 16544 }, { "epoch": 393.93134328358207, "grad_norm": 29.413753509521484, "learning_rate": 9.657142857142859e-06, "loss": 31.4029, "step": 16545 }, { "epoch": 393.95522388059703, "grad_norm": 19.673093795776367, "learning_rate": 9.656547619047621e-06, "loss": 31.0743, "step": 16546 }, { "epoch": 393.97910447761194, "grad_norm": 18.085433959960938, "learning_rate": 9.655952380952381e-06, "loss": 32.1028, "step": 16547 }, { "epoch": 394.0, "grad_norm": 23.433103561401367, "learning_rate": 9.655357142857143e-06, "loss": 27.2201, "step": 16548 }, { "epoch": 394.0238805970149, "grad_norm": 19.056753158569336, "learning_rate": 9.654761904761906e-06, "loss": 31.5069, "step": 16549 }, { "epoch": 394.0477611940299, "grad_norm": 18.356182098388672, "learning_rate": 9.654166666666668e-06, "loss": 30.1049, "step": 16550 }, { "epoch": 394.0716417910448, "grad_norm": 19.064579010009766, "learning_rate": 9.653571428571428e-06, "loss": 29.9186, "step": 16551 }, { "epoch": 394.0955223880597, "grad_norm": 19.439306259155273, "learning_rate": 9.652976190476192e-06, "loss": 30.6292, "step": 16552 }, { "epoch": 394.1194029850746, "grad_norm": 15.978523254394531, "learning_rate": 9.652380952380954e-06, "loss": 30.46, "step": 16553 }, { "epoch": 394.14328358208957, "grad_norm": 17.57875633239746, "learning_rate": 9.651785714285715e-06, "loss": 29.9802, "step": 16554 }, { "epoch": 394.1671641791045, "grad_norm": 17.737642288208008, "learning_rate": 9.651190476190477e-06, "loss": 31.1575, "step": 16555 }, { "epoch": 394.1910447761194, "grad_norm": 20.065645217895508, "learning_rate": 9.650595238095239e-06, "loss": 30.7144, "step": 16556 }, { "epoch": 394.21492537313435, "grad_norm": 21.767803192138672, "learning_rate": 9.65e-06, "loss": 29.3319, "step": 16557 }, { "epoch": 394.23880597014926, "grad_norm": 19.06543731689453, "learning_rate": 9.649404761904763e-06, "loss": 31.6025, "step": 16558 }, { "epoch": 394.26268656716417, "grad_norm": 22.794673919677734, "learning_rate": 9.648809523809524e-06, "loss": 32.2114, "step": 16559 }, { "epoch": 394.28656716417913, "grad_norm": 25.42393684387207, "learning_rate": 9.648214285714286e-06, "loss": 31.8628, "step": 16560 }, { "epoch": 394.31044776119404, "grad_norm": 20.32715606689453, "learning_rate": 9.647619047619048e-06, "loss": 32.0349, "step": 16561 }, { "epoch": 394.33432835820895, "grad_norm": 18.05319595336914, "learning_rate": 9.64702380952381e-06, "loss": 30.8603, "step": 16562 }, { "epoch": 394.35820895522386, "grad_norm": 23.052047729492188, "learning_rate": 9.646428571428572e-06, "loss": 31.2589, "step": 16563 }, { "epoch": 394.3820895522388, "grad_norm": 20.820653915405273, "learning_rate": 9.645833333333333e-06, "loss": 30.8611, "step": 16564 }, { "epoch": 394.40597014925373, "grad_norm": 16.364179611206055, "learning_rate": 9.645238095238097e-06, "loss": 31.3999, "step": 16565 }, { "epoch": 394.42985074626864, "grad_norm": 19.812480926513672, "learning_rate": 9.644642857142857e-06, "loss": 31.7923, "step": 16566 }, { "epoch": 394.4537313432836, "grad_norm": 21.836849212646484, "learning_rate": 9.644047619047619e-06, "loss": 33.2977, "step": 16567 }, { "epoch": 394.4776119402985, "grad_norm": 17.77836799621582, "learning_rate": 9.643452380952382e-06, "loss": 31.7209, "step": 16568 }, { "epoch": 394.5014925373134, "grad_norm": 22.15043067932129, "learning_rate": 9.642857142857144e-06, "loss": 32.0888, "step": 16569 }, { "epoch": 394.52537313432833, "grad_norm": 15.695204734802246, "learning_rate": 9.642261904761906e-06, "loss": 31.2755, "step": 16570 }, { "epoch": 394.5492537313433, "grad_norm": 26.243276596069336, "learning_rate": 9.641666666666666e-06, "loss": 31.3449, "step": 16571 }, { "epoch": 394.5731343283582, "grad_norm": 16.394758224487305, "learning_rate": 9.64107142857143e-06, "loss": 30.1301, "step": 16572 }, { "epoch": 394.5970149253731, "grad_norm": 22.238359451293945, "learning_rate": 9.640476190476191e-06, "loss": 31.6778, "step": 16573 }, { "epoch": 394.6208955223881, "grad_norm": 19.042591094970703, "learning_rate": 9.639880952380953e-06, "loss": 30.6233, "step": 16574 }, { "epoch": 394.644776119403, "grad_norm": 23.523181915283203, "learning_rate": 9.639285714285715e-06, "loss": 32.3055, "step": 16575 }, { "epoch": 394.6686567164179, "grad_norm": 25.505687713623047, "learning_rate": 9.638690476190477e-06, "loss": 30.1301, "step": 16576 }, { "epoch": 394.6925373134328, "grad_norm": 19.258235931396484, "learning_rate": 9.638095238095239e-06, "loss": 31.6026, "step": 16577 }, { "epoch": 394.7164179104478, "grad_norm": 24.81572914123535, "learning_rate": 9.6375e-06, "loss": 30.9966, "step": 16578 }, { "epoch": 394.7402985074627, "grad_norm": 24.1857967376709, "learning_rate": 9.636904761904762e-06, "loss": 32.3317, "step": 16579 }, { "epoch": 394.7641791044776, "grad_norm": 19.592126846313477, "learning_rate": 9.636309523809526e-06, "loss": 30.6968, "step": 16580 }, { "epoch": 394.78805970149256, "grad_norm": 19.79613494873047, "learning_rate": 9.635714285714286e-06, "loss": 30.9267, "step": 16581 }, { "epoch": 394.81194029850747, "grad_norm": 22.39429473876953, "learning_rate": 9.635119047619048e-06, "loss": 31.9018, "step": 16582 }, { "epoch": 394.8358208955224, "grad_norm": 16.94050407409668, "learning_rate": 9.63452380952381e-06, "loss": 31.5597, "step": 16583 }, { "epoch": 394.85970149253734, "grad_norm": 16.844341278076172, "learning_rate": 9.633928571428573e-06, "loss": 30.7179, "step": 16584 }, { "epoch": 394.88358208955225, "grad_norm": 19.728256225585938, "learning_rate": 9.633333333333335e-06, "loss": 31.5021, "step": 16585 }, { "epoch": 394.90746268656716, "grad_norm": 21.01833724975586, "learning_rate": 9.632738095238095e-06, "loss": 31.2969, "step": 16586 }, { "epoch": 394.93134328358207, "grad_norm": 16.520946502685547, "learning_rate": 9.632142857142858e-06, "loss": 31.9674, "step": 16587 }, { "epoch": 394.95522388059703, "grad_norm": 18.850202560424805, "learning_rate": 9.63154761904762e-06, "loss": 31.9042, "step": 16588 }, { "epoch": 394.97910447761194, "grad_norm": 15.861136436462402, "learning_rate": 9.630952380952382e-06, "loss": 30.7848, "step": 16589 }, { "epoch": 395.0, "grad_norm": 18.09653091430664, "learning_rate": 9.630357142857144e-06, "loss": 25.999, "step": 16590 }, { "epoch": 395.0238805970149, "grad_norm": 18.900066375732422, "learning_rate": 9.629761904761906e-06, "loss": 31.3835, "step": 16591 }, { "epoch": 395.0477611940299, "grad_norm": 18.146682739257812, "learning_rate": 9.629166666666668e-06, "loss": 31.2379, "step": 16592 }, { "epoch": 395.0716417910448, "grad_norm": 21.965797424316406, "learning_rate": 9.62857142857143e-06, "loss": 30.6798, "step": 16593 }, { "epoch": 395.0955223880597, "grad_norm": 22.59465217590332, "learning_rate": 9.627976190476191e-06, "loss": 30.2535, "step": 16594 }, { "epoch": 395.1194029850746, "grad_norm": 19.063858032226562, "learning_rate": 9.627380952380953e-06, "loss": 30.664, "step": 16595 }, { "epoch": 395.14328358208957, "grad_norm": 18.029199600219727, "learning_rate": 9.626785714285715e-06, "loss": 30.3232, "step": 16596 }, { "epoch": 395.1671641791045, "grad_norm": 17.064918518066406, "learning_rate": 9.626190476190477e-06, "loss": 31.4215, "step": 16597 }, { "epoch": 395.1910447761194, "grad_norm": 19.695556640625, "learning_rate": 9.625595238095238e-06, "loss": 30.1845, "step": 16598 }, { "epoch": 395.21492537313435, "grad_norm": 21.29120635986328, "learning_rate": 9.625e-06, "loss": 31.8835, "step": 16599 }, { "epoch": 395.23880597014926, "grad_norm": 20.6686954498291, "learning_rate": 9.624404761904764e-06, "loss": 31.8821, "step": 16600 }, { "epoch": 395.26268656716417, "grad_norm": 18.07406997680664, "learning_rate": 9.623809523809524e-06, "loss": 31.1092, "step": 16601 }, { "epoch": 395.28656716417913, "grad_norm": 17.966976165771484, "learning_rate": 9.623214285714286e-06, "loss": 30.3551, "step": 16602 }, { "epoch": 395.31044776119404, "grad_norm": 23.98894500732422, "learning_rate": 9.622619047619049e-06, "loss": 30.2829, "step": 16603 }, { "epoch": 395.33432835820895, "grad_norm": 17.835453033447266, "learning_rate": 9.622023809523811e-06, "loss": 30.4599, "step": 16604 }, { "epoch": 395.35820895522386, "grad_norm": 16.08890724182129, "learning_rate": 9.621428571428573e-06, "loss": 30.8228, "step": 16605 }, { "epoch": 395.3820895522388, "grad_norm": 26.890722274780273, "learning_rate": 9.620833333333335e-06, "loss": 31.7994, "step": 16606 }, { "epoch": 395.40597014925373, "grad_norm": 18.69220542907715, "learning_rate": 9.620238095238096e-06, "loss": 31.3295, "step": 16607 }, { "epoch": 395.42985074626864, "grad_norm": 19.22516441345215, "learning_rate": 9.619642857142858e-06, "loss": 31.2985, "step": 16608 }, { "epoch": 395.4537313432836, "grad_norm": 28.353347778320312, "learning_rate": 9.61904761904762e-06, "loss": 31.5362, "step": 16609 }, { "epoch": 395.4776119402985, "grad_norm": 17.4637508392334, "learning_rate": 9.618452380952382e-06, "loss": 30.8674, "step": 16610 }, { "epoch": 395.5014925373134, "grad_norm": 30.24287223815918, "learning_rate": 9.617857142857144e-06, "loss": 30.9374, "step": 16611 }, { "epoch": 395.52537313432833, "grad_norm": 23.158342361450195, "learning_rate": 9.617261904761905e-06, "loss": 31.9625, "step": 16612 }, { "epoch": 395.5492537313433, "grad_norm": 23.577930450439453, "learning_rate": 9.616666666666667e-06, "loss": 32.3882, "step": 16613 }, { "epoch": 395.5731343283582, "grad_norm": 28.652990341186523, "learning_rate": 9.616071428571429e-06, "loss": 31.3116, "step": 16614 }, { "epoch": 395.5970149253731, "grad_norm": 20.64177894592285, "learning_rate": 9.615476190476193e-06, "loss": 30.9665, "step": 16615 }, { "epoch": 395.6208955223881, "grad_norm": 37.987701416015625, "learning_rate": 9.614880952380953e-06, "loss": 31.1373, "step": 16616 }, { "epoch": 395.644776119403, "grad_norm": 27.08494758605957, "learning_rate": 9.614285714285714e-06, "loss": 31.2365, "step": 16617 }, { "epoch": 395.6686567164179, "grad_norm": 42.456336975097656, "learning_rate": 9.613690476190476e-06, "loss": 32.2337, "step": 16618 }, { "epoch": 395.6925373134328, "grad_norm": 32.765262603759766, "learning_rate": 9.61309523809524e-06, "loss": 31.1218, "step": 16619 }, { "epoch": 395.7164179104478, "grad_norm": 43.60858917236328, "learning_rate": 9.612500000000002e-06, "loss": 30.2923, "step": 16620 }, { "epoch": 395.7402985074627, "grad_norm": 42.65446853637695, "learning_rate": 9.611904761904762e-06, "loss": 31.4776, "step": 16621 }, { "epoch": 395.7641791044776, "grad_norm": 30.62103843688965, "learning_rate": 9.611309523809525e-06, "loss": 30.8037, "step": 16622 }, { "epoch": 395.78805970149256, "grad_norm": 31.356796264648438, "learning_rate": 9.610714285714287e-06, "loss": 32.1796, "step": 16623 }, { "epoch": 395.81194029850747, "grad_norm": 30.23118782043457, "learning_rate": 9.610119047619049e-06, "loss": 31.4694, "step": 16624 }, { "epoch": 395.8358208955224, "grad_norm": 25.252370834350586, "learning_rate": 9.60952380952381e-06, "loss": 31.9266, "step": 16625 }, { "epoch": 395.85970149253734, "grad_norm": 44.541969299316406, "learning_rate": 9.608928571428572e-06, "loss": 31.8544, "step": 16626 }, { "epoch": 395.88358208955225, "grad_norm": 35.47584533691406, "learning_rate": 9.608333333333334e-06, "loss": 30.4277, "step": 16627 }, { "epoch": 395.90746268656716, "grad_norm": 38.233604431152344, "learning_rate": 9.607738095238096e-06, "loss": 30.5344, "step": 16628 }, { "epoch": 395.93134328358207, "grad_norm": 37.811973571777344, "learning_rate": 9.607142857142858e-06, "loss": 30.0792, "step": 16629 }, { "epoch": 395.95522388059703, "grad_norm": 28.466703414916992, "learning_rate": 9.60654761904762e-06, "loss": 31.8081, "step": 16630 }, { "epoch": 395.97910447761194, "grad_norm": 25.1666259765625, "learning_rate": 9.605952380952381e-06, "loss": 32.0192, "step": 16631 }, { "epoch": 396.0, "grad_norm": 33.15395736694336, "learning_rate": 9.605357142857143e-06, "loss": 28.3371, "step": 16632 }, { "epoch": 396.0238805970149, "grad_norm": 31.699602127075195, "learning_rate": 9.604761904761905e-06, "loss": 30.8777, "step": 16633 }, { "epoch": 396.0477611940299, "grad_norm": 42.341495513916016, "learning_rate": 9.604166666666669e-06, "loss": 31.5053, "step": 16634 }, { "epoch": 396.0716417910448, "grad_norm": 34.36485290527344, "learning_rate": 9.60357142857143e-06, "loss": 32.9534, "step": 16635 }, { "epoch": 396.0955223880597, "grad_norm": 32.37449264526367, "learning_rate": 9.60297619047619e-06, "loss": 31.1525, "step": 16636 }, { "epoch": 396.1194029850746, "grad_norm": 27.004873275756836, "learning_rate": 9.602380952380952e-06, "loss": 30.6301, "step": 16637 }, { "epoch": 396.14328358208957, "grad_norm": 36.502906799316406, "learning_rate": 9.601785714285716e-06, "loss": 31.3992, "step": 16638 }, { "epoch": 396.1671641791045, "grad_norm": 29.16812515258789, "learning_rate": 9.601190476190478e-06, "loss": 30.064, "step": 16639 }, { "epoch": 396.1910447761194, "grad_norm": 34.83269500732422, "learning_rate": 9.60059523809524e-06, "loss": 31.2044, "step": 16640 }, { "epoch": 396.21492537313435, "grad_norm": 37.278987884521484, "learning_rate": 9.600000000000001e-06, "loss": 31.656, "step": 16641 }, { "epoch": 396.23880597014926, "grad_norm": 30.657453536987305, "learning_rate": 9.599404761904763e-06, "loss": 30.8146, "step": 16642 }, { "epoch": 396.26268656716417, "grad_norm": 27.678909301757812, "learning_rate": 9.598809523809525e-06, "loss": 30.6719, "step": 16643 }, { "epoch": 396.28656716417913, "grad_norm": 34.551876068115234, "learning_rate": 9.598214285714287e-06, "loss": 31.0905, "step": 16644 }, { "epoch": 396.31044776119404, "grad_norm": 28.76030731201172, "learning_rate": 9.597619047619048e-06, "loss": 31.1123, "step": 16645 }, { "epoch": 396.33432835820895, "grad_norm": 37.68404769897461, "learning_rate": 9.59702380952381e-06, "loss": 31.3855, "step": 16646 }, { "epoch": 396.35820895522386, "grad_norm": 33.67521667480469, "learning_rate": 9.596428571428572e-06, "loss": 30.5938, "step": 16647 }, { "epoch": 396.3820895522388, "grad_norm": 35.538673400878906, "learning_rate": 9.595833333333334e-06, "loss": 30.8305, "step": 16648 }, { "epoch": 396.40597014925373, "grad_norm": 30.006072998046875, "learning_rate": 9.595238095238096e-06, "loss": 31.4339, "step": 16649 }, { "epoch": 396.42985074626864, "grad_norm": 32.67222595214844, "learning_rate": 9.59464285714286e-06, "loss": 31.8208, "step": 16650 }, { "epoch": 396.4537313432836, "grad_norm": 27.725492477416992, "learning_rate": 9.59404761904762e-06, "loss": 30.8556, "step": 16651 }, { "epoch": 396.4776119402985, "grad_norm": 33.49855422973633, "learning_rate": 9.593452380952381e-06, "loss": 31.3964, "step": 16652 }, { "epoch": 396.5014925373134, "grad_norm": 27.76362419128418, "learning_rate": 9.592857142857143e-06, "loss": 31.0002, "step": 16653 }, { "epoch": 396.52537313432833, "grad_norm": 35.20942687988281, "learning_rate": 9.592261904761906e-06, "loss": 31.1557, "step": 16654 }, { "epoch": 396.5492537313433, "grad_norm": 33.215999603271484, "learning_rate": 9.591666666666667e-06, "loss": 31.4064, "step": 16655 }, { "epoch": 396.5731343283582, "grad_norm": 31.689847946166992, "learning_rate": 9.591071428571428e-06, "loss": 32.0374, "step": 16656 }, { "epoch": 396.5970149253731, "grad_norm": 27.90886878967285, "learning_rate": 9.590476190476192e-06, "loss": 30.0792, "step": 16657 }, { "epoch": 396.6208955223881, "grad_norm": 36.104854583740234, "learning_rate": 9.589880952380954e-06, "loss": 31.7099, "step": 16658 }, { "epoch": 396.644776119403, "grad_norm": 32.245887756347656, "learning_rate": 9.589285714285716e-06, "loss": 30.9505, "step": 16659 }, { "epoch": 396.6686567164179, "grad_norm": 34.97145080566406, "learning_rate": 9.588690476190476e-06, "loss": 31.2625, "step": 16660 }, { "epoch": 396.6925373134328, "grad_norm": 32.9593391418457, "learning_rate": 9.588095238095239e-06, "loss": 31.7602, "step": 16661 }, { "epoch": 396.7164179104478, "grad_norm": 29.946687698364258, "learning_rate": 9.587500000000001e-06, "loss": 30.1575, "step": 16662 }, { "epoch": 396.7402985074627, "grad_norm": 25.896459579467773, "learning_rate": 9.586904761904763e-06, "loss": 31.2349, "step": 16663 }, { "epoch": 396.7641791044776, "grad_norm": 32.72298812866211, "learning_rate": 9.586309523809525e-06, "loss": 30.6864, "step": 16664 }, { "epoch": 396.78805970149256, "grad_norm": 27.926958084106445, "learning_rate": 9.585714285714286e-06, "loss": 31.7503, "step": 16665 }, { "epoch": 396.81194029850747, "grad_norm": 34.445335388183594, "learning_rate": 9.585119047619048e-06, "loss": 31.3729, "step": 16666 }, { "epoch": 396.8358208955224, "grad_norm": 33.05970001220703, "learning_rate": 9.58452380952381e-06, "loss": 29.862, "step": 16667 }, { "epoch": 396.85970149253734, "grad_norm": 31.89533042907715, "learning_rate": 9.583928571428572e-06, "loss": 30.946, "step": 16668 }, { "epoch": 396.88358208955225, "grad_norm": 28.336143493652344, "learning_rate": 9.583333333333335e-06, "loss": 31.0802, "step": 16669 }, { "epoch": 396.90746268656716, "grad_norm": 35.388946533203125, "learning_rate": 9.582738095238095e-06, "loss": 31.4563, "step": 16670 }, { "epoch": 396.93134328358207, "grad_norm": 30.30738067626953, "learning_rate": 9.582142857142857e-06, "loss": 30.6146, "step": 16671 }, { "epoch": 396.95522388059703, "grad_norm": 33.11353302001953, "learning_rate": 9.581547619047619e-06, "loss": 31.227, "step": 16672 }, { "epoch": 396.97910447761194, "grad_norm": 31.145061492919922, "learning_rate": 9.580952380952383e-06, "loss": 31.3158, "step": 16673 }, { "epoch": 397.0, "grad_norm": 28.533903121948242, "learning_rate": 9.580357142857144e-06, "loss": 26.6179, "step": 16674 }, { "epoch": 397.0238805970149, "grad_norm": 27.6167049407959, "learning_rate": 9.579761904761904e-06, "loss": 31.3673, "step": 16675 }, { "epoch": 397.0477611940299, "grad_norm": 34.531558990478516, "learning_rate": 9.579166666666668e-06, "loss": 30.3208, "step": 16676 }, { "epoch": 397.0716417910448, "grad_norm": 31.28594398498535, "learning_rate": 9.57857142857143e-06, "loss": 31.1348, "step": 16677 }, { "epoch": 397.0955223880597, "grad_norm": 31.299909591674805, "learning_rate": 9.577976190476192e-06, "loss": 31.634, "step": 16678 }, { "epoch": 397.1194029850746, "grad_norm": 29.449188232421875, "learning_rate": 9.577380952380953e-06, "loss": 31.2931, "step": 16679 }, { "epoch": 397.14328358208957, "grad_norm": 31.746688842773438, "learning_rate": 9.576785714285715e-06, "loss": 30.6019, "step": 16680 }, { "epoch": 397.1671641791045, "grad_norm": 27.6197452545166, "learning_rate": 9.576190476190477e-06, "loss": 30.9896, "step": 16681 }, { "epoch": 397.1910447761194, "grad_norm": 34.12122344970703, "learning_rate": 9.575595238095239e-06, "loss": 30.9409, "step": 16682 }, { "epoch": 397.21492537313435, "grad_norm": 30.74228286743164, "learning_rate": 9.575e-06, "loss": 29.8835, "step": 16683 }, { "epoch": 397.23880597014926, "grad_norm": 34.26853942871094, "learning_rate": 9.574404761904762e-06, "loss": 30.3535, "step": 16684 }, { "epoch": 397.26268656716417, "grad_norm": 30.06424331665039, "learning_rate": 9.573809523809524e-06, "loss": 30.5033, "step": 16685 }, { "epoch": 397.28656716417913, "grad_norm": 29.320924758911133, "learning_rate": 9.573214285714286e-06, "loss": 30.938, "step": 16686 }, { "epoch": 397.31044776119404, "grad_norm": 24.558950424194336, "learning_rate": 9.572619047619048e-06, "loss": 31.675, "step": 16687 }, { "epoch": 397.33432835820895, "grad_norm": 35.58845138549805, "learning_rate": 9.57202380952381e-06, "loss": 31.0714, "step": 16688 }, { "epoch": 397.35820895522386, "grad_norm": 30.322538375854492, "learning_rate": 9.571428571428573e-06, "loss": 30.6867, "step": 16689 }, { "epoch": 397.3820895522388, "grad_norm": 33.869937896728516, "learning_rate": 9.570833333333333e-06, "loss": 31.3392, "step": 16690 }, { "epoch": 397.40597014925373, "grad_norm": 29.600238800048828, "learning_rate": 9.570238095238095e-06, "loss": 31.5124, "step": 16691 }, { "epoch": 397.42985074626864, "grad_norm": 30.72909927368164, "learning_rate": 9.569642857142859e-06, "loss": 30.4904, "step": 16692 }, { "epoch": 397.4537313432836, "grad_norm": 25.677860260009766, "learning_rate": 9.56904761904762e-06, "loss": 30.9418, "step": 16693 }, { "epoch": 397.4776119402985, "grad_norm": 35.1114616394043, "learning_rate": 9.568452380952382e-06, "loss": 31.4766, "step": 16694 }, { "epoch": 397.5014925373134, "grad_norm": 29.866853713989258, "learning_rate": 9.567857142857142e-06, "loss": 30.2016, "step": 16695 }, { "epoch": 397.52537313432833, "grad_norm": 33.91158676147461, "learning_rate": 9.567261904761906e-06, "loss": 31.032, "step": 16696 }, { "epoch": 397.5492537313433, "grad_norm": 31.699338912963867, "learning_rate": 9.566666666666668e-06, "loss": 30.5229, "step": 16697 }, { "epoch": 397.5731343283582, "grad_norm": 31.88851547241211, "learning_rate": 9.56607142857143e-06, "loss": 32.2403, "step": 16698 }, { "epoch": 397.5970149253731, "grad_norm": 29.413162231445312, "learning_rate": 9.565476190476191e-06, "loss": 32.0086, "step": 16699 }, { "epoch": 397.6208955223881, "grad_norm": 32.275978088378906, "learning_rate": 9.564880952380953e-06, "loss": 31.3495, "step": 16700 }, { "epoch": 397.644776119403, "grad_norm": 26.762638092041016, "learning_rate": 9.564285714285715e-06, "loss": 32.9203, "step": 16701 }, { "epoch": 397.6686567164179, "grad_norm": 33.741455078125, "learning_rate": 9.563690476190477e-06, "loss": 31.4001, "step": 16702 }, { "epoch": 397.6925373134328, "grad_norm": 30.879159927368164, "learning_rate": 9.563095238095239e-06, "loss": 32.1866, "step": 16703 }, { "epoch": 397.7164179104478, "grad_norm": 34.93317794799805, "learning_rate": 9.562500000000002e-06, "loss": 30.9474, "step": 16704 }, { "epoch": 397.7402985074627, "grad_norm": 27.91045379638672, "learning_rate": 9.561904761904762e-06, "loss": 30.6972, "step": 16705 }, { "epoch": 397.7641791044776, "grad_norm": 34.52175521850586, "learning_rate": 9.561309523809524e-06, "loss": 31.2406, "step": 16706 }, { "epoch": 397.78805970149256, "grad_norm": 28.630678176879883, "learning_rate": 9.560714285714286e-06, "loss": 30.4547, "step": 16707 }, { "epoch": 397.81194029850747, "grad_norm": 31.232086181640625, "learning_rate": 9.56011904761905e-06, "loss": 29.5587, "step": 16708 }, { "epoch": 397.8358208955224, "grad_norm": 27.394580841064453, "learning_rate": 9.559523809523811e-06, "loss": 31.1712, "step": 16709 }, { "epoch": 397.85970149253734, "grad_norm": 35.872867584228516, "learning_rate": 9.558928571428571e-06, "loss": 31.9228, "step": 16710 }, { "epoch": 397.88358208955225, "grad_norm": 26.121097564697266, "learning_rate": 9.558333333333335e-06, "loss": 31.7529, "step": 16711 }, { "epoch": 397.90746268656716, "grad_norm": 30.0105037689209, "learning_rate": 9.557738095238097e-06, "loss": 30.5681, "step": 16712 }, { "epoch": 397.93134328358207, "grad_norm": 27.735515594482422, "learning_rate": 9.557142857142858e-06, "loss": 31.976, "step": 16713 }, { "epoch": 397.95522388059703, "grad_norm": 33.157508850097656, "learning_rate": 9.55654761904762e-06, "loss": 31.5556, "step": 16714 }, { "epoch": 397.97910447761194, "grad_norm": 28.184017181396484, "learning_rate": 9.555952380952382e-06, "loss": 30.6026, "step": 16715 }, { "epoch": 398.0, "grad_norm": 29.705965042114258, "learning_rate": 9.555357142857144e-06, "loss": 27.0042, "step": 16716 }, { "epoch": 398.0238805970149, "grad_norm": 31.84282112121582, "learning_rate": 9.554761904761906e-06, "loss": 30.4317, "step": 16717 }, { "epoch": 398.0477611940299, "grad_norm": 35.78097915649414, "learning_rate": 9.554166666666667e-06, "loss": 31.5071, "step": 16718 }, { "epoch": 398.0716417910448, "grad_norm": 33.300533294677734, "learning_rate": 9.55357142857143e-06, "loss": 30.8626, "step": 16719 }, { "epoch": 398.0955223880597, "grad_norm": 29.378177642822266, "learning_rate": 9.552976190476191e-06, "loss": 29.6357, "step": 16720 }, { "epoch": 398.1194029850746, "grad_norm": 27.08431625366211, "learning_rate": 9.552380952380953e-06, "loss": 30.6963, "step": 16721 }, { "epoch": 398.14328358208957, "grad_norm": 32.26847457885742, "learning_rate": 9.551785714285715e-06, "loss": 31.8527, "step": 16722 }, { "epoch": 398.1671641791045, "grad_norm": 27.086942672729492, "learning_rate": 9.551190476190476e-06, "loss": 31.3711, "step": 16723 }, { "epoch": 398.1910447761194, "grad_norm": 35.80241775512695, "learning_rate": 9.55059523809524e-06, "loss": 31.0931, "step": 16724 }, { "epoch": 398.21492537313435, "grad_norm": 30.800640106201172, "learning_rate": 9.55e-06, "loss": 32.0653, "step": 16725 }, { "epoch": 398.23880597014926, "grad_norm": 32.04216003417969, "learning_rate": 9.549404761904762e-06, "loss": 30.3713, "step": 16726 }, { "epoch": 398.26268656716417, "grad_norm": 28.524978637695312, "learning_rate": 9.548809523809525e-06, "loss": 31.4776, "step": 16727 }, { "epoch": 398.28656716417913, "grad_norm": 32.854637145996094, "learning_rate": 9.548214285714287e-06, "loss": 31.4661, "step": 16728 }, { "epoch": 398.31044776119404, "grad_norm": 29.422914505004883, "learning_rate": 9.547619047619049e-06, "loss": 30.2009, "step": 16729 }, { "epoch": 398.33432835820895, "grad_norm": 34.19198226928711, "learning_rate": 9.547023809523809e-06, "loss": 31.805, "step": 16730 }, { "epoch": 398.35820895522386, "grad_norm": 25.075660705566406, "learning_rate": 9.546428571428573e-06, "loss": 31.1434, "step": 16731 }, { "epoch": 398.3820895522388, "grad_norm": 36.68419647216797, "learning_rate": 9.545833333333334e-06, "loss": 31.0459, "step": 16732 }, { "epoch": 398.40597014925373, "grad_norm": 31.8087215423584, "learning_rate": 9.545238095238096e-06, "loss": 31.1787, "step": 16733 }, { "epoch": 398.42985074626864, "grad_norm": 32.61941146850586, "learning_rate": 9.544642857142858e-06, "loss": 31.876, "step": 16734 }, { "epoch": 398.4537313432836, "grad_norm": 29.566112518310547, "learning_rate": 9.54404761904762e-06, "loss": 31.1784, "step": 16735 }, { "epoch": 398.4776119402985, "grad_norm": 30.375717163085938, "learning_rate": 9.543452380952382e-06, "loss": 31.0403, "step": 16736 }, { "epoch": 398.5014925373134, "grad_norm": 26.639244079589844, "learning_rate": 9.542857142857143e-06, "loss": 30.9947, "step": 16737 }, { "epoch": 398.52537313432833, "grad_norm": 31.981529235839844, "learning_rate": 9.542261904761905e-06, "loss": 31.2653, "step": 16738 }, { "epoch": 398.5492537313433, "grad_norm": 27.035037994384766, "learning_rate": 9.541666666666669e-06, "loss": 31.008, "step": 16739 }, { "epoch": 398.5731343283582, "grad_norm": 34.97123718261719, "learning_rate": 9.541071428571429e-06, "loss": 31.0956, "step": 16740 }, { "epoch": 398.5970149253731, "grad_norm": 31.80586814880371, "learning_rate": 9.54047619047619e-06, "loss": 32.1095, "step": 16741 }, { "epoch": 398.6208955223881, "grad_norm": 32.614200592041016, "learning_rate": 9.539880952380953e-06, "loss": 31.3034, "step": 16742 }, { "epoch": 398.644776119403, "grad_norm": 29.285017013549805, "learning_rate": 9.539285714285716e-06, "loss": 31.2842, "step": 16743 }, { "epoch": 398.6686567164179, "grad_norm": 32.6673583984375, "learning_rate": 9.538690476190478e-06, "loss": 30.9135, "step": 16744 }, { "epoch": 398.6925373134328, "grad_norm": 25.550617218017578, "learning_rate": 9.538095238095238e-06, "loss": 31.2939, "step": 16745 }, { "epoch": 398.7164179104478, "grad_norm": 29.780179977416992, "learning_rate": 9.537500000000001e-06, "loss": 29.9004, "step": 16746 }, { "epoch": 398.7402985074627, "grad_norm": 25.99339485168457, "learning_rate": 9.536904761904763e-06, "loss": 30.5079, "step": 16747 }, { "epoch": 398.7641791044776, "grad_norm": 34.089752197265625, "learning_rate": 9.536309523809525e-06, "loss": 30.3218, "step": 16748 }, { "epoch": 398.78805970149256, "grad_norm": 30.218477249145508, "learning_rate": 9.535714285714287e-06, "loss": 32.472, "step": 16749 }, { "epoch": 398.81194029850747, "grad_norm": 27.748638153076172, "learning_rate": 9.535119047619049e-06, "loss": 31.2098, "step": 16750 }, { "epoch": 398.8358208955224, "grad_norm": 25.21699333190918, "learning_rate": 9.53452380952381e-06, "loss": 30.4431, "step": 16751 }, { "epoch": 398.85970149253734, "grad_norm": 28.97269058227539, "learning_rate": 9.533928571428572e-06, "loss": 31.8221, "step": 16752 }, { "epoch": 398.88358208955225, "grad_norm": 24.254507064819336, "learning_rate": 9.533333333333334e-06, "loss": 30.3948, "step": 16753 }, { "epoch": 398.90746268656716, "grad_norm": 31.532323837280273, "learning_rate": 9.532738095238096e-06, "loss": 31.2338, "step": 16754 }, { "epoch": 398.93134328358207, "grad_norm": 24.351436614990234, "learning_rate": 9.532142857142858e-06, "loss": 30.3739, "step": 16755 }, { "epoch": 398.95522388059703, "grad_norm": 34.84754180908203, "learning_rate": 9.53154761904762e-06, "loss": 31.6995, "step": 16756 }, { "epoch": 398.97910447761194, "grad_norm": 23.07961082458496, "learning_rate": 9.530952380952381e-06, "loss": 30.4715, "step": 16757 }, { "epoch": 399.0, "grad_norm": 24.954120635986328, "learning_rate": 9.530357142857143e-06, "loss": 27.2571, "step": 16758 }, { "epoch": 399.0238805970149, "grad_norm": 27.035842895507812, "learning_rate": 9.529761904761905e-06, "loss": 31.2928, "step": 16759 }, { "epoch": 399.0477611940299, "grad_norm": 28.470870971679688, "learning_rate": 9.529166666666667e-06, "loss": 31.2636, "step": 16760 }, { "epoch": 399.0716417910448, "grad_norm": 21.631488800048828, "learning_rate": 9.528571428571429e-06, "loss": 30.8329, "step": 16761 }, { "epoch": 399.0955223880597, "grad_norm": 30.524328231811523, "learning_rate": 9.527976190476192e-06, "loss": 31.04, "step": 16762 }, { "epoch": 399.1194029850746, "grad_norm": 23.0170841217041, "learning_rate": 9.527380952380954e-06, "loss": 30.8095, "step": 16763 }, { "epoch": 399.14328358208957, "grad_norm": 32.84740447998047, "learning_rate": 9.526785714285714e-06, "loss": 31.5631, "step": 16764 }, { "epoch": 399.1671641791045, "grad_norm": 25.2762451171875, "learning_rate": 9.526190476190476e-06, "loss": 30.8093, "step": 16765 }, { "epoch": 399.1910447761194, "grad_norm": 31.601844787597656, "learning_rate": 9.52559523809524e-06, "loss": 30.4768, "step": 16766 }, { "epoch": 399.21492537313435, "grad_norm": 28.054853439331055, "learning_rate": 9.525000000000001e-06, "loss": 31.3206, "step": 16767 }, { "epoch": 399.23880597014926, "grad_norm": 25.569419860839844, "learning_rate": 9.524404761904763e-06, "loss": 31.7122, "step": 16768 }, { "epoch": 399.26268656716417, "grad_norm": 23.35809898376465, "learning_rate": 9.523809523809525e-06, "loss": 30.367, "step": 16769 }, { "epoch": 399.28656716417913, "grad_norm": 23.588653564453125, "learning_rate": 9.523214285714287e-06, "loss": 31.3669, "step": 16770 }, { "epoch": 399.31044776119404, "grad_norm": 23.405994415283203, "learning_rate": 9.522619047619048e-06, "loss": 31.4575, "step": 16771 }, { "epoch": 399.33432835820895, "grad_norm": 20.56930160522461, "learning_rate": 9.52202380952381e-06, "loss": 30.3676, "step": 16772 }, { "epoch": 399.35820895522386, "grad_norm": 18.158884048461914, "learning_rate": 9.521428571428572e-06, "loss": 30.0671, "step": 16773 }, { "epoch": 399.3820895522388, "grad_norm": 23.44738006591797, "learning_rate": 9.520833333333334e-06, "loss": 30.2954, "step": 16774 }, { "epoch": 399.40597014925373, "grad_norm": 19.364089965820312, "learning_rate": 9.520238095238096e-06, "loss": 31.8877, "step": 16775 }, { "epoch": 399.42985074626864, "grad_norm": 22.331945419311523, "learning_rate": 9.519642857142857e-06, "loss": 30.6507, "step": 16776 }, { "epoch": 399.4537313432836, "grad_norm": 19.487991333007812, "learning_rate": 9.51904761904762e-06, "loss": 31.6449, "step": 16777 }, { "epoch": 399.4776119402985, "grad_norm": 20.457239151000977, "learning_rate": 9.518452380952383e-06, "loss": 31.7808, "step": 16778 }, { "epoch": 399.5014925373134, "grad_norm": 15.00794792175293, "learning_rate": 9.517857142857143e-06, "loss": 29.7277, "step": 16779 }, { "epoch": 399.52537313432833, "grad_norm": 24.077810287475586, "learning_rate": 9.517261904761905e-06, "loss": 31.4422, "step": 16780 }, { "epoch": 399.5492537313433, "grad_norm": 18.673059463500977, "learning_rate": 9.516666666666668e-06, "loss": 30.5987, "step": 16781 }, { "epoch": 399.5731343283582, "grad_norm": 20.430227279663086, "learning_rate": 9.51607142857143e-06, "loss": 31.5846, "step": 16782 }, { "epoch": 399.5970149253731, "grad_norm": 19.923513412475586, "learning_rate": 9.515476190476192e-06, "loss": 31.11, "step": 16783 }, { "epoch": 399.6208955223881, "grad_norm": 23.15761375427246, "learning_rate": 9.514880952380952e-06, "loss": 30.6794, "step": 16784 }, { "epoch": 399.644776119403, "grad_norm": 20.51605987548828, "learning_rate": 9.514285714285715e-06, "loss": 31.3281, "step": 16785 }, { "epoch": 399.6686567164179, "grad_norm": 18.474712371826172, "learning_rate": 9.513690476190477e-06, "loss": 31.3219, "step": 16786 }, { "epoch": 399.6925373134328, "grad_norm": 20.799448013305664, "learning_rate": 9.513095238095239e-06, "loss": 31.7438, "step": 16787 }, { "epoch": 399.7164179104478, "grad_norm": 20.413267135620117, "learning_rate": 9.5125e-06, "loss": 31.2168, "step": 16788 }, { "epoch": 399.7402985074627, "grad_norm": 20.1457462310791, "learning_rate": 9.511904761904763e-06, "loss": 30.8774, "step": 16789 }, { "epoch": 399.7641791044776, "grad_norm": 20.93187713623047, "learning_rate": 9.511309523809524e-06, "loss": 31.4587, "step": 16790 }, { "epoch": 399.78805970149256, "grad_norm": 19.636737823486328, "learning_rate": 9.510714285714286e-06, "loss": 31.1715, "step": 16791 }, { "epoch": 399.81194029850747, "grad_norm": 19.83819580078125, "learning_rate": 9.510119047619048e-06, "loss": 31.0146, "step": 16792 }, { "epoch": 399.8358208955224, "grad_norm": 18.057538986206055, "learning_rate": 9.50952380952381e-06, "loss": 31.2029, "step": 16793 }, { "epoch": 399.85970149253734, "grad_norm": 25.070390701293945, "learning_rate": 9.508928571428572e-06, "loss": 32.3302, "step": 16794 }, { "epoch": 399.88358208955225, "grad_norm": 19.523630142211914, "learning_rate": 9.508333333333333e-06, "loss": 31.3145, "step": 16795 }, { "epoch": 399.90746268656716, "grad_norm": 19.136125564575195, "learning_rate": 9.507738095238095e-06, "loss": 30.6125, "step": 16796 }, { "epoch": 399.93134328358207, "grad_norm": 20.086763381958008, "learning_rate": 9.507142857142859e-06, "loss": 30.7412, "step": 16797 }, { "epoch": 399.95522388059703, "grad_norm": 24.598318099975586, "learning_rate": 9.50654761904762e-06, "loss": 30.579, "step": 16798 }, { "epoch": 399.97910447761194, "grad_norm": 19.404541015625, "learning_rate": 9.50595238095238e-06, "loss": 31.4261, "step": 16799 }, { "epoch": 400.0, "grad_norm": 17.034467697143555, "learning_rate": 9.505357142857144e-06, "loss": 26.7138, "step": 16800 }, { "epoch": 400.0, "step": 16800, "total_flos": 8.258106306923356e+17, "train_loss": 1.570451229867481, "train_runtime": 25616.4387, "train_samples_per_second": 83.571, "train_steps_per_second": 0.656 }, { "epoch": 400.0238805970149, "grad_norm": 19.236080169677734, "learning_rate": 1e-05, "loss": 30.7231, "step": 16801 }, { "epoch": 400.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999433106575964e-06, "loss": 34.9673, "step": 16802 }, { "epoch": 400.0716417910448, "grad_norm": 311.71990966796875, "learning_rate": 9.999433106575964e-06, "loss": 37.6479, "step": 16803 }, { "epoch": 400.0955223880597, "grad_norm": 141.22789001464844, "learning_rate": 9.998866213151928e-06, "loss": 34.1772, "step": 16804 }, { "epoch": 400.1194029850746, "grad_norm": 84.35620880126953, "learning_rate": 9.998299319727893e-06, "loss": 35.0214, "step": 16805 }, { "epoch": 400.14328358208957, "grad_norm": 63.754234313964844, "learning_rate": 9.997732426303856e-06, "loss": 31.6235, "step": 16806 }, { "epoch": 400.1671641791045, "grad_norm": 46.571712493896484, "learning_rate": 9.997165532879819e-06, "loss": 31.1043, "step": 16807 }, { "epoch": 400.1910447761194, "grad_norm": 40.43476104736328, "learning_rate": 9.996598639455783e-06, "loss": 32.5885, "step": 16808 }, { "epoch": 400.21492537313435, "grad_norm": 42.0704345703125, "learning_rate": 9.996031746031746e-06, "loss": 31.767, "step": 16809 }, { "epoch": 400.23880597014926, "grad_norm": 36.093997955322266, "learning_rate": 9.99546485260771e-06, "loss": 32.3834, "step": 16810 }, { "epoch": 400.26268656716417, "grad_norm": 29.10378074645996, "learning_rate": 9.994897959183675e-06, "loss": 30.6534, "step": 16811 }, { "epoch": 400.28656716417913, "grad_norm": 27.766040802001953, "learning_rate": 9.994331065759638e-06, "loss": 30.9044, "step": 16812 }, { "epoch": 400.31044776119404, "grad_norm": 28.87425994873047, "learning_rate": 9.993764172335601e-06, "loss": 30.3871, "step": 16813 }, { "epoch": 400.33432835820895, "grad_norm": 33.46707534790039, "learning_rate": 9.993197278911566e-06, "loss": 31.9699, "step": 16814 }, { "epoch": 400.35820895522386, "grad_norm": 23.63628387451172, "learning_rate": 9.992630385487529e-06, "loss": 31.7989, "step": 16815 }, { "epoch": 400.3820895522388, "grad_norm": 26.49193572998047, "learning_rate": 9.992063492063493e-06, "loss": 30.8326, "step": 16816 }, { "epoch": 400.40597014925373, "grad_norm": 31.20513916015625, "learning_rate": 9.991496598639456e-06, "loss": 31.3319, "step": 16817 }, { "epoch": 400.42985074626864, "grad_norm": 26.55478286743164, "learning_rate": 9.99092970521542e-06, "loss": 31.0836, "step": 16818 }, { "epoch": 400.4537313432836, "grad_norm": 20.33902931213379, "learning_rate": 9.990362811791384e-06, "loss": 32.284, "step": 16819 }, { "epoch": 400.4776119402985, "grad_norm": 27.636608123779297, "learning_rate": 9.989795918367348e-06, "loss": 30.9584, "step": 16820 }, { "epoch": 400.5014925373134, "grad_norm": 25.69060516357422, "learning_rate": 9.989229024943311e-06, "loss": 31.1324, "step": 16821 }, { "epoch": 400.52537313432833, "grad_norm": 18.75033950805664, "learning_rate": 9.988662131519276e-06, "loss": 30.9402, "step": 16822 }, { "epoch": 400.5492537313433, "grad_norm": 23.087989807128906, "learning_rate": 9.988095238095239e-06, "loss": 31.4712, "step": 16823 }, { "epoch": 400.5731343283582, "grad_norm": 20.75522232055664, "learning_rate": 9.987528344671202e-06, "loss": 31.3741, "step": 16824 }, { "epoch": 400.5970149253731, "grad_norm": 17.48866844177246, "learning_rate": 9.986961451247166e-06, "loss": 30.8529, "step": 16825 }, { "epoch": 400.6208955223881, "grad_norm": 18.83658218383789, "learning_rate": 9.98639455782313e-06, "loss": 31.9591, "step": 16826 }, { "epoch": 400.644776119403, "grad_norm": 23.694780349731445, "learning_rate": 9.985827664399094e-06, "loss": 30.2027, "step": 16827 }, { "epoch": 400.6686567164179, "grad_norm": 22.643640518188477, "learning_rate": 9.985260770975057e-06, "loss": 29.3691, "step": 16828 }, { "epoch": 400.6925373134328, "grad_norm": 17.421306610107422, "learning_rate": 9.984693877551021e-06, "loss": 31.3057, "step": 16829 }, { "epoch": 400.7164179104478, "grad_norm": 21.523630142211914, "learning_rate": 9.984126984126986e-06, "loss": 31.839, "step": 16830 }, { "epoch": 400.7402985074627, "grad_norm": 21.368457794189453, "learning_rate": 9.983560090702949e-06, "loss": 31.2177, "step": 16831 }, { "epoch": 400.7641791044776, "grad_norm": 21.82390594482422, "learning_rate": 9.982993197278913e-06, "loss": 32.4152, "step": 16832 }, { "epoch": 400.78805970149256, "grad_norm": 17.901853561401367, "learning_rate": 9.982426303854876e-06, "loss": 30.6141, "step": 16833 }, { "epoch": 400.81194029850747, "grad_norm": 24.431535720825195, "learning_rate": 9.981859410430839e-06, "loss": 32.3977, "step": 16834 }, { "epoch": 400.8358208955224, "grad_norm": NaN, "learning_rate": 9.981292517006804e-06, "loss": 56.8003, "step": 16835 }, { "epoch": 400.85970149253734, "grad_norm": 17.254962921142578, "learning_rate": 9.981292517006804e-06, "loss": 30.2194, "step": 16836 }, { "epoch": 400.88358208955225, "grad_norm": 22.819360733032227, "learning_rate": 9.980725623582768e-06, "loss": 31.6324, "step": 16837 }, { "epoch": 400.90746268656716, "grad_norm": 19.341094970703125, "learning_rate": 9.980158730158731e-06, "loss": 30.3093, "step": 16838 }, { "epoch": 400.93134328358207, "grad_norm": 21.019248962402344, "learning_rate": 9.979591836734694e-06, "loss": 32.384, "step": 16839 }, { "epoch": 400.95522388059703, "grad_norm": 18.01608657836914, "learning_rate": 9.979024943310659e-06, "loss": 31.0385, "step": 16840 }, { "epoch": 400.97910447761194, "grad_norm": 21.27842903137207, "learning_rate": 9.978458049886622e-06, "loss": 30.5699, "step": 16841 }, { "epoch": 401.0, "grad_norm": 16.495031356811523, "learning_rate": 9.977891156462586e-06, "loss": 25.8223, "step": 16842 }, { "epoch": 401.0238805970149, "grad_norm": 18.222089767456055, "learning_rate": 9.977324263038549e-06, "loss": 32.0258, "step": 16843 }, { "epoch": 401.0477611940299, "grad_norm": 21.727060317993164, "learning_rate": 9.976757369614514e-06, "loss": 31.3581, "step": 16844 }, { "epoch": 401.0716417910448, "grad_norm": 20.427047729492188, "learning_rate": 9.976190476190477e-06, "loss": 30.6954, "step": 16845 }, { "epoch": 401.0955223880597, "grad_norm": 16.091264724731445, "learning_rate": 9.975623582766441e-06, "loss": 30.2019, "step": 16846 }, { "epoch": 401.1194029850746, "grad_norm": 16.839252471923828, "learning_rate": 9.975056689342404e-06, "loss": 30.8647, "step": 16847 }, { "epoch": 401.14328358208957, "grad_norm": 18.731321334838867, "learning_rate": 9.974489795918369e-06, "loss": 31.5526, "step": 16848 }, { "epoch": 401.1671641791045, "grad_norm": 17.480756759643555, "learning_rate": 9.973922902494332e-06, "loss": 30.8418, "step": 16849 }, { "epoch": 401.1910447761194, "grad_norm": 22.90028953552246, "learning_rate": 9.973356009070294e-06, "loss": 31.4254, "step": 16850 }, { "epoch": 401.21492537313435, "grad_norm": 17.838167190551758, "learning_rate": 9.972789115646259e-06, "loss": 31.2838, "step": 16851 }, { "epoch": 401.23880597014926, "grad_norm": 17.68524742126465, "learning_rate": 9.972222222222224e-06, "loss": 32.1313, "step": 16852 }, { "epoch": 401.26268656716417, "grad_norm": 15.703474998474121, "learning_rate": 9.971655328798187e-06, "loss": 31.302, "step": 16853 }, { "epoch": 401.28656716417913, "grad_norm": 16.799213409423828, "learning_rate": 9.97108843537415e-06, "loss": 30.9738, "step": 16854 }, { "epoch": 401.31044776119404, "grad_norm": 16.973718643188477, "learning_rate": 9.970521541950114e-06, "loss": 30.7046, "step": 16855 }, { "epoch": 401.33432835820895, "grad_norm": 17.80534553527832, "learning_rate": 9.969954648526079e-06, "loss": 31.1274, "step": 16856 }, { "epoch": 401.35820895522386, "grad_norm": 23.12386703491211, "learning_rate": 9.969387755102042e-06, "loss": 31.0468, "step": 16857 }, { "epoch": 401.3820895522388, "grad_norm": 17.315753936767578, "learning_rate": 9.968820861678006e-06, "loss": 30.9335, "step": 16858 }, { "epoch": 401.40597014925373, "grad_norm": 15.378706932067871, "learning_rate": 9.968253968253969e-06, "loss": 29.2263, "step": 16859 }, { "epoch": 401.42985074626864, "grad_norm": 17.434106826782227, "learning_rate": 9.967687074829932e-06, "loss": 30.0167, "step": 16860 }, { "epoch": 401.4537313432836, "grad_norm": 18.828147888183594, "learning_rate": 9.967120181405897e-06, "loss": 29.8575, "step": 16861 }, { "epoch": 401.4776119402985, "grad_norm": 19.391490936279297, "learning_rate": 9.966553287981861e-06, "loss": 31.5681, "step": 16862 }, { "epoch": 401.5014925373134, "grad_norm": 20.753772735595703, "learning_rate": 9.965986394557824e-06, "loss": 31.2438, "step": 16863 }, { "epoch": 401.52537313432833, "grad_norm": 19.081161499023438, "learning_rate": 9.965419501133787e-06, "loss": 30.9956, "step": 16864 }, { "epoch": 401.5492537313433, "grad_norm": 15.791476249694824, "learning_rate": 9.964852607709752e-06, "loss": 31.0609, "step": 16865 }, { "epoch": 401.5731343283582, "grad_norm": 26.492345809936523, "learning_rate": 9.964285714285714e-06, "loss": 30.9209, "step": 16866 }, { "epoch": 401.5970149253731, "grad_norm": 21.19564437866211, "learning_rate": 9.963718820861679e-06, "loss": 31.4632, "step": 16867 }, { "epoch": 401.6208955223881, "grad_norm": 22.00580596923828, "learning_rate": 9.963151927437642e-06, "loss": 31.5752, "step": 16868 }, { "epoch": 401.644776119403, "grad_norm": 24.39080810546875, "learning_rate": 9.962585034013607e-06, "loss": 31.3848, "step": 16869 }, { "epoch": 401.6686567164179, "grad_norm": 25.685808181762695, "learning_rate": 9.96201814058957e-06, "loss": 31.0735, "step": 16870 }, { "epoch": 401.6925373134328, "grad_norm": 17.7955379486084, "learning_rate": 9.961451247165534e-06, "loss": 31.1636, "step": 16871 }, { "epoch": 401.7164179104478, "grad_norm": 26.66135597229004, "learning_rate": 9.960884353741499e-06, "loss": 31.876, "step": 16872 }, { "epoch": 401.7402985074627, "grad_norm": 21.049110412597656, "learning_rate": 9.960317460317462e-06, "loss": 30.4948, "step": 16873 }, { "epoch": 401.7641791044776, "grad_norm": 18.58393096923828, "learning_rate": 9.959750566893424e-06, "loss": 30.592, "step": 16874 }, { "epoch": 401.78805970149256, "grad_norm": 22.98539161682129, "learning_rate": 9.959183673469387e-06, "loss": 29.9171, "step": 16875 }, { "epoch": 401.81194029850747, "grad_norm": 20.962472915649414, "learning_rate": 9.958616780045352e-06, "loss": 31.3894, "step": 16876 }, { "epoch": 401.8358208955224, "grad_norm": 18.580272674560547, "learning_rate": 9.958049886621317e-06, "loss": 31.0705, "step": 16877 }, { "epoch": 401.85970149253734, "grad_norm": 16.85481071472168, "learning_rate": 9.95748299319728e-06, "loss": 30.511, "step": 16878 }, { "epoch": 401.88358208955225, "grad_norm": 18.063079833984375, "learning_rate": 9.956916099773244e-06, "loss": 31.5687, "step": 16879 }, { "epoch": 401.90746268656716, "grad_norm": 16.066482543945312, "learning_rate": 9.956349206349207e-06, "loss": 30.6445, "step": 16880 }, { "epoch": 401.93134328358207, "grad_norm": 18.891555786132812, "learning_rate": 9.955782312925172e-06, "loss": 31.0396, "step": 16881 }, { "epoch": 401.95522388059703, "grad_norm": 19.531452178955078, "learning_rate": 9.955215419501134e-06, "loss": 31.6228, "step": 16882 }, { "epoch": 401.97910447761194, "grad_norm": 17.721233367919922, "learning_rate": 9.954648526077099e-06, "loss": 31.2063, "step": 16883 }, { "epoch": 402.0, "grad_norm": 14.193357467651367, "learning_rate": 9.954081632653062e-06, "loss": 27.6322, "step": 16884 }, { "epoch": 402.0238805970149, "grad_norm": 17.433488845825195, "learning_rate": 9.953514739229025e-06, "loss": 31.3941, "step": 16885 }, { "epoch": 402.0477611940299, "grad_norm": 18.68000602722168, "learning_rate": 9.95294784580499e-06, "loss": 31.8399, "step": 16886 }, { "epoch": 402.0716417910448, "grad_norm": 26.427698135375977, "learning_rate": 9.952380952380954e-06, "loss": 30.4522, "step": 16887 }, { "epoch": 402.0955223880597, "grad_norm": 22.38204002380371, "learning_rate": 9.951814058956917e-06, "loss": 30.7499, "step": 16888 }, { "epoch": 402.1194029850746, "grad_norm": 16.085290908813477, "learning_rate": 9.95124716553288e-06, "loss": 30.6177, "step": 16889 }, { "epoch": 402.14328358208957, "grad_norm": 34.455780029296875, "learning_rate": 9.950680272108844e-06, "loss": 31.4126, "step": 16890 }, { "epoch": 402.1671641791045, "grad_norm": 23.277976989746094, "learning_rate": 9.950113378684807e-06, "loss": 31.2081, "step": 16891 }, { "epoch": 402.1910447761194, "grad_norm": 29.880111694335938, "learning_rate": 9.949546485260772e-06, "loss": 31.1892, "step": 16892 }, { "epoch": 402.21492537313435, "grad_norm": 25.491518020629883, "learning_rate": 9.948979591836737e-06, "loss": 30.7009, "step": 16893 }, { "epoch": 402.23880597014926, "grad_norm": 22.70813751220703, "learning_rate": 9.9484126984127e-06, "loss": 30.9564, "step": 16894 }, { "epoch": 402.26268656716417, "grad_norm": 32.41717529296875, "learning_rate": 9.947845804988662e-06, "loss": 31.0667, "step": 16895 }, { "epoch": 402.28656716417913, "grad_norm": 18.49527359008789, "learning_rate": 9.947278911564627e-06, "loss": 31.9441, "step": 16896 }, { "epoch": 402.31044776119404, "grad_norm": 35.96527099609375, "learning_rate": 9.946712018140592e-06, "loss": 30.9551, "step": 16897 }, { "epoch": 402.33432835820895, "grad_norm": 24.00267791748047, "learning_rate": 9.946145124716554e-06, "loss": 32.069, "step": 16898 }, { "epoch": 402.35820895522386, "grad_norm": 27.444225311279297, "learning_rate": 9.945578231292517e-06, "loss": 30.53, "step": 16899 }, { "epoch": 402.3820895522388, "grad_norm": 26.21395492553711, "learning_rate": 9.945011337868482e-06, "loss": 30.4552, "step": 16900 }, { "epoch": 402.40597014925373, "grad_norm": 21.139373779296875, "learning_rate": 9.944444444444445e-06, "loss": 31.3025, "step": 16901 }, { "epoch": 402.42985074626864, "grad_norm": 33.18841552734375, "learning_rate": 9.94387755102041e-06, "loss": 29.6865, "step": 16902 }, { "epoch": 402.4537313432836, "grad_norm": 21.520919799804688, "learning_rate": 9.943310657596372e-06, "loss": 30.8559, "step": 16903 }, { "epoch": 402.4776119402985, "grad_norm": 37.26947784423828, "learning_rate": 9.942743764172337e-06, "loss": 30.3123, "step": 16904 }, { "epoch": 402.5014925373134, "grad_norm": 28.98073387145996, "learning_rate": 9.9421768707483e-06, "loss": 30.9669, "step": 16905 }, { "epoch": 402.52537313432833, "grad_norm": 34.28809356689453, "learning_rate": 9.941609977324263e-06, "loss": 31.4575, "step": 16906 }, { "epoch": 402.5492537313433, "grad_norm": 25.58087158203125, "learning_rate": 9.941043083900227e-06, "loss": 31.7515, "step": 16907 }, { "epoch": 402.5731343283582, "grad_norm": 30.296483993530273, "learning_rate": 9.940476190476192e-06, "loss": 31.4462, "step": 16908 }, { "epoch": 402.5970149253731, "grad_norm": 20.742158889770508, "learning_rate": 9.939909297052155e-06, "loss": 29.5965, "step": 16909 }, { "epoch": 402.6208955223881, "grad_norm": 32.30087661743164, "learning_rate": 9.939342403628118e-06, "loss": 31.6445, "step": 16910 }, { "epoch": 402.644776119403, "grad_norm": 23.726449966430664, "learning_rate": 9.938775510204082e-06, "loss": 30.9719, "step": 16911 }, { "epoch": 402.6686567164179, "grad_norm": 28.384923934936523, "learning_rate": 9.938208616780047e-06, "loss": 31.3353, "step": 16912 }, { "epoch": 402.6925373134328, "grad_norm": 25.4388370513916, "learning_rate": 9.93764172335601e-06, "loss": 30.8237, "step": 16913 }, { "epoch": 402.7164179104478, "grad_norm": 27.198352813720703, "learning_rate": 9.937074829931974e-06, "loss": 31.0413, "step": 16914 }, { "epoch": 402.7402985074627, "grad_norm": 25.418916702270508, "learning_rate": 9.936507936507937e-06, "loss": 30.7082, "step": 16915 }, { "epoch": 402.7641791044776, "grad_norm": 28.595592498779297, "learning_rate": 9.9359410430839e-06, "loss": 31.3851, "step": 16916 }, { "epoch": 402.78805970149256, "grad_norm": 24.97838020324707, "learning_rate": 9.935374149659865e-06, "loss": 31.1574, "step": 16917 }, { "epoch": 402.81194029850747, "grad_norm": 27.653484344482422, "learning_rate": 9.93480725623583e-06, "loss": 31.1965, "step": 16918 }, { "epoch": 402.8358208955224, "grad_norm": 23.469223022460938, "learning_rate": 9.934240362811792e-06, "loss": 31.0656, "step": 16919 }, { "epoch": 402.85970149253734, "grad_norm": 27.161710739135742, "learning_rate": 9.933673469387755e-06, "loss": 31.3268, "step": 16920 }, { "epoch": 402.88358208955225, "grad_norm": 23.48308753967285, "learning_rate": 9.93310657596372e-06, "loss": 31.0566, "step": 16921 }, { "epoch": 402.90746268656716, "grad_norm": 23.823659896850586, "learning_rate": 9.932539682539684e-06, "loss": 30.3514, "step": 16922 }, { "epoch": 402.93134328358207, "grad_norm": 20.73362159729004, "learning_rate": 9.931972789115647e-06, "loss": 29.3461, "step": 16923 }, { "epoch": 402.95522388059703, "grad_norm": 21.970233917236328, "learning_rate": 9.93140589569161e-06, "loss": 31.0073, "step": 16924 }, { "epoch": 402.97910447761194, "grad_norm": NaN, "learning_rate": 9.930839002267575e-06, "loss": 51.1836, "step": 16925 }, { "epoch": 403.0, "grad_norm": 18.906526565551758, "learning_rate": 9.930839002267575e-06, "loss": 27.3897, "step": 16926 }, { "epoch": 403.0238805970149, "grad_norm": 22.018394470214844, "learning_rate": 9.930272108843538e-06, "loss": 29.3437, "step": 16927 }, { "epoch": 403.0477611940299, "grad_norm": 21.189476013183594, "learning_rate": 9.929705215419502e-06, "loss": 31.4675, "step": 16928 }, { "epoch": 403.0716417910448, "grad_norm": 22.949079513549805, "learning_rate": 9.929138321995465e-06, "loss": 31.5757, "step": 16929 }, { "epoch": 403.0955223880597, "grad_norm": 20.074411392211914, "learning_rate": 9.92857142857143e-06, "loss": 30.3693, "step": 16930 }, { "epoch": 403.1194029850746, "grad_norm": 22.19842529296875, "learning_rate": 9.928004535147393e-06, "loss": 31.1422, "step": 16931 }, { "epoch": 403.14328358208957, "grad_norm": 19.386714935302734, "learning_rate": 9.927437641723356e-06, "loss": 29.6273, "step": 16932 }, { "epoch": 403.1671641791045, "grad_norm": 22.950700759887695, "learning_rate": 9.92687074829932e-06, "loss": 30.1628, "step": 16933 }, { "epoch": 403.1910447761194, "grad_norm": 18.49382209777832, "learning_rate": 9.926303854875285e-06, "loss": 31.9745, "step": 16934 }, { "epoch": 403.21492537313435, "grad_norm": 25.086666107177734, "learning_rate": 9.925736961451248e-06, "loss": 30.9489, "step": 16935 }, { "epoch": 403.23880597014926, "grad_norm": 21.343698501586914, "learning_rate": 9.92517006802721e-06, "loss": 31.0264, "step": 16936 }, { "epoch": 403.26268656716417, "grad_norm": 23.167051315307617, "learning_rate": 9.924603174603175e-06, "loss": 30.348, "step": 16937 }, { "epoch": 403.28656716417913, "grad_norm": 21.987293243408203, "learning_rate": 9.92403628117914e-06, "loss": 32.2085, "step": 16938 }, { "epoch": 403.31044776119404, "grad_norm": 21.484840393066406, "learning_rate": 9.923469387755103e-06, "loss": 32.1084, "step": 16939 }, { "epoch": 403.33432835820895, "grad_norm": 22.352279663085938, "learning_rate": 9.922902494331067e-06, "loss": 31.2459, "step": 16940 }, { "epoch": 403.35820895522386, "grad_norm": 18.30046844482422, "learning_rate": 9.92233560090703e-06, "loss": 30.7262, "step": 16941 }, { "epoch": 403.3820895522388, "grad_norm": 21.983285903930664, "learning_rate": 9.921768707482993e-06, "loss": 31.6438, "step": 16942 }, { "epoch": 403.40597014925373, "grad_norm": 18.815380096435547, "learning_rate": 9.921201814058958e-06, "loss": 30.3058, "step": 16943 }, { "epoch": 403.42985074626864, "grad_norm": 17.1711368560791, "learning_rate": 9.920634920634922e-06, "loss": 31.1327, "step": 16944 }, { "epoch": 403.4537313432836, "grad_norm": 18.554462432861328, "learning_rate": 9.920068027210885e-06, "loss": 30.6388, "step": 16945 }, { "epoch": 403.4776119402985, "grad_norm": 17.991119384765625, "learning_rate": 9.919501133786848e-06, "loss": 31.1263, "step": 16946 }, { "epoch": 403.5014925373134, "grad_norm": 17.035293579101562, "learning_rate": 9.918934240362813e-06, "loss": 30.9584, "step": 16947 }, { "epoch": 403.52537313432833, "grad_norm": 17.614078521728516, "learning_rate": 9.918367346938776e-06, "loss": 31.2794, "step": 16948 }, { "epoch": 403.5492537313433, "grad_norm": 19.074827194213867, "learning_rate": 9.91780045351474e-06, "loss": 31.2004, "step": 16949 }, { "epoch": 403.5731343283582, "grad_norm": 20.858890533447266, "learning_rate": 9.917233560090703e-06, "loss": 31.1087, "step": 16950 }, { "epoch": 403.5970149253731, "grad_norm": 17.543880462646484, "learning_rate": 9.916666666666668e-06, "loss": 31.233, "step": 16951 }, { "epoch": 403.6208955223881, "grad_norm": 16.920913696289062, "learning_rate": 9.91609977324263e-06, "loss": 31.0293, "step": 16952 }, { "epoch": 403.644776119403, "grad_norm": 19.769798278808594, "learning_rate": 9.915532879818595e-06, "loss": 31.1607, "step": 16953 }, { "epoch": 403.6686567164179, "grad_norm": 20.002456665039062, "learning_rate": 9.91496598639456e-06, "loss": 30.9741, "step": 16954 }, { "epoch": 403.6925373134328, "grad_norm": 17.78573989868164, "learning_rate": 9.914399092970523e-06, "loss": 29.1995, "step": 16955 }, { "epoch": 403.7164179104478, "grad_norm": 15.988506317138672, "learning_rate": 9.913832199546486e-06, "loss": 30.9922, "step": 16956 }, { "epoch": 403.7402985074627, "grad_norm": 16.265708923339844, "learning_rate": 9.913265306122449e-06, "loss": 32.3465, "step": 16957 }, { "epoch": 403.7641791044776, "grad_norm": 18.048246383666992, "learning_rate": 9.912698412698413e-06, "loss": 31.4117, "step": 16958 }, { "epoch": 403.78805970149256, "grad_norm": 27.613380432128906, "learning_rate": 9.912131519274378e-06, "loss": 30.1975, "step": 16959 }, { "epoch": 403.81194029850747, "grad_norm": 21.412445068359375, "learning_rate": 9.91156462585034e-06, "loss": 31.1824, "step": 16960 }, { "epoch": 403.8358208955224, "grad_norm": 15.627758026123047, "learning_rate": 9.910997732426305e-06, "loss": 29.8711, "step": 16961 }, { "epoch": 403.85970149253734, "grad_norm": 17.799707412719727, "learning_rate": 9.910430839002268e-06, "loss": 31.122, "step": 16962 }, { "epoch": 403.88358208955225, "grad_norm": 24.470787048339844, "learning_rate": 9.909863945578233e-06, "loss": 30.9715, "step": 16963 }, { "epoch": 403.90746268656716, "grad_norm": 23.644081115722656, "learning_rate": 9.909297052154196e-06, "loss": 29.733, "step": 16964 }, { "epoch": 403.93134328358207, "grad_norm": 17.018402099609375, "learning_rate": 9.90873015873016e-06, "loss": 31.1744, "step": 16965 }, { "epoch": 403.95522388059703, "grad_norm": 17.959978103637695, "learning_rate": 9.908163265306123e-06, "loss": 31.198, "step": 16966 }, { "epoch": 403.97910447761194, "grad_norm": 15.238065719604492, "learning_rate": 9.907596371882086e-06, "loss": 31.2774, "step": 16967 }, { "epoch": 404.0, "grad_norm": NaN, "learning_rate": 9.90702947845805e-06, "loss": 27.5237, "step": 16968 }, { "epoch": 404.0238805970149, "grad_norm": 21.82721710205078, "learning_rate": 9.90702947845805e-06, "loss": 31.1855, "step": 16969 }, { "epoch": 404.0477611940299, "grad_norm": 22.108537673950195, "learning_rate": 9.906462585034015e-06, "loss": 29.9266, "step": 16970 }, { "epoch": 404.0716417910448, "grad_norm": 18.823047637939453, "learning_rate": 9.905895691609978e-06, "loss": 31.2415, "step": 16971 }, { "epoch": 404.0955223880597, "grad_norm": 19.243526458740234, "learning_rate": 9.905328798185941e-06, "loss": 29.7391, "step": 16972 }, { "epoch": 404.1194029850746, "grad_norm": 18.996889114379883, "learning_rate": 9.904761904761906e-06, "loss": 31.3894, "step": 16973 }, { "epoch": 404.14328358208957, "grad_norm": NaN, "learning_rate": 9.904195011337869e-06, "loss": 31.1024, "step": 16974 }, { "epoch": 404.1671641791045, "grad_norm": 21.685443878173828, "learning_rate": 9.904195011337869e-06, "loss": 31.273, "step": 16975 }, { "epoch": 404.1910447761194, "grad_norm": 17.583675384521484, "learning_rate": 9.903628117913833e-06, "loss": 31.0446, "step": 16976 }, { "epoch": 404.21492537313435, "grad_norm": 22.83050537109375, "learning_rate": 9.903061224489798e-06, "loss": 30.3282, "step": 16977 }, { "epoch": 404.23880597014926, "grad_norm": NaN, "learning_rate": 9.90249433106576e-06, "loss": 42.5643, "step": 16978 }, { "epoch": 404.26268656716417, "grad_norm": 25.750978469848633, "learning_rate": 9.90249433106576e-06, "loss": 31.4035, "step": 16979 }, { "epoch": 404.28656716417913, "grad_norm": 18.988866806030273, "learning_rate": 9.901927437641724e-06, "loss": 31.6592, "step": 16980 }, { "epoch": 404.31044776119404, "grad_norm": 17.866527557373047, "learning_rate": 9.901360544217688e-06, "loss": 29.8526, "step": 16981 }, { "epoch": 404.33432835820895, "grad_norm": 17.667320251464844, "learning_rate": 9.900793650793653e-06, "loss": 30.7798, "step": 16982 }, { "epoch": 404.35820895522386, "grad_norm": 21.57267951965332, "learning_rate": 9.900226757369616e-06, "loss": 31.132, "step": 16983 }, { "epoch": 404.3820895522388, "grad_norm": 21.000722885131836, "learning_rate": 9.899659863945579e-06, "loss": 31.3184, "step": 16984 }, { "epoch": 404.40597014925373, "grad_norm": 17.419523239135742, "learning_rate": 9.899092970521543e-06, "loss": 31.3596, "step": 16985 }, { "epoch": 404.42985074626864, "grad_norm": 18.504884719848633, "learning_rate": 9.898526077097506e-06, "loss": 30.8438, "step": 16986 }, { "epoch": 404.4537313432836, "grad_norm": 18.881832122802734, "learning_rate": 9.89795918367347e-06, "loss": 30.2917, "step": 16987 }, { "epoch": 404.4776119402985, "grad_norm": 14.251657485961914, "learning_rate": 9.897392290249433e-06, "loss": 30.8155, "step": 16988 }, { "epoch": 404.5014925373134, "grad_norm": 19.814865112304688, "learning_rate": 9.896825396825398e-06, "loss": 31.6332, "step": 16989 }, { "epoch": 404.52537313432833, "grad_norm": 19.100130081176758, "learning_rate": 9.896258503401361e-06, "loss": 30.5632, "step": 16990 }, { "epoch": 404.5492537313433, "grad_norm": 22.569988250732422, "learning_rate": 9.895691609977326e-06, "loss": 30.6678, "step": 16991 }, { "epoch": 404.5731343283582, "grad_norm": 22.602933883666992, "learning_rate": 9.895124716553288e-06, "loss": 31.5504, "step": 16992 }, { "epoch": 404.5970149253731, "grad_norm": 18.80979347229004, "learning_rate": 9.894557823129253e-06, "loss": 31.4335, "step": 16993 }, { "epoch": 404.6208955223881, "grad_norm": 17.524200439453125, "learning_rate": 9.893990929705216e-06, "loss": 31.1216, "step": 16994 }, { "epoch": 404.644776119403, "grad_norm": 16.63074493408203, "learning_rate": 9.893424036281179e-06, "loss": 30.694, "step": 16995 }, { "epoch": 404.6686567164179, "grad_norm": 17.04694366455078, "learning_rate": 9.892857142857143e-06, "loss": 31.897, "step": 16996 }, { "epoch": 404.6925373134328, "grad_norm": 16.07754135131836, "learning_rate": 9.892290249433108e-06, "loss": 31.4077, "step": 16997 }, { "epoch": 404.7164179104478, "grad_norm": 17.20372200012207, "learning_rate": 9.891723356009071e-06, "loss": 30.5793, "step": 16998 }, { "epoch": 404.7402985074627, "grad_norm": 18.453689575195312, "learning_rate": 9.891156462585036e-06, "loss": 30.3079, "step": 16999 }, { "epoch": 404.7641791044776, "grad_norm": 17.784910202026367, "learning_rate": 9.890589569160998e-06, "loss": 30.5712, "step": 17000 }, { "epoch": 404.78805970149256, "grad_norm": 19.61137580871582, "learning_rate": 9.890022675736961e-06, "loss": 30.6252, "step": 17001 }, { "epoch": 404.81194029850747, "grad_norm": 19.092275619506836, "learning_rate": 9.889455782312926e-06, "loss": 30.228, "step": 17002 }, { "epoch": 404.8358208955224, "grad_norm": 22.4578857421875, "learning_rate": 9.88888888888889e-06, "loss": 29.8657, "step": 17003 }, { "epoch": 404.85970149253734, "grad_norm": 21.06572723388672, "learning_rate": 9.888321995464853e-06, "loss": 30.2763, "step": 17004 }, { "epoch": 404.88358208955225, "grad_norm": 22.17323112487793, "learning_rate": 9.887755102040816e-06, "loss": 31.2704, "step": 17005 }, { "epoch": 404.90746268656716, "grad_norm": 18.96662139892578, "learning_rate": 9.887188208616781e-06, "loss": 30.9814, "step": 17006 }, { "epoch": 404.93134328358207, "grad_norm": 21.42250633239746, "learning_rate": 9.886621315192746e-06, "loss": 31.4689, "step": 17007 }, { "epoch": 404.95522388059703, "grad_norm": 23.838497161865234, "learning_rate": 9.886054421768708e-06, "loss": 31.8555, "step": 17008 }, { "epoch": 404.97910447761194, "grad_norm": 19.52179718017578, "learning_rate": 9.885487528344671e-06, "loss": 31.0752, "step": 17009 }, { "epoch": 405.0, "grad_norm": 17.59729766845703, "learning_rate": 9.884920634920636e-06, "loss": 26.1024, "step": 17010 }, { "epoch": 405.0238805970149, "grad_norm": 18.205644607543945, "learning_rate": 9.884353741496599e-06, "loss": 30.9611, "step": 17011 }, { "epoch": 405.0477611940299, "grad_norm": NaN, "learning_rate": 9.883786848072563e-06, "loss": 45.3269, "step": 17012 }, { "epoch": 405.0716417910448, "grad_norm": 19.729263305664062, "learning_rate": 9.883786848072563e-06, "loss": 31.0441, "step": 17013 }, { "epoch": 405.0955223880597, "grad_norm": 19.006351470947266, "learning_rate": 9.883219954648526e-06, "loss": 31.161, "step": 17014 }, { "epoch": 405.1194029850746, "grad_norm": 18.71268653869629, "learning_rate": 9.882653061224491e-06, "loss": 32.3478, "step": 17015 }, { "epoch": 405.14328358208957, "grad_norm": 18.07982063293457, "learning_rate": 9.882086167800454e-06, "loss": 32.0008, "step": 17016 }, { "epoch": 405.1671641791045, "grad_norm": 23.311058044433594, "learning_rate": 9.881519274376418e-06, "loss": 29.8327, "step": 17017 }, { "epoch": 405.1910447761194, "grad_norm": 19.43598175048828, "learning_rate": 9.880952380952381e-06, "loss": 30.2444, "step": 17018 }, { "epoch": 405.21492537313435, "grad_norm": 20.689617156982422, "learning_rate": 9.880385487528346e-06, "loss": 30.5942, "step": 17019 }, { "epoch": 405.23880597014926, "grad_norm": 16.17937469482422, "learning_rate": 9.879818594104309e-06, "loss": 30.325, "step": 17020 }, { "epoch": 405.26268656716417, "grad_norm": 21.156057357788086, "learning_rate": 9.879251700680272e-06, "loss": 32.2917, "step": 17021 }, { "epoch": 405.28656716417913, "grad_norm": 22.3360595703125, "learning_rate": 9.878684807256236e-06, "loss": 31.0669, "step": 17022 }, { "epoch": 405.31044776119404, "grad_norm": 19.197404861450195, "learning_rate": 9.878117913832201e-06, "loss": 31.7879, "step": 17023 }, { "epoch": 405.33432835820895, "grad_norm": 19.768911361694336, "learning_rate": 9.877551020408164e-06, "loss": 30.5908, "step": 17024 }, { "epoch": 405.35820895522386, "grad_norm": 26.982616424560547, "learning_rate": 9.876984126984128e-06, "loss": 31.0877, "step": 17025 }, { "epoch": 405.3820895522388, "grad_norm": 19.847026824951172, "learning_rate": 9.876417233560091e-06, "loss": 30.4891, "step": 17026 }, { "epoch": 405.40597014925373, "grad_norm": 15.999998092651367, "learning_rate": 9.875850340136054e-06, "loss": 31.2124, "step": 17027 }, { "epoch": 405.42985074626864, "grad_norm": 15.895896911621094, "learning_rate": 9.875283446712019e-06, "loss": 30.2103, "step": 17028 }, { "epoch": 405.4537313432836, "grad_norm": 15.329496383666992, "learning_rate": 9.874716553287983e-06, "loss": 30.0036, "step": 17029 }, { "epoch": 405.4776119402985, "grad_norm": 16.12322235107422, "learning_rate": 9.874149659863946e-06, "loss": 29.7037, "step": 17030 }, { "epoch": 405.5014925373134, "grad_norm": 16.98004722595215, "learning_rate": 9.87358276643991e-06, "loss": 30.9011, "step": 17031 }, { "epoch": 405.52537313432833, "grad_norm": 19.170299530029297, "learning_rate": 9.873015873015874e-06, "loss": 30.192, "step": 17032 }, { "epoch": 405.5492537313433, "grad_norm": 17.483421325683594, "learning_rate": 9.872448979591838e-06, "loss": 30.5255, "step": 17033 }, { "epoch": 405.5731343283582, "grad_norm": 20.62899398803711, "learning_rate": 9.871882086167801e-06, "loss": 30.8915, "step": 17034 }, { "epoch": 405.5970149253731, "grad_norm": 17.30540657043457, "learning_rate": 9.871315192743764e-06, "loss": 31.0716, "step": 17035 }, { "epoch": 405.6208955223881, "grad_norm": 18.22004508972168, "learning_rate": 9.870748299319729e-06, "loss": 31.9494, "step": 17036 }, { "epoch": 405.644776119403, "grad_norm": 16.671100616455078, "learning_rate": 9.870181405895692e-06, "loss": 30.6172, "step": 17037 }, { "epoch": 405.6686567164179, "grad_norm": NaN, "learning_rate": 9.869614512471656e-06, "loss": 45.332, "step": 17038 }, { "epoch": 405.6925373134328, "grad_norm": 17.983890533447266, "learning_rate": 9.869614512471656e-06, "loss": 30.4078, "step": 17039 }, { "epoch": 405.7164179104478, "grad_norm": 21.861013412475586, "learning_rate": 9.869047619047621e-06, "loss": 30.7618, "step": 17040 }, { "epoch": 405.7402985074627, "grad_norm": 22.274782180786133, "learning_rate": 9.868480725623584e-06, "loss": 31.8571, "step": 17041 }, { "epoch": 405.7641791044776, "grad_norm": 15.382903099060059, "learning_rate": 9.867913832199547e-06, "loss": 31.0003, "step": 17042 }, { "epoch": 405.78805970149256, "grad_norm": 24.925888061523438, "learning_rate": 9.867346938775511e-06, "loss": 31.1132, "step": 17043 }, { "epoch": 405.81194029850747, "grad_norm": 21.787288665771484, "learning_rate": 9.866780045351474e-06, "loss": 31.2963, "step": 17044 }, { "epoch": 405.8358208955224, "grad_norm": 18.153474807739258, "learning_rate": 9.866213151927439e-06, "loss": 30.8227, "step": 17045 }, { "epoch": 405.85970149253734, "grad_norm": 25.045469284057617, "learning_rate": 9.865646258503402e-06, "loss": 30.42, "step": 17046 }, { "epoch": 405.88358208955225, "grad_norm": 23.71382713317871, "learning_rate": 9.865079365079366e-06, "loss": 31.5095, "step": 17047 }, { "epoch": 405.90746268656716, "grad_norm": 19.81368637084961, "learning_rate": 9.86451247165533e-06, "loss": 31.6538, "step": 17048 }, { "epoch": 405.93134328358207, "grad_norm": 19.46569061279297, "learning_rate": 9.863945578231294e-06, "loss": 31.2084, "step": 17049 }, { "epoch": 405.95522388059703, "grad_norm": 16.42058563232422, "learning_rate": 9.863378684807257e-06, "loss": 30.8875, "step": 17050 }, { "epoch": 405.97910447761194, "grad_norm": 23.119794845581055, "learning_rate": 9.862811791383221e-06, "loss": 30.2666, "step": 17051 }, { "epoch": 406.0, "grad_norm": 15.268242835998535, "learning_rate": 9.862244897959184e-06, "loss": 26.7074, "step": 17052 }, { "epoch": 406.0238805970149, "grad_norm": 22.865819931030273, "learning_rate": 9.861678004535147e-06, "loss": 30.5228, "step": 17053 }, { "epoch": 406.0477611940299, "grad_norm": 17.812450408935547, "learning_rate": 9.861111111111112e-06, "loss": 30.5346, "step": 17054 }, { "epoch": 406.0716417910448, "grad_norm": 25.216026306152344, "learning_rate": 9.860544217687076e-06, "loss": 30.5902, "step": 17055 }, { "epoch": 406.0955223880597, "grad_norm": 20.24772834777832, "learning_rate": 9.85997732426304e-06, "loss": 30.2208, "step": 17056 }, { "epoch": 406.1194029850746, "grad_norm": NaN, "learning_rate": 9.859410430839002e-06, "loss": 52.1957, "step": 17057 }, { "epoch": 406.14328358208957, "grad_norm": 19.716794967651367, "learning_rate": 9.859410430839002e-06, "loss": 30.7717, "step": 17058 }, { "epoch": 406.1671641791045, "grad_norm": 18.68526268005371, "learning_rate": 9.858843537414967e-06, "loss": 30.2788, "step": 17059 }, { "epoch": 406.1910447761194, "grad_norm": 19.732078552246094, "learning_rate": 9.858276643990931e-06, "loss": 31.6837, "step": 17060 }, { "epoch": 406.21492537313435, "grad_norm": 21.107961654663086, "learning_rate": 9.857709750566894e-06, "loss": 30.4119, "step": 17061 }, { "epoch": 406.23880597014926, "grad_norm": 18.77997398376465, "learning_rate": 9.857142857142859e-06, "loss": 30.8591, "step": 17062 }, { "epoch": 406.26268656716417, "grad_norm": 17.919878005981445, "learning_rate": 9.856575963718822e-06, "loss": 32.7938, "step": 17063 }, { "epoch": 406.28656716417913, "grad_norm": 21.54792594909668, "learning_rate": 9.856009070294785e-06, "loss": 30.6374, "step": 17064 }, { "epoch": 406.31044776119404, "grad_norm": 17.233793258666992, "learning_rate": 9.85544217687075e-06, "loss": 31.1178, "step": 17065 }, { "epoch": 406.33432835820895, "grad_norm": 20.764001846313477, "learning_rate": 9.854875283446714e-06, "loss": 31.244, "step": 17066 }, { "epoch": 406.35820895522386, "grad_norm": 16.163888931274414, "learning_rate": 9.854308390022677e-06, "loss": 30.951, "step": 17067 }, { "epoch": 406.3820895522388, "grad_norm": 20.328454971313477, "learning_rate": 9.85374149659864e-06, "loss": 31.2245, "step": 17068 }, { "epoch": 406.40597014925373, "grad_norm": 19.258499145507812, "learning_rate": 9.853174603174604e-06, "loss": 30.5636, "step": 17069 }, { "epoch": 406.42985074626864, "grad_norm": 19.76752281188965, "learning_rate": 9.852607709750567e-06, "loss": 28.7173, "step": 17070 }, { "epoch": 406.4537313432836, "grad_norm": 24.23910903930664, "learning_rate": 9.852040816326532e-06, "loss": 32.9057, "step": 17071 }, { "epoch": 406.4776119402985, "grad_norm": 20.138818740844727, "learning_rate": 9.851473922902495e-06, "loss": 31.6671, "step": 17072 }, { "epoch": 406.5014925373134, "grad_norm": 18.762805938720703, "learning_rate": 9.85090702947846e-06, "loss": 31.1464, "step": 17073 }, { "epoch": 406.52537313432833, "grad_norm": 16.108400344848633, "learning_rate": 9.850340136054422e-06, "loss": 30.4823, "step": 17074 }, { "epoch": 406.5492537313433, "grad_norm": 20.8375244140625, "learning_rate": 9.849773242630387e-06, "loss": 30.6664, "step": 17075 }, { "epoch": 406.5731343283582, "grad_norm": 26.487838745117188, "learning_rate": 9.849206349206351e-06, "loss": 30.1852, "step": 17076 }, { "epoch": 406.5970149253731, "grad_norm": 18.415733337402344, "learning_rate": 9.848639455782314e-06, "loss": 30.3024, "step": 17077 }, { "epoch": 406.6208955223881, "grad_norm": 21.358409881591797, "learning_rate": 9.848072562358277e-06, "loss": 30.8718, "step": 17078 }, { "epoch": 406.644776119403, "grad_norm": 21.802520751953125, "learning_rate": 9.84750566893424e-06, "loss": 29.3102, "step": 17079 }, { "epoch": 406.6686567164179, "grad_norm": 22.83088493347168, "learning_rate": 9.846938775510205e-06, "loss": 31.9597, "step": 17080 }, { "epoch": 406.6925373134328, "grad_norm": 17.50154685974121, "learning_rate": 9.84637188208617e-06, "loss": 32.1017, "step": 17081 }, { "epoch": 406.7164179104478, "grad_norm": 20.930034637451172, "learning_rate": 9.845804988662132e-06, "loss": 31.1494, "step": 17082 }, { "epoch": 406.7402985074627, "grad_norm": 24.347320556640625, "learning_rate": 9.845238095238097e-06, "loss": 31.3371, "step": 17083 }, { "epoch": 406.7641791044776, "grad_norm": 20.09071922302246, "learning_rate": 9.84467120181406e-06, "loss": 30.4365, "step": 17084 }, { "epoch": 406.78805970149256, "grad_norm": 18.996219635009766, "learning_rate": 9.844104308390024e-06, "loss": 31.5441, "step": 17085 }, { "epoch": 406.81194029850747, "grad_norm": 18.961217880249023, "learning_rate": 9.843537414965987e-06, "loss": 30.1595, "step": 17086 }, { "epoch": 406.8358208955224, "grad_norm": 20.991409301757812, "learning_rate": 9.842970521541952e-06, "loss": 31.6773, "step": 17087 }, { "epoch": 406.85970149253734, "grad_norm": 17.288860321044922, "learning_rate": 9.842403628117915e-06, "loss": 30.7588, "step": 17088 }, { "epoch": 406.88358208955225, "grad_norm": 18.68027114868164, "learning_rate": 9.841836734693878e-06, "loss": 30.3646, "step": 17089 }, { "epoch": 406.90746268656716, "grad_norm": 18.632766723632812, "learning_rate": 9.841269841269842e-06, "loss": 31.178, "step": 17090 }, { "epoch": 406.93134328358207, "grad_norm": 20.20024299621582, "learning_rate": 9.840702947845807e-06, "loss": 31.5622, "step": 17091 }, { "epoch": 406.95522388059703, "grad_norm": 19.179670333862305, "learning_rate": 9.84013605442177e-06, "loss": 31.0176, "step": 17092 }, { "epoch": 406.97910447761194, "grad_norm": 23.177379608154297, "learning_rate": 9.839569160997733e-06, "loss": 30.1033, "step": 17093 }, { "epoch": 407.0, "grad_norm": 18.88588523864746, "learning_rate": 9.839002267573697e-06, "loss": 26.1528, "step": 17094 }, { "epoch": 407.0238805970149, "grad_norm": 22.23098373413086, "learning_rate": 9.83843537414966e-06, "loss": 30.0929, "step": 17095 }, { "epoch": 407.0477611940299, "grad_norm": 18.42059326171875, "learning_rate": 9.837868480725625e-06, "loss": 30.8388, "step": 17096 }, { "epoch": 407.0716417910448, "grad_norm": 20.517749786376953, "learning_rate": 9.837301587301588e-06, "loss": 30.4366, "step": 17097 }, { "epoch": 407.0955223880597, "grad_norm": 20.482519149780273, "learning_rate": 9.836734693877552e-06, "loss": 30.1441, "step": 17098 }, { "epoch": 407.1194029850746, "grad_norm": 19.885637283325195, "learning_rate": 9.836167800453515e-06, "loss": 31.2451, "step": 17099 }, { "epoch": 407.14328358208957, "grad_norm": 17.006126403808594, "learning_rate": 9.83560090702948e-06, "loss": 30.3939, "step": 17100 }, { "epoch": 407.1671641791045, "grad_norm": 18.33284568786621, "learning_rate": 9.835034013605444e-06, "loss": 29.9203, "step": 17101 }, { "epoch": 407.1910447761194, "grad_norm": 23.153682708740234, "learning_rate": 9.834467120181407e-06, "loss": 30.9469, "step": 17102 }, { "epoch": 407.21492537313435, "grad_norm": 20.087182998657227, "learning_rate": 9.83390022675737e-06, "loss": 30.6324, "step": 17103 }, { "epoch": 407.23880597014926, "grad_norm": 19.266353607177734, "learning_rate": 9.833333333333333e-06, "loss": 29.798, "step": 17104 }, { "epoch": 407.26268656716417, "grad_norm": 16.637983322143555, "learning_rate": 9.832766439909298e-06, "loss": 31.0228, "step": 17105 }, { "epoch": 407.28656716417913, "grad_norm": 20.662580490112305, "learning_rate": 9.832199546485262e-06, "loss": 31.0412, "step": 17106 }, { "epoch": 407.31044776119404, "grad_norm": 24.179311752319336, "learning_rate": 9.831632653061225e-06, "loss": 31.3721, "step": 17107 }, { "epoch": 407.33432835820895, "grad_norm": 18.55788803100586, "learning_rate": 9.83106575963719e-06, "loss": 30.7381, "step": 17108 }, { "epoch": 407.35820895522386, "grad_norm": 20.20111083984375, "learning_rate": 9.830498866213153e-06, "loss": 30.8676, "step": 17109 }, { "epoch": 407.3820895522388, "grad_norm": 17.19261932373047, "learning_rate": 9.829931972789115e-06, "loss": 31.3582, "step": 17110 }, { "epoch": 407.40597014925373, "grad_norm": 19.75364875793457, "learning_rate": 9.82936507936508e-06, "loss": 29.8319, "step": 17111 }, { "epoch": 407.42985074626864, "grad_norm": 19.894805908203125, "learning_rate": 9.828798185941045e-06, "loss": 31.0655, "step": 17112 }, { "epoch": 407.4537313432836, "grad_norm": 20.540538787841797, "learning_rate": 9.828231292517008e-06, "loss": 31.0926, "step": 17113 }, { "epoch": 407.4776119402985, "grad_norm": 18.901662826538086, "learning_rate": 9.82766439909297e-06, "loss": 30.6047, "step": 17114 }, { "epoch": 407.5014925373134, "grad_norm": 18.279712677001953, "learning_rate": 9.827097505668935e-06, "loss": 30.435, "step": 17115 }, { "epoch": 407.52537313432833, "grad_norm": 19.222557067871094, "learning_rate": 9.8265306122449e-06, "loss": 31.2252, "step": 17116 }, { "epoch": 407.5492537313433, "grad_norm": 20.993911743164062, "learning_rate": 9.825963718820863e-06, "loss": 31.9963, "step": 17117 }, { "epoch": 407.5731343283582, "grad_norm": 19.337543487548828, "learning_rate": 9.825396825396825e-06, "loss": 29.7285, "step": 17118 }, { "epoch": 407.5970149253731, "grad_norm": 22.092134475708008, "learning_rate": 9.82482993197279e-06, "loss": 30.4033, "step": 17119 }, { "epoch": 407.6208955223881, "grad_norm": 16.811466217041016, "learning_rate": 9.824263038548753e-06, "loss": 30.9516, "step": 17120 }, { "epoch": 407.644776119403, "grad_norm": 23.478229522705078, "learning_rate": 9.823696145124718e-06, "loss": 30.6678, "step": 17121 }, { "epoch": 407.6686567164179, "grad_norm": 22.2537841796875, "learning_rate": 9.823129251700682e-06, "loss": 31.1874, "step": 17122 }, { "epoch": 407.6925373134328, "grad_norm": 24.31056022644043, "learning_rate": 9.822562358276645e-06, "loss": 29.8586, "step": 17123 }, { "epoch": 407.7164179104478, "grad_norm": 20.723657608032227, "learning_rate": 9.821995464852608e-06, "loss": 30.6623, "step": 17124 }, { "epoch": 407.7402985074627, "grad_norm": 17.87926483154297, "learning_rate": 9.821428571428573e-06, "loss": 30.9176, "step": 17125 }, { "epoch": 407.7641791044776, "grad_norm": 20.93740463256836, "learning_rate": 9.820861678004537e-06, "loss": 30.8719, "step": 17126 }, { "epoch": 407.78805970149256, "grad_norm": 22.472055435180664, "learning_rate": 9.8202947845805e-06, "loss": 32.0218, "step": 17127 }, { "epoch": 407.81194029850747, "grad_norm": 18.69917106628418, "learning_rate": 9.819727891156463e-06, "loss": 31.4956, "step": 17128 }, { "epoch": 407.8358208955224, "grad_norm": 20.267702102661133, "learning_rate": 9.819160997732428e-06, "loss": 31.8654, "step": 17129 }, { "epoch": 407.85970149253734, "grad_norm": 16.86379051208496, "learning_rate": 9.81859410430839e-06, "loss": 32.0695, "step": 17130 }, { "epoch": 407.88358208955225, "grad_norm": 18.317886352539062, "learning_rate": 9.818027210884355e-06, "loss": 30.0415, "step": 17131 }, { "epoch": 407.90746268656716, "grad_norm": 18.96920394897461, "learning_rate": 9.817460317460318e-06, "loss": 30.5568, "step": 17132 }, { "epoch": 407.93134328358207, "grad_norm": 18.7996883392334, "learning_rate": 9.816893424036282e-06, "loss": 30.9763, "step": 17133 }, { "epoch": 407.95522388059703, "grad_norm": 21.947856903076172, "learning_rate": 9.816326530612245e-06, "loss": 31.0727, "step": 17134 }, { "epoch": 407.97910447761194, "grad_norm": 19.53034210205078, "learning_rate": 9.815759637188208e-06, "loss": 31.3669, "step": 17135 }, { "epoch": 408.0, "grad_norm": 14.76440715789795, "learning_rate": 9.815192743764173e-06, "loss": 27.1142, "step": 17136 }, { "epoch": 408.0238805970149, "grad_norm": 18.433521270751953, "learning_rate": 9.814625850340137e-06, "loss": 30.4153, "step": 17137 }, { "epoch": 408.0477611940299, "grad_norm": 18.82404899597168, "learning_rate": 9.8140589569161e-06, "loss": 30.9344, "step": 17138 }, { "epoch": 408.0716417910448, "grad_norm": 23.097026824951172, "learning_rate": 9.813492063492063e-06, "loss": 30.7174, "step": 17139 }, { "epoch": 408.0955223880597, "grad_norm": 19.073869705200195, "learning_rate": 9.812925170068028e-06, "loss": 30.063, "step": 17140 }, { "epoch": 408.1194029850746, "grad_norm": 17.834747314453125, "learning_rate": 9.812358276643992e-06, "loss": 30.1185, "step": 17141 }, { "epoch": 408.14328358208957, "grad_norm": 17.94736671447754, "learning_rate": 9.811791383219955e-06, "loss": 29.8447, "step": 17142 }, { "epoch": 408.1671641791045, "grad_norm": 28.02310562133789, "learning_rate": 9.81122448979592e-06, "loss": 31.3027, "step": 17143 }, { "epoch": 408.1910447761194, "grad_norm": 18.83740997314453, "learning_rate": 9.810657596371883e-06, "loss": 30.637, "step": 17144 }, { "epoch": 408.21492537313435, "grad_norm": 19.65155029296875, "learning_rate": 9.810090702947846e-06, "loss": 30.2705, "step": 17145 }, { "epoch": 408.23880597014926, "grad_norm": 20.439847946166992, "learning_rate": 9.80952380952381e-06, "loss": 31.1955, "step": 17146 }, { "epoch": 408.26268656716417, "grad_norm": 22.33255386352539, "learning_rate": 9.808956916099775e-06, "loss": 31.1991, "step": 17147 }, { "epoch": 408.28656716417913, "grad_norm": 19.935897827148438, "learning_rate": 9.808390022675738e-06, "loss": 29.7388, "step": 17148 }, { "epoch": 408.31044776119404, "grad_norm": 17.66071891784668, "learning_rate": 9.8078231292517e-06, "loss": 29.9912, "step": 17149 }, { "epoch": 408.33432835820895, "grad_norm": 16.160856246948242, "learning_rate": 9.807256235827665e-06, "loss": 31.067, "step": 17150 }, { "epoch": 408.35820895522386, "grad_norm": 18.06080436706543, "learning_rate": 9.806689342403628e-06, "loss": 31.404, "step": 17151 }, { "epoch": 408.3820895522388, "grad_norm": 19.497299194335938, "learning_rate": 9.806122448979593e-06, "loss": 29.9847, "step": 17152 }, { "epoch": 408.40597014925373, "grad_norm": 25.528413772583008, "learning_rate": 9.805555555555556e-06, "loss": 30.5061, "step": 17153 }, { "epoch": 408.42985074626864, "grad_norm": 19.330944061279297, "learning_rate": 9.80498866213152e-06, "loss": 29.9116, "step": 17154 }, { "epoch": 408.4537313432836, "grad_norm": 17.243127822875977, "learning_rate": 9.804421768707483e-06, "loss": 30.5956, "step": 17155 }, { "epoch": 408.4776119402985, "grad_norm": 25.73492431640625, "learning_rate": 9.803854875283448e-06, "loss": 31.494, "step": 17156 }, { "epoch": 408.5014925373134, "grad_norm": 28.76915740966797, "learning_rate": 9.803287981859412e-06, "loss": 31.9458, "step": 17157 }, { "epoch": 408.52537313432833, "grad_norm": 17.02314567565918, "learning_rate": 9.802721088435375e-06, "loss": 31.0822, "step": 17158 }, { "epoch": 408.5492537313433, "grad_norm": 31.693359375, "learning_rate": 9.802154195011338e-06, "loss": 31.2315, "step": 17159 }, { "epoch": 408.5731343283582, "grad_norm": 22.58902931213379, "learning_rate": 9.801587301587301e-06, "loss": 31.5996, "step": 17160 }, { "epoch": 408.5970149253731, "grad_norm": 25.078304290771484, "learning_rate": 9.801020408163266e-06, "loss": 30.4479, "step": 17161 }, { "epoch": 408.6208955223881, "grad_norm": 32.10065841674805, "learning_rate": 9.80045351473923e-06, "loss": 31.7707, "step": 17162 }, { "epoch": 408.644776119403, "grad_norm": 21.00263786315918, "learning_rate": 9.799886621315193e-06, "loss": 32.3937, "step": 17163 }, { "epoch": 408.6686567164179, "grad_norm": 38.225982666015625, "learning_rate": 9.799319727891158e-06, "loss": 30.241, "step": 17164 }, { "epoch": 408.6925373134328, "grad_norm": 25.415969848632812, "learning_rate": 9.79875283446712e-06, "loss": 30.0834, "step": 17165 }, { "epoch": 408.7164179104478, "grad_norm": 37.347129821777344, "learning_rate": 9.798185941043085e-06, "loss": 31.426, "step": 17166 }, { "epoch": 408.7402985074627, "grad_norm": 23.168502807617188, "learning_rate": 9.797619047619048e-06, "loss": 30.2809, "step": 17167 }, { "epoch": 408.7641791044776, "grad_norm": 39.452091217041016, "learning_rate": 9.797052154195013e-06, "loss": 31.4917, "step": 17168 }, { "epoch": 408.78805970149256, "grad_norm": 28.55925750732422, "learning_rate": 9.796485260770976e-06, "loss": 31.6921, "step": 17169 }, { "epoch": 408.81194029850747, "grad_norm": 47.2205696105957, "learning_rate": 9.795918367346939e-06, "loss": 30.9541, "step": 17170 }, { "epoch": 408.8358208955224, "grad_norm": 38.546993255615234, "learning_rate": 9.795351473922903e-06, "loss": 30.3784, "step": 17171 }, { "epoch": 408.85970149253734, "grad_norm": 40.99946975708008, "learning_rate": 9.794784580498868e-06, "loss": 31.1373, "step": 17172 }, { "epoch": 408.88358208955225, "grad_norm": 37.15016174316406, "learning_rate": 9.79421768707483e-06, "loss": 30.7365, "step": 17173 }, { "epoch": 408.90746268656716, "grad_norm": 36.37284851074219, "learning_rate": 9.793650793650794e-06, "loss": 30.6809, "step": 17174 }, { "epoch": 408.93134328358207, "grad_norm": 36.006309509277344, "learning_rate": 9.793083900226758e-06, "loss": 30.8276, "step": 17175 }, { "epoch": 408.95522388059703, "grad_norm": 35.93528366088867, "learning_rate": 9.792517006802721e-06, "loss": 31.5256, "step": 17176 }, { "epoch": 408.97910447761194, "grad_norm": 25.682090759277344, "learning_rate": 9.791950113378686e-06, "loss": 29.6271, "step": 17177 }, { "epoch": 409.0, "grad_norm": 34.678077697753906, "learning_rate": 9.791383219954649e-06, "loss": 26.7581, "step": 17178 }, { "epoch": 409.0238805970149, "grad_norm": 32.7984504699707, "learning_rate": 9.790816326530613e-06, "loss": 30.9675, "step": 17179 }, { "epoch": 409.0477611940299, "grad_norm": 43.166133880615234, "learning_rate": 9.790249433106576e-06, "loss": 29.4506, "step": 17180 }, { "epoch": 409.0716417910448, "grad_norm": 40.31956481933594, "learning_rate": 9.78968253968254e-06, "loss": 32.4348, "step": 17181 }, { "epoch": 409.0955223880597, "grad_norm": 32.789756774902344, "learning_rate": 9.789115646258505e-06, "loss": 30.5454, "step": 17182 }, { "epoch": 409.1194029850746, "grad_norm": 32.10710525512695, "learning_rate": 9.788548752834468e-06, "loss": 29.9216, "step": 17183 }, { "epoch": 409.14328358208957, "grad_norm": 34.188148498535156, "learning_rate": 9.787981859410431e-06, "loss": 30.9924, "step": 17184 }, { "epoch": 409.1671641791045, "grad_norm": 30.383773803710938, "learning_rate": 9.787414965986394e-06, "loss": 30.9493, "step": 17185 }, { "epoch": 409.1910447761194, "grad_norm": 40.345176696777344, "learning_rate": 9.786848072562359e-06, "loss": 30.5554, "step": 17186 }, { "epoch": 409.21492537313435, "grad_norm": 34.366477966308594, "learning_rate": 9.786281179138323e-06, "loss": 31.3236, "step": 17187 }, { "epoch": 409.23880597014926, "grad_norm": 37.019676208496094, "learning_rate": 9.785714285714286e-06, "loss": 30.2368, "step": 17188 }, { "epoch": 409.26268656716417, "grad_norm": 33.16834259033203, "learning_rate": 9.78514739229025e-06, "loss": 30.4291, "step": 17189 }, { "epoch": 409.28656716417913, "grad_norm": 36.23292922973633, "learning_rate": 9.784580498866214e-06, "loss": 30.3375, "step": 17190 }, { "epoch": 409.31044776119404, "grad_norm": 29.69635772705078, "learning_rate": 9.784013605442178e-06, "loss": 31.311, "step": 17191 }, { "epoch": 409.33432835820895, "grad_norm": 36.75214767456055, "learning_rate": 9.783446712018141e-06, "loss": 31.749, "step": 17192 }, { "epoch": 409.35820895522386, "grad_norm": 34.37923049926758, "learning_rate": 9.782879818594106e-06, "loss": 31.6592, "step": 17193 }, { "epoch": 409.3820895522388, "grad_norm": 36.90751647949219, "learning_rate": 9.782312925170069e-06, "loss": 31.6242, "step": 17194 }, { "epoch": 409.40597014925373, "grad_norm": 33.20652389526367, "learning_rate": 9.781746031746032e-06, "loss": 31.5448, "step": 17195 }, { "epoch": 409.42985074626864, "grad_norm": 33.465938568115234, "learning_rate": 9.781179138321996e-06, "loss": 30.9569, "step": 17196 }, { "epoch": 409.4537313432836, "grad_norm": 33.31385040283203, "learning_rate": 9.78061224489796e-06, "loss": 31.5872, "step": 17197 }, { "epoch": 409.4776119402985, "grad_norm": 34.62633514404297, "learning_rate": 9.780045351473924e-06, "loss": 30.7983, "step": 17198 }, { "epoch": 409.5014925373134, "grad_norm": 31.84848976135254, "learning_rate": 9.779478458049887e-06, "loss": 31.574, "step": 17199 }, { "epoch": 409.52537313432833, "grad_norm": 33.264076232910156, "learning_rate": 9.778911564625851e-06, "loss": 29.8799, "step": 17200 }, { "epoch": 409.5492537313433, "grad_norm": 29.371370315551758, "learning_rate": 9.778344671201814e-06, "loss": 31.3128, "step": 17201 }, { "epoch": 409.5731343283582, "grad_norm": 38.70520782470703, "learning_rate": 9.777777777777779e-06, "loss": 31.7127, "step": 17202 }, { "epoch": 409.5970149253731, "grad_norm": 34.72576141357422, "learning_rate": 9.777210884353743e-06, "loss": 29.2374, "step": 17203 }, { "epoch": 409.6208955223881, "grad_norm": 33.42527770996094, "learning_rate": 9.776643990929706e-06, "loss": 30.6626, "step": 17204 }, { "epoch": 409.644776119403, "grad_norm": 32.23186492919922, "learning_rate": 9.776077097505669e-06, "loss": 30.1451, "step": 17205 }, { "epoch": 409.6686567164179, "grad_norm": 34.285430908203125, "learning_rate": 9.775510204081634e-06, "loss": 30.9031, "step": 17206 }, { "epoch": 409.6925373134328, "grad_norm": 30.343807220458984, "learning_rate": 9.774943310657598e-06, "loss": 30.7695, "step": 17207 }, { "epoch": 409.7164179104478, "grad_norm": 42.4433479309082, "learning_rate": 9.774376417233561e-06, "loss": 31.0233, "step": 17208 }, { "epoch": 409.7402985074627, "grad_norm": 33.58607864379883, "learning_rate": 9.773809523809524e-06, "loss": 30.2251, "step": 17209 }, { "epoch": 409.7641791044776, "grad_norm": 34.856666564941406, "learning_rate": 9.773242630385489e-06, "loss": 30.0397, "step": 17210 }, { "epoch": 409.78805970149256, "grad_norm": 32.365806579589844, "learning_rate": 9.772675736961452e-06, "loss": 29.791, "step": 17211 }, { "epoch": 409.81194029850747, "grad_norm": 31.12361717224121, "learning_rate": 9.772108843537416e-06, "loss": 30.6748, "step": 17212 }, { "epoch": 409.8358208955224, "grad_norm": 27.62432098388672, "learning_rate": 9.771541950113379e-06, "loss": 30.7766, "step": 17213 }, { "epoch": 409.85970149253734, "grad_norm": 36.967857360839844, "learning_rate": 9.770975056689344e-06, "loss": 30.597, "step": 17214 }, { "epoch": 409.88358208955225, "grad_norm": 33.09959411621094, "learning_rate": 9.770408163265307e-06, "loss": 29.5581, "step": 17215 }, { "epoch": 409.90746268656716, "grad_norm": 32.458499908447266, "learning_rate": 9.769841269841271e-06, "loss": 30.0254, "step": 17216 }, { "epoch": 409.93134328358207, "grad_norm": 32.26570510864258, "learning_rate": 9.769274376417234e-06, "loss": 30.1378, "step": 17217 }, { "epoch": 409.95522388059703, "grad_norm": 32.063602447509766, "learning_rate": 9.768707482993199e-06, "loss": 30.5175, "step": 17218 }, { "epoch": 409.97910447761194, "grad_norm": 29.157814025878906, "learning_rate": 9.768140589569162e-06, "loss": 30.7194, "step": 17219 }, { "epoch": 410.0, "grad_norm": 29.813369750976562, "learning_rate": 9.767573696145124e-06, "loss": 27.861, "step": 17220 }, { "epoch": 410.0238805970149, "grad_norm": 31.18349266052246, "learning_rate": 9.767006802721089e-06, "loss": 30.2601, "step": 17221 }, { "epoch": 410.0477611940299, "grad_norm": 33.58921813964844, "learning_rate": 9.766439909297054e-06, "loss": 30.5417, "step": 17222 }, { "epoch": 410.0716417910448, "grad_norm": 31.673707962036133, "learning_rate": 9.765873015873017e-06, "loss": 30.812, "step": 17223 }, { "epoch": 410.0955223880597, "grad_norm": 31.584043502807617, "learning_rate": 9.765306122448981e-06, "loss": 31.2592, "step": 17224 }, { "epoch": 410.1194029850746, "grad_norm": 30.22162628173828, "learning_rate": 9.764739229024944e-06, "loss": 31.577, "step": 17225 }, { "epoch": 410.14328358208957, "grad_norm": 33.900306701660156, "learning_rate": 9.764172335600907e-06, "loss": 29.0864, "step": 17226 }, { "epoch": 410.1671641791045, "grad_norm": 30.085092544555664, "learning_rate": 9.763605442176872e-06, "loss": 30.5391, "step": 17227 }, { "epoch": 410.1910447761194, "grad_norm": 38.079933166503906, "learning_rate": 9.763038548752836e-06, "loss": 31.1666, "step": 17228 }, { "epoch": 410.21492537313435, "grad_norm": 34.481910705566406, "learning_rate": 9.762471655328799e-06, "loss": 31.2171, "step": 17229 }, { "epoch": 410.23880597014926, "grad_norm": 34.52700424194336, "learning_rate": 9.761904761904762e-06, "loss": 30.8267, "step": 17230 }, { "epoch": 410.26268656716417, "grad_norm": 31.537492752075195, "learning_rate": 9.761337868480727e-06, "loss": 30.8598, "step": 17231 }, { "epoch": 410.28656716417913, "grad_norm": 33.93818283081055, "learning_rate": 9.760770975056691e-06, "loss": 30.6294, "step": 17232 }, { "epoch": 410.31044776119404, "grad_norm": 29.543317794799805, "learning_rate": 9.760204081632654e-06, "loss": 30.3695, "step": 17233 }, { "epoch": 410.33432835820895, "grad_norm": 37.45649719238281, "learning_rate": 9.759637188208617e-06, "loss": 31.0027, "step": 17234 }, { "epoch": 410.35820895522386, "grad_norm": NaN, "learning_rate": 9.759070294784582e-06, "loss": 35.4775, "step": 17235 }, { "epoch": 410.3820895522388, "grad_norm": 30.60056495666504, "learning_rate": 9.759070294784582e-06, "loss": 30.9545, "step": 17236 }, { "epoch": 410.40597014925373, "grad_norm": 32.86592102050781, "learning_rate": 9.758503401360544e-06, "loss": 30.482, "step": 17237 }, { "epoch": 410.42985074626864, "grad_norm": 28.09597396850586, "learning_rate": 9.757936507936509e-06, "loss": 30.1862, "step": 17238 }, { "epoch": 410.4537313432836, "grad_norm": 34.24605941772461, "learning_rate": 9.757369614512474e-06, "loss": 29.7122, "step": 17239 }, { "epoch": 410.4776119402985, "grad_norm": 26.68988800048828, "learning_rate": 9.756802721088437e-06, "loss": 29.6089, "step": 17240 }, { "epoch": 410.5014925373134, "grad_norm": 35.57308578491211, "learning_rate": 9.7562358276644e-06, "loss": 32.0771, "step": 17241 }, { "epoch": 410.52537313432833, "grad_norm": 31.854652404785156, "learning_rate": 9.755668934240364e-06, "loss": 30.3433, "step": 17242 }, { "epoch": 410.5492537313433, "grad_norm": 32.33788299560547, "learning_rate": 9.755102040816327e-06, "loss": 29.581, "step": 17243 }, { "epoch": 410.5731343283582, "grad_norm": 32.283477783203125, "learning_rate": 9.754535147392292e-06, "loss": 32.0348, "step": 17244 }, { "epoch": 410.5970149253731, "grad_norm": 29.719423294067383, "learning_rate": 9.753968253968254e-06, "loss": 30.9744, "step": 17245 }, { "epoch": 410.6208955223881, "grad_norm": 28.74759864807129, "learning_rate": 9.753401360544217e-06, "loss": 31.0377, "step": 17246 }, { "epoch": 410.644776119403, "grad_norm": 31.67352867126465, "learning_rate": 9.752834467120182e-06, "loss": 30.4061, "step": 17247 }, { "epoch": 410.6686567164179, "grad_norm": 25.129446029663086, "learning_rate": 9.752267573696147e-06, "loss": 31.7588, "step": 17248 }, { "epoch": 410.6925373134328, "grad_norm": 37.61256408691406, "learning_rate": 9.75170068027211e-06, "loss": 31.9124, "step": 17249 }, { "epoch": 410.7164179104478, "grad_norm": 29.709802627563477, "learning_rate": 9.751133786848074e-06, "loss": 31.7182, "step": 17250 }, { "epoch": 410.7402985074627, "grad_norm": 38.39265823364258, "learning_rate": 9.750566893424037e-06, "loss": 30.8116, "step": 17251 }, { "epoch": 410.7641791044776, "grad_norm": 33.163082122802734, "learning_rate": 9.75e-06, "loss": 29.819, "step": 17252 }, { "epoch": 410.78805970149256, "grad_norm": 31.229955673217773, "learning_rate": 9.749433106575964e-06, "loss": 29.8216, "step": 17253 }, { "epoch": 410.81194029850747, "grad_norm": 30.527732849121094, "learning_rate": 9.748866213151929e-06, "loss": 30.9318, "step": 17254 }, { "epoch": 410.8358208955224, "grad_norm": 31.837890625, "learning_rate": 9.748299319727892e-06, "loss": 31.0031, "step": 17255 }, { "epoch": 410.85970149253734, "grad_norm": 30.608051300048828, "learning_rate": 9.747732426303855e-06, "loss": 30.6614, "step": 17256 }, { "epoch": 410.88358208955225, "grad_norm": NaN, "learning_rate": 9.74716553287982e-06, "loss": 27.1812, "step": 17257 }, { "epoch": 410.90746268656716, "grad_norm": 36.195335388183594, "learning_rate": 9.74716553287982e-06, "loss": 29.9308, "step": 17258 }, { "epoch": 410.93134328358207, "grad_norm": 30.972122192382812, "learning_rate": 9.746598639455784e-06, "loss": 30.4884, "step": 17259 }, { "epoch": 410.95522388059703, "grad_norm": 32.893672943115234, "learning_rate": 9.746031746031747e-06, "loss": 29.9437, "step": 17260 }, { "epoch": 410.97910447761194, "grad_norm": 29.859777450561523, "learning_rate": 9.74546485260771e-06, "loss": 30.0954, "step": 17261 }, { "epoch": 411.0, "grad_norm": 28.657880783081055, "learning_rate": 9.744897959183674e-06, "loss": 27.0012, "step": 17262 }, { "epoch": 411.0238805970149, "grad_norm": 30.218544006347656, "learning_rate": 9.744331065759637e-06, "loss": 29.9479, "step": 17263 }, { "epoch": 411.0477611940299, "grad_norm": 32.60407257080078, "learning_rate": 9.743764172335602e-06, "loss": 30.6546, "step": 17264 }, { "epoch": 411.0716417910448, "grad_norm": 27.861570358276367, "learning_rate": 9.743197278911567e-06, "loss": 31.1357, "step": 17265 }, { "epoch": 411.0955223880597, "grad_norm": 32.95627975463867, "learning_rate": 9.74263038548753e-06, "loss": 30.2324, "step": 17266 }, { "epoch": 411.1194029850746, "grad_norm": 29.979084014892578, "learning_rate": 9.742063492063492e-06, "loss": 30.0651, "step": 17267 }, { "epoch": 411.14328358208957, "grad_norm": 32.46349334716797, "learning_rate": 9.741496598639457e-06, "loss": 31.6332, "step": 17268 }, { "epoch": 411.1671641791045, "grad_norm": 27.955581665039062, "learning_rate": 9.74092970521542e-06, "loss": 30.7639, "step": 17269 }, { "epoch": 411.1910447761194, "grad_norm": 31.141822814941406, "learning_rate": 9.740362811791384e-06, "loss": 30.5363, "step": 17270 }, { "epoch": 411.21492537313435, "grad_norm": 29.90436553955078, "learning_rate": 9.739795918367347e-06, "loss": 30.9016, "step": 17271 }, { "epoch": 411.23880597014926, "grad_norm": 30.68499755859375, "learning_rate": 9.739229024943312e-06, "loss": 30.5875, "step": 17272 }, { "epoch": 411.26268656716417, "grad_norm": NaN, "learning_rate": 9.738662131519275e-06, "loss": 39.0752, "step": 17273 }, { "epoch": 411.28656716417913, "grad_norm": 25.61927032470703, "learning_rate": 9.738662131519275e-06, "loss": 29.7111, "step": 17274 }, { "epoch": 411.31044776119404, "grad_norm": 33.73572540283203, "learning_rate": 9.73809523809524e-06, "loss": 29.6255, "step": 17275 }, { "epoch": 411.33432835820895, "grad_norm": 30.84865379333496, "learning_rate": 9.737528344671202e-06, "loss": 32.1993, "step": 17276 }, { "epoch": 411.35820895522386, "grad_norm": 34.85017776489258, "learning_rate": 9.736961451247167e-06, "loss": 30.1535, "step": 17277 }, { "epoch": 411.3820895522388, "grad_norm": 30.275859832763672, "learning_rate": 9.73639455782313e-06, "loss": 30.5525, "step": 17278 }, { "epoch": 411.40597014925373, "grad_norm": 33.74799728393555, "learning_rate": 9.735827664399093e-06, "loss": 30.5777, "step": 17279 }, { "epoch": 411.42985074626864, "grad_norm": NaN, "learning_rate": 9.735260770975057e-06, "loss": 49.9628, "step": 17280 }, { "epoch": 411.4537313432836, "grad_norm": 30.752450942993164, "learning_rate": 9.735260770975057e-06, "loss": 30.3917, "step": 17281 }, { "epoch": 411.4776119402985, "grad_norm": 32.22258377075195, "learning_rate": 9.734693877551022e-06, "loss": 31.2265, "step": 17282 }, { "epoch": 411.5014925373134, "grad_norm": 27.925674438476562, "learning_rate": 9.734126984126985e-06, "loss": 30.0399, "step": 17283 }, { "epoch": 411.52537313432833, "grad_norm": 34.14848327636719, "learning_rate": 9.733560090702948e-06, "loss": 29.8929, "step": 17284 }, { "epoch": 411.5492537313433, "grad_norm": 28.29414939880371, "learning_rate": 9.732993197278912e-06, "loss": 30.0548, "step": 17285 }, { "epoch": 411.5731343283582, "grad_norm": 33.69083023071289, "learning_rate": 9.732426303854877e-06, "loss": 31.5717, "step": 17286 }, { "epoch": 411.5970149253731, "grad_norm": 29.985658645629883, "learning_rate": 9.73185941043084e-06, "loss": 29.5741, "step": 17287 }, { "epoch": 411.6208955223881, "grad_norm": 29.38248634338379, "learning_rate": 9.731292517006804e-06, "loss": 30.3663, "step": 17288 }, { "epoch": 411.644776119403, "grad_norm": 28.989734649658203, "learning_rate": 9.730725623582767e-06, "loss": 32.1202, "step": 17289 }, { "epoch": 411.6686567164179, "grad_norm": 30.57007598876953, "learning_rate": 9.73015873015873e-06, "loss": 29.8059, "step": 17290 }, { "epoch": 411.6925373134328, "grad_norm": 27.80312728881836, "learning_rate": 9.729591836734695e-06, "loss": 30.9309, "step": 17291 }, { "epoch": 411.7164179104478, "grad_norm": 35.20885467529297, "learning_rate": 9.72902494331066e-06, "loss": 31.8309, "step": 17292 }, { "epoch": 411.7402985074627, "grad_norm": 29.32598304748535, "learning_rate": 9.728458049886622e-06, "loss": 31.491, "step": 17293 }, { "epoch": 411.7641791044776, "grad_norm": 41.09993362426758, "learning_rate": 9.727891156462585e-06, "loss": 31.7247, "step": 17294 }, { "epoch": 411.78805970149256, "grad_norm": 34.537803649902344, "learning_rate": 9.72732426303855e-06, "loss": 31.3353, "step": 17295 }, { "epoch": 411.81194029850747, "grad_norm": 27.136680603027344, "learning_rate": 9.726757369614513e-06, "loss": 31.4219, "step": 17296 }, { "epoch": 411.8358208955224, "grad_norm": 28.26192855834961, "learning_rate": 9.726190476190477e-06, "loss": 30.8183, "step": 17297 }, { "epoch": 411.85970149253734, "grad_norm": 29.68960189819336, "learning_rate": 9.72562358276644e-06, "loss": 31.601, "step": 17298 }, { "epoch": 411.88358208955225, "grad_norm": 25.01177406311035, "learning_rate": 9.725056689342405e-06, "loss": 30.5706, "step": 17299 }, { "epoch": 411.90746268656716, "grad_norm": 30.833648681640625, "learning_rate": 9.724489795918368e-06, "loss": 30.0223, "step": 17300 }, { "epoch": 411.93134328358207, "grad_norm": 24.258073806762695, "learning_rate": 9.723922902494332e-06, "loss": 29.7812, "step": 17301 }, { "epoch": 411.95522388059703, "grad_norm": 34.464168548583984, "learning_rate": 9.723356009070297e-06, "loss": 31.7863, "step": 17302 }, { "epoch": 411.97910447761194, "grad_norm": 29.224231719970703, "learning_rate": 9.72278911564626e-06, "loss": 30.4418, "step": 17303 }, { "epoch": 412.0, "grad_norm": 26.98820686340332, "learning_rate": 9.722222222222223e-06, "loss": 26.6036, "step": 17304 }, { "epoch": 412.0238805970149, "grad_norm": 30.626056671142578, "learning_rate": 9.721655328798186e-06, "loss": 30.981, "step": 17305 }, { "epoch": 412.0477611940299, "grad_norm": 26.96034049987793, "learning_rate": 9.72108843537415e-06, "loss": 30.8802, "step": 17306 }, { "epoch": 412.0716417910448, "grad_norm": 24.572372436523438, "learning_rate": 9.720521541950115e-06, "loss": 31.7014, "step": 17307 }, { "epoch": 412.0955223880597, "grad_norm": 27.470827102661133, "learning_rate": 9.719954648526078e-06, "loss": 30.3461, "step": 17308 }, { "epoch": 412.1194029850746, "grad_norm": 25.224088668823242, "learning_rate": 9.719387755102042e-06, "loss": 30.95, "step": 17309 }, { "epoch": 412.14328358208957, "grad_norm": 30.355358123779297, "learning_rate": 9.718820861678005e-06, "loss": 30.4449, "step": 17310 }, { "epoch": 412.1671641791045, "grad_norm": 25.076364517211914, "learning_rate": 9.71825396825397e-06, "loss": 30.5565, "step": 17311 }, { "epoch": 412.1910447761194, "grad_norm": 30.030237197875977, "learning_rate": 9.717687074829933e-06, "loss": 30.5954, "step": 17312 }, { "epoch": 412.21492537313435, "grad_norm": 25.06392478942871, "learning_rate": 9.717120181405897e-06, "loss": 30.1632, "step": 17313 }, { "epoch": 412.23880597014926, "grad_norm": 25.10326385498047, "learning_rate": 9.71655328798186e-06, "loss": 30.322, "step": 17314 }, { "epoch": 412.26268656716417, "grad_norm": 24.228214263916016, "learning_rate": 9.715986394557823e-06, "loss": 31.4099, "step": 17315 }, { "epoch": 412.28656716417913, "grad_norm": 21.156068801879883, "learning_rate": 9.715419501133788e-06, "loss": 31.4968, "step": 17316 }, { "epoch": 412.31044776119404, "grad_norm": 20.491891860961914, "learning_rate": 9.714852607709752e-06, "loss": 30.2069, "step": 17317 }, { "epoch": 412.33432835820895, "grad_norm": 18.79143524169922, "learning_rate": 9.714285714285715e-06, "loss": 30.6204, "step": 17318 }, { "epoch": 412.35820895522386, "grad_norm": 23.70470428466797, "learning_rate": 9.713718820861678e-06, "loss": 31.1247, "step": 17319 }, { "epoch": 412.3820895522388, "grad_norm": 19.9675235748291, "learning_rate": 9.713151927437643e-06, "loss": 30.9067, "step": 17320 }, { "epoch": 412.40597014925373, "grad_norm": 22.93436050415039, "learning_rate": 9.712585034013606e-06, "loss": 30.1828, "step": 17321 }, { "epoch": 412.42985074626864, "grad_norm": 19.510936737060547, "learning_rate": 9.71201814058957e-06, "loss": 31.448, "step": 17322 }, { "epoch": 412.4537313432836, "grad_norm": 25.782569885253906, "learning_rate": 9.711451247165535e-06, "loss": 31.1077, "step": 17323 }, { "epoch": 412.4776119402985, "grad_norm": 19.96767234802246, "learning_rate": 9.710884353741498e-06, "loss": 30.7972, "step": 17324 }, { "epoch": 412.5014925373134, "grad_norm": 26.440196990966797, "learning_rate": 9.71031746031746e-06, "loss": 30.4361, "step": 17325 }, { "epoch": 412.52537313432833, "grad_norm": 21.148910522460938, "learning_rate": 9.709750566893425e-06, "loss": 31.4516, "step": 17326 }, { "epoch": 412.5492537313433, "grad_norm": 25.684194564819336, "learning_rate": 9.70918367346939e-06, "loss": 29.7638, "step": 17327 }, { "epoch": 412.5731343283582, "grad_norm": 22.77691650390625, "learning_rate": 9.708616780045353e-06, "loss": 30.8605, "step": 17328 }, { "epoch": 412.5970149253731, "grad_norm": 23.238990783691406, "learning_rate": 9.708049886621316e-06, "loss": 30.6097, "step": 17329 }, { "epoch": 412.6208955223881, "grad_norm": 22.082658767700195, "learning_rate": 9.707482993197278e-06, "loss": 30.4965, "step": 17330 }, { "epoch": 412.644776119403, "grad_norm": 23.30976676940918, "learning_rate": 9.706916099773243e-06, "loss": 30.0028, "step": 17331 }, { "epoch": 412.6686567164179, "grad_norm": 18.33295440673828, "learning_rate": 9.706349206349208e-06, "loss": 31.2935, "step": 17332 }, { "epoch": 412.6925373134328, "grad_norm": 25.902048110961914, "learning_rate": 9.70578231292517e-06, "loss": 30.2697, "step": 17333 }, { "epoch": 412.7164179104478, "grad_norm": 21.660961151123047, "learning_rate": 9.705215419501135e-06, "loss": 29.2878, "step": 17334 }, { "epoch": 412.7402985074627, "grad_norm": 20.73514175415039, "learning_rate": 9.704648526077098e-06, "loss": 30.9861, "step": 17335 }, { "epoch": 412.7641791044776, "grad_norm": NaN, "learning_rate": 9.704081632653061e-06, "loss": 52.321, "step": 17336 }, { "epoch": 412.78805970149256, "grad_norm": 19.39436149597168, "learning_rate": 9.704081632653061e-06, "loss": 30.16, "step": 17337 }, { "epoch": 412.81194029850747, "grad_norm": 22.03596305847168, "learning_rate": 9.703514739229026e-06, "loss": 30.3655, "step": 17338 }, { "epoch": 412.8358208955224, "grad_norm": 24.618896484375, "learning_rate": 9.70294784580499e-06, "loss": 31.0424, "step": 17339 }, { "epoch": 412.85970149253734, "grad_norm": 17.97269630432129, "learning_rate": 9.702380952380953e-06, "loss": 30.5927, "step": 17340 }, { "epoch": 412.88358208955225, "grad_norm": NaN, "learning_rate": 9.701814058956916e-06, "loss": 27.2081, "step": 17341 }, { "epoch": 412.90746268656716, "grad_norm": 28.49968147277832, "learning_rate": 9.701814058956916e-06, "loss": 30.9395, "step": 17342 }, { "epoch": 412.93134328358207, "grad_norm": 20.1293888092041, "learning_rate": 9.70124716553288e-06, "loss": 31.8925, "step": 17343 }, { "epoch": 412.95522388059703, "grad_norm": 26.639421463012695, "learning_rate": 9.700680272108845e-06, "loss": 30.8541, "step": 17344 }, { "epoch": 412.97910447761194, "grad_norm": 23.90354347229004, "learning_rate": 9.700113378684808e-06, "loss": 32.278, "step": 17345 }, { "epoch": 413.0, "grad_norm": 20.97551918029785, "learning_rate": 9.699546485260771e-06, "loss": 25.9998, "step": 17346 }, { "epoch": 413.0238805970149, "grad_norm": 24.772329330444336, "learning_rate": 9.698979591836736e-06, "loss": 31.5759, "step": 17347 }, { "epoch": 413.0477611940299, "grad_norm": 20.000246047973633, "learning_rate": 9.698412698412698e-06, "loss": 30.7338, "step": 17348 }, { "epoch": 413.0716417910448, "grad_norm": 22.413970947265625, "learning_rate": 9.697845804988663e-06, "loss": 30.6352, "step": 17349 }, { "epoch": 413.0955223880597, "grad_norm": 22.880908966064453, "learning_rate": 9.697278911564628e-06, "loss": 31.6199, "step": 17350 }, { "epoch": 413.1194029850746, "grad_norm": 21.30689239501953, "learning_rate": 9.69671201814059e-06, "loss": 30.6978, "step": 17351 }, { "epoch": 413.14328358208957, "grad_norm": 20.144020080566406, "learning_rate": 9.696145124716553e-06, "loss": 31.9464, "step": 17352 }, { "epoch": 413.1671641791045, "grad_norm": 25.37303924560547, "learning_rate": 9.695578231292518e-06, "loss": 31.5355, "step": 17353 }, { "epoch": 413.1910447761194, "grad_norm": 17.912534713745117, "learning_rate": 9.695011337868483e-06, "loss": 31.2457, "step": 17354 }, { "epoch": 413.21492537313435, "grad_norm": 23.097381591796875, "learning_rate": 9.694444444444446e-06, "loss": 30.5069, "step": 17355 }, { "epoch": 413.23880597014926, "grad_norm": 22.0723819732666, "learning_rate": 9.693877551020408e-06, "loss": 31.1957, "step": 17356 }, { "epoch": 413.26268656716417, "grad_norm": 18.241931915283203, "learning_rate": 9.693310657596373e-06, "loss": 31.3098, "step": 17357 }, { "epoch": 413.28656716417913, "grad_norm": 24.820484161376953, "learning_rate": 9.692743764172336e-06, "loss": 30.3309, "step": 17358 }, { "epoch": 413.31044776119404, "grad_norm": 16.013769149780273, "learning_rate": 9.6921768707483e-06, "loss": 30.5326, "step": 17359 }, { "epoch": 413.33432835820895, "grad_norm": 27.219940185546875, "learning_rate": 9.691609977324263e-06, "loss": 31.3708, "step": 17360 }, { "epoch": 413.35820895522386, "grad_norm": 21.605802536010742, "learning_rate": 9.691043083900228e-06, "loss": 30.8502, "step": 17361 }, { "epoch": 413.3820895522388, "grad_norm": 30.45836639404297, "learning_rate": 9.690476190476191e-06, "loss": 32.564, "step": 17362 }, { "epoch": 413.40597014925373, "grad_norm": 24.562942504882812, "learning_rate": 9.689909297052154e-06, "loss": 31.0815, "step": 17363 }, { "epoch": 413.42985074626864, "grad_norm": 24.780115127563477, "learning_rate": 9.689342403628118e-06, "loss": 31.9012, "step": 17364 }, { "epoch": 413.4537313432836, "grad_norm": 19.684261322021484, "learning_rate": 9.688775510204083e-06, "loss": 30.6087, "step": 17365 }, { "epoch": 413.4776119402985, "grad_norm": 20.546043395996094, "learning_rate": 9.688208616780046e-06, "loss": 31.1515, "step": 17366 }, { "epoch": 413.5014925373134, "grad_norm": 21.40205192565918, "learning_rate": 9.687641723356009e-06, "loss": 29.8446, "step": 17367 }, { "epoch": 413.52537313432833, "grad_norm": 17.48446273803711, "learning_rate": 9.687074829931973e-06, "loss": 31.8636, "step": 17368 }, { "epoch": 413.5492537313433, "grad_norm": 20.376388549804688, "learning_rate": 9.686507936507938e-06, "loss": 30.2396, "step": 17369 }, { "epoch": 413.5731343283582, "grad_norm": 19.347999572753906, "learning_rate": 9.685941043083901e-06, "loss": 31.5867, "step": 17370 }, { "epoch": 413.5970149253731, "grad_norm": 16.06380271911621, "learning_rate": 9.685374149659866e-06, "loss": 31.7397, "step": 17371 }, { "epoch": 413.6208955223881, "grad_norm": 18.676008224487305, "learning_rate": 9.684807256235828e-06, "loss": 31.9843, "step": 17372 }, { "epoch": 413.644776119403, "grad_norm": 17.208215713500977, "learning_rate": 9.684240362811791e-06, "loss": 31.5737, "step": 17373 }, { "epoch": 413.6686567164179, "grad_norm": 20.85196304321289, "learning_rate": 9.683673469387756e-06, "loss": 31.6395, "step": 17374 }, { "epoch": 413.6925373134328, "grad_norm": 19.394147872924805, "learning_rate": 9.68310657596372e-06, "loss": 31.1919, "step": 17375 }, { "epoch": 413.7164179104478, "grad_norm": 20.282413482666016, "learning_rate": 9.682539682539683e-06, "loss": 30.9567, "step": 17376 }, { "epoch": 413.7402985074627, "grad_norm": 20.27549934387207, "learning_rate": 9.681972789115646e-06, "loss": 31.5665, "step": 17377 }, { "epoch": 413.7641791044776, "grad_norm": 19.347043991088867, "learning_rate": 9.681405895691611e-06, "loss": 30.8904, "step": 17378 }, { "epoch": 413.78805970149256, "grad_norm": 16.298198699951172, "learning_rate": 9.680839002267574e-06, "loss": 31.6256, "step": 17379 }, { "epoch": 413.81194029850747, "grad_norm": 17.747581481933594, "learning_rate": 9.680272108843538e-06, "loss": 31.5865, "step": 17380 }, { "epoch": 413.8358208955224, "grad_norm": 18.13039779663086, "learning_rate": 9.679705215419501e-06, "loss": 31.9384, "step": 17381 }, { "epoch": 413.85970149253734, "grad_norm": 23.176939010620117, "learning_rate": 9.679138321995466e-06, "loss": 30.9652, "step": 17382 }, { "epoch": 413.88358208955225, "grad_norm": 15.896499633789062, "learning_rate": 9.678571428571429e-06, "loss": 31.0066, "step": 17383 }, { "epoch": 413.90746268656716, "grad_norm": 16.574430465698242, "learning_rate": 9.678004535147393e-06, "loss": 30.3854, "step": 17384 }, { "epoch": 413.93134328358207, "grad_norm": 16.726947784423828, "learning_rate": 9.677437641723358e-06, "loss": 30.866, "step": 17385 }, { "epoch": 413.95522388059703, "grad_norm": 20.015026092529297, "learning_rate": 9.676870748299321e-06, "loss": 31.0936, "step": 17386 }, { "epoch": 413.97910447761194, "grad_norm": 21.819541931152344, "learning_rate": 9.676303854875284e-06, "loss": 31.6246, "step": 17387 }, { "epoch": 414.0, "grad_norm": 14.112858772277832, "learning_rate": 9.675736961451247e-06, "loss": 26.3616, "step": 17388 }, { "epoch": 414.0238805970149, "grad_norm": 17.154964447021484, "learning_rate": 9.675170068027211e-06, "loss": 31.003, "step": 17389 }, { "epoch": 414.0477611940299, "grad_norm": 30.1414737701416, "learning_rate": 9.674603174603176e-06, "loss": 30.0583, "step": 17390 }, { "epoch": 414.0716417910448, "grad_norm": 18.26875877380371, "learning_rate": 9.674036281179139e-06, "loss": 30.8035, "step": 17391 }, { "epoch": 414.0955223880597, "grad_norm": 16.537519454956055, "learning_rate": 9.673469387755103e-06, "loss": 30.3484, "step": 17392 }, { "epoch": 414.1194029850746, "grad_norm": 35.826725006103516, "learning_rate": 9.672902494331066e-06, "loss": 32.044, "step": 17393 }, { "epoch": 414.14328358208957, "grad_norm": 20.51215934753418, "learning_rate": 9.672335600907031e-06, "loss": 30.8656, "step": 17394 }, { "epoch": 414.1671641791045, "grad_norm": 41.638153076171875, "learning_rate": 9.671768707482994e-06, "loss": 31.5341, "step": 17395 }, { "epoch": 414.1910447761194, "grad_norm": 32.373390197753906, "learning_rate": 9.671201814058958e-06, "loss": 31.1785, "step": 17396 }, { "epoch": 414.21492537313435, "grad_norm": 45.31845474243164, "learning_rate": 9.670634920634921e-06, "loss": 31.3905, "step": 17397 }, { "epoch": 414.23880597014926, "grad_norm": 41.73866271972656, "learning_rate": 9.670068027210884e-06, "loss": 32.4249, "step": 17398 }, { "epoch": 414.26268656716417, "grad_norm": 34.975154876708984, "learning_rate": 9.669501133786849e-06, "loss": 31.9652, "step": 17399 }, { "epoch": 414.28656716417913, "grad_norm": 36.46391296386719, "learning_rate": 9.668934240362813e-06, "loss": 30.2904, "step": 17400 }, { "epoch": 414.31044776119404, "grad_norm": 32.26873779296875, "learning_rate": 9.668367346938776e-06, "loss": 31.0885, "step": 17401 }, { "epoch": 414.33432835820895, "grad_norm": 28.08407974243164, "learning_rate": 9.66780045351474e-06, "loss": 30.2394, "step": 17402 }, { "epoch": 414.35820895522386, "grad_norm": 38.191524505615234, "learning_rate": 9.667233560090704e-06, "loss": 30.2707, "step": 17403 }, { "epoch": 414.3820895522388, "grad_norm": 30.715505599975586, "learning_rate": 9.666666666666667e-06, "loss": 30.857, "step": 17404 }, { "epoch": 414.40597014925373, "grad_norm": 39.77232360839844, "learning_rate": 9.666099773242631e-06, "loss": 31.821, "step": 17405 }, { "epoch": 414.42985074626864, "grad_norm": 35.4797477722168, "learning_rate": 9.665532879818596e-06, "loss": 31.302, "step": 17406 }, { "epoch": 414.4537313432836, "grad_norm": 34.66422653198242, "learning_rate": 9.664965986394559e-06, "loss": 32.2161, "step": 17407 }, { "epoch": 414.4776119402985, "grad_norm": 32.85184097290039, "learning_rate": 9.664399092970522e-06, "loss": 31.9949, "step": 17408 }, { "epoch": 414.5014925373134, "grad_norm": 35.33075714111328, "learning_rate": 9.663832199546486e-06, "loss": 31.0031, "step": 17409 }, { "epoch": 414.52537313432833, "grad_norm": 32.316551208496094, "learning_rate": 9.663265306122451e-06, "loss": 31.0723, "step": 17410 }, { "epoch": 414.5492537313433, "grad_norm": 34.94147872924805, "learning_rate": 9.662698412698414e-06, "loss": 30.4708, "step": 17411 }, { "epoch": 414.5731343283582, "grad_norm": 32.667728424072266, "learning_rate": 9.662131519274377e-06, "loss": 30.697, "step": 17412 }, { "epoch": 414.5970149253731, "grad_norm": 35.34757995605469, "learning_rate": 9.66156462585034e-06, "loss": 30.566, "step": 17413 }, { "epoch": 414.6208955223881, "grad_norm": 32.55601501464844, "learning_rate": 9.660997732426304e-06, "loss": 31.046, "step": 17414 }, { "epoch": 414.644776119403, "grad_norm": 35.4473762512207, "learning_rate": 9.660430839002269e-06, "loss": 31.1124, "step": 17415 }, { "epoch": 414.6686567164179, "grad_norm": 32.57688903808594, "learning_rate": 9.659863945578232e-06, "loss": 31.5619, "step": 17416 }, { "epoch": 414.6925373134328, "grad_norm": 38.10930633544922, "learning_rate": 9.659297052154196e-06, "loss": 31.9327, "step": 17417 }, { "epoch": 414.7164179104478, "grad_norm": 33.61649703979492, "learning_rate": 9.65873015873016e-06, "loss": 31.3994, "step": 17418 }, { "epoch": 414.7402985074627, "grad_norm": 34.291107177734375, "learning_rate": 9.658163265306124e-06, "loss": 32.1362, "step": 17419 }, { "epoch": 414.7641791044776, "grad_norm": 32.92462158203125, "learning_rate": 9.657596371882087e-06, "loss": 31.3864, "step": 17420 }, { "epoch": 414.78805970149256, "grad_norm": 38.45814895629883, "learning_rate": 9.657029478458051e-06, "loss": 30.5475, "step": 17421 }, { "epoch": 414.81194029850747, "grad_norm": 31.68741226196289, "learning_rate": 9.656462585034014e-06, "loss": 32.3403, "step": 17422 }, { "epoch": 414.8358208955224, "grad_norm": 36.55650329589844, "learning_rate": 9.655895691609977e-06, "loss": 32.0996, "step": 17423 }, { "epoch": 414.85970149253734, "grad_norm": 33.3495979309082, "learning_rate": 9.655328798185942e-06, "loss": 30.9624, "step": 17424 }, { "epoch": 414.88358208955225, "grad_norm": 35.57025146484375, "learning_rate": 9.654761904761906e-06, "loss": 31.1892, "step": 17425 }, { "epoch": 414.90746268656716, "grad_norm": 32.37010955810547, "learning_rate": 9.65419501133787e-06, "loss": 30.7616, "step": 17426 }, { "epoch": 414.93134328358207, "grad_norm": 33.7585334777832, "learning_rate": 9.653628117913832e-06, "loss": 31.299, "step": 17427 }, { "epoch": 414.95522388059703, "grad_norm": 30.659799575805664, "learning_rate": 9.653061224489797e-06, "loss": 30.9077, "step": 17428 }, { "epoch": 414.97910447761194, "grad_norm": 35.51494598388672, "learning_rate": 9.65249433106576e-06, "loss": 31.9317, "step": 17429 }, { "epoch": 415.0, "grad_norm": 30.698129653930664, "learning_rate": 9.651927437641724e-06, "loss": 27.1297, "step": 17430 }, { "epoch": 415.0238805970149, "grad_norm": 34.81195831298828, "learning_rate": 9.651360544217689e-06, "loss": 31.1253, "step": 17431 }, { "epoch": 415.0477611940299, "grad_norm": 31.048845291137695, "learning_rate": 9.650793650793652e-06, "loss": 30.1795, "step": 17432 }, { "epoch": 415.0716417910448, "grad_norm": 35.35676956176758, "learning_rate": 9.650226757369615e-06, "loss": 30.8869, "step": 17433 }, { "epoch": 415.0955223880597, "grad_norm": 29.95574378967285, "learning_rate": 9.64965986394558e-06, "loss": 31.0124, "step": 17434 }, { "epoch": 415.1194029850746, "grad_norm": 37.70972442626953, "learning_rate": 9.649092970521544e-06, "loss": 30.3196, "step": 17435 }, { "epoch": 415.14328358208957, "grad_norm": 34.77582931518555, "learning_rate": 9.648526077097507e-06, "loss": 30.9152, "step": 17436 }, { "epoch": 415.1671641791045, "grad_norm": 37.33811950683594, "learning_rate": 9.64795918367347e-06, "loss": 31.0816, "step": 17437 }, { "epoch": 415.1910447761194, "grad_norm": 29.557003021240234, "learning_rate": 9.647392290249434e-06, "loss": 31.382, "step": 17438 }, { "epoch": 415.21492537313435, "grad_norm": 36.27533721923828, "learning_rate": 9.646825396825397e-06, "loss": 30.9413, "step": 17439 }, { "epoch": 415.23880597014926, "grad_norm": 32.80736541748047, "learning_rate": 9.646258503401362e-06, "loss": 31.7801, "step": 17440 }, { "epoch": 415.26268656716417, "grad_norm": 36.021240234375, "learning_rate": 9.645691609977325e-06, "loss": 31.8343, "step": 17441 }, { "epoch": 415.28656716417913, "grad_norm": 32.6784553527832, "learning_rate": 9.64512471655329e-06, "loss": 30.9252, "step": 17442 }, { "epoch": 415.31044776119404, "grad_norm": 32.43506622314453, "learning_rate": 9.644557823129252e-06, "loss": 30.8383, "step": 17443 }, { "epoch": 415.33432835820895, "grad_norm": 28.829185485839844, "learning_rate": 9.643990929705217e-06, "loss": 31.4669, "step": 17444 }, { "epoch": 415.35820895522386, "grad_norm": 40.13490295410156, "learning_rate": 9.64342403628118e-06, "loss": 32.4948, "step": 17445 }, { "epoch": 415.3820895522388, "grad_norm": 34.88079833984375, "learning_rate": 9.642857142857144e-06, "loss": 30.145, "step": 17446 }, { "epoch": 415.40597014925373, "grad_norm": 35.491676330566406, "learning_rate": 9.642290249433107e-06, "loss": 31.2718, "step": 17447 }, { "epoch": 415.42985074626864, "grad_norm": 30.531219482421875, "learning_rate": 9.64172335600907e-06, "loss": 31.199, "step": 17448 }, { "epoch": 415.4537313432836, "grad_norm": 34.6240348815918, "learning_rate": 9.641156462585035e-06, "loss": 31.5692, "step": 17449 }, { "epoch": 415.4776119402985, "grad_norm": NaN, "learning_rate": 9.640589569161e-06, "loss": 46.3923, "step": 17450 }, { "epoch": 415.5014925373134, "grad_norm": 19.935775756835938, "learning_rate": 9.640589569161e-06, "loss": 31.8499, "step": 17451 }, { "epoch": 415.52537313432833, "grad_norm": 34.28929901123047, "learning_rate": 9.640022675736962e-06, "loss": 30.9777, "step": 17452 }, { "epoch": 415.5492537313433, "grad_norm": 23.856491088867188, "learning_rate": 9.639455782312927e-06, "loss": 30.8576, "step": 17453 }, { "epoch": 415.5731343283582, "grad_norm": 44.480350494384766, "learning_rate": 9.63888888888889e-06, "loss": 31.3179, "step": 17454 }, { "epoch": 415.5970149253731, "grad_norm": 40.33920669555664, "learning_rate": 9.638321995464852e-06, "loss": 31.7506, "step": 17455 }, { "epoch": 415.6208955223881, "grad_norm": 31.928173065185547, "learning_rate": 9.637755102040817e-06, "loss": 32.2406, "step": 17456 }, { "epoch": 415.644776119403, "grad_norm": 31.577682495117188, "learning_rate": 9.637188208616782e-06, "loss": 30.344, "step": 17457 }, { "epoch": 415.6686567164179, "grad_norm": 33.42020034790039, "learning_rate": 9.636621315192745e-06, "loss": 32.0592, "step": 17458 }, { "epoch": 415.6925373134328, "grad_norm": 24.131744384765625, "learning_rate": 9.636054421768707e-06, "loss": 32.8307, "step": 17459 }, { "epoch": 415.7164179104478, "grad_norm": 42.14389419555664, "learning_rate": 9.635487528344672e-06, "loss": 31.7787, "step": 17460 }, { "epoch": 415.7402985074627, "grad_norm": 33.02387619018555, "learning_rate": 9.634920634920637e-06, "loss": 31.4886, "step": 17461 }, { "epoch": 415.7641791044776, "grad_norm": 40.44780731201172, "learning_rate": 9.6343537414966e-06, "loss": 33.1966, "step": 17462 }, { "epoch": 415.78805970149256, "grad_norm": 38.408538818359375, "learning_rate": 9.633786848072562e-06, "loss": 31.0887, "step": 17463 }, { "epoch": 415.81194029850747, "grad_norm": 33.120426177978516, "learning_rate": 9.633219954648527e-06, "loss": 31.4848, "step": 17464 }, { "epoch": 415.8358208955224, "grad_norm": 30.846548080444336, "learning_rate": 9.63265306122449e-06, "loss": 32.0165, "step": 17465 }, { "epoch": 415.85970149253734, "grad_norm": 39.12036895751953, "learning_rate": 9.632086167800455e-06, "loss": 30.8918, "step": 17466 }, { "epoch": 415.88358208955225, "grad_norm": 34.815101623535156, "learning_rate": 9.63151927437642e-06, "loss": 30.5831, "step": 17467 }, { "epoch": 415.90746268656716, "grad_norm": 37.425941467285156, "learning_rate": 9.630952380952382e-06, "loss": 31.4369, "step": 17468 }, { "epoch": 415.93134328358207, "grad_norm": 33.379005432128906, "learning_rate": 9.630385487528345e-06, "loss": 32.4893, "step": 17469 }, { "epoch": 415.95522388059703, "grad_norm": 34.791534423828125, "learning_rate": 9.62981859410431e-06, "loss": 32.1588, "step": 17470 }, { "epoch": 415.97910447761194, "grad_norm": 34.959320068359375, "learning_rate": 9.629251700680272e-06, "loss": 32.3794, "step": 17471 }, { "epoch": 416.0, "grad_norm": 28.786602020263672, "learning_rate": 9.628684807256237e-06, "loss": 28.1386, "step": 17472 }, { "epoch": 416.0238805970149, "grad_norm": 33.026161193847656, "learning_rate": 9.6281179138322e-06, "loss": 31.1314, "step": 17473 }, { "epoch": 416.0477611940299, "grad_norm": 38.329368591308594, "learning_rate": 9.627551020408165e-06, "loss": 32.9591, "step": 17474 }, { "epoch": 416.0716417910448, "grad_norm": 31.09491729736328, "learning_rate": 9.626984126984127e-06, "loss": 31.3726, "step": 17475 }, { "epoch": 416.0955223880597, "grad_norm": 38.20236587524414, "learning_rate": 9.626417233560092e-06, "loss": 31.2644, "step": 17476 }, { "epoch": 416.1194029850746, "grad_norm": 33.05231857299805, "learning_rate": 9.625850340136055e-06, "loss": 31.2694, "step": 17477 }, { "epoch": 416.14328358208957, "grad_norm": 33.7283821105957, "learning_rate": 9.62528344671202e-06, "loss": 31.4671, "step": 17478 }, { "epoch": 416.1671641791045, "grad_norm": 29.768062591552734, "learning_rate": 9.624716553287982e-06, "loss": 31.5029, "step": 17479 }, { "epoch": 416.1910447761194, "grad_norm": 34.26103973388672, "learning_rate": 9.624149659863945e-06, "loss": 31.6965, "step": 17480 }, { "epoch": 416.21492537313435, "grad_norm": 29.112293243408203, "learning_rate": 9.62358276643991e-06, "loss": 31.0562, "step": 17481 }, { "epoch": 416.23880597014926, "grad_norm": 36.47053527832031, "learning_rate": 9.623015873015875e-06, "loss": 32.4062, "step": 17482 }, { "epoch": 416.26268656716417, "grad_norm": 33.5345458984375, "learning_rate": 9.622448979591837e-06, "loss": 31.8404, "step": 17483 }, { "epoch": 416.28656716417913, "grad_norm": 37.95174026489258, "learning_rate": 9.6218820861678e-06, "loss": 30.7303, "step": 17484 }, { "epoch": 416.31044776119404, "grad_norm": 33.3171272277832, "learning_rate": 9.621315192743765e-06, "loss": 31.9348, "step": 17485 }, { "epoch": 416.33432835820895, "grad_norm": 36.97134780883789, "learning_rate": 9.62074829931973e-06, "loss": 30.7094, "step": 17486 }, { "epoch": 416.35820895522386, "grad_norm": 33.6683349609375, "learning_rate": 9.620181405895692e-06, "loss": 32.4069, "step": 17487 }, { "epoch": 416.3820895522388, "grad_norm": 36.61471939086914, "learning_rate": 9.619614512471655e-06, "loss": 31.9634, "step": 17488 }, { "epoch": 416.40597014925373, "grad_norm": 30.84718894958496, "learning_rate": 9.61904761904762e-06, "loss": 31.6744, "step": 17489 }, { "epoch": 416.42985074626864, "grad_norm": 39.370994567871094, "learning_rate": 9.618480725623583e-06, "loss": 32.897, "step": 17490 }, { "epoch": 416.4537313432836, "grad_norm": 32.75835418701172, "learning_rate": 9.617913832199547e-06, "loss": 32.2125, "step": 17491 }, { "epoch": 416.4776119402985, "grad_norm": 38.812191009521484, "learning_rate": 9.617346938775512e-06, "loss": 33.1373, "step": 17492 }, { "epoch": 416.5014925373134, "grad_norm": 35.7237548828125, "learning_rate": 9.616780045351475e-06, "loss": 31.8632, "step": 17493 }, { "epoch": 416.52537313432833, "grad_norm": 32.696044921875, "learning_rate": 9.616213151927438e-06, "loss": 31.1517, "step": 17494 }, { "epoch": 416.5492537313433, "grad_norm": 31.310306549072266, "learning_rate": 9.6156462585034e-06, "loss": 32.4938, "step": 17495 }, { "epoch": 416.5731343283582, "grad_norm": 33.53468704223633, "learning_rate": 9.615079365079365e-06, "loss": 31.6092, "step": 17496 }, { "epoch": 416.5970149253731, "grad_norm": 31.257722854614258, "learning_rate": 9.61451247165533e-06, "loss": 32.7622, "step": 17497 }, { "epoch": 416.6208955223881, "grad_norm": 41.01079177856445, "learning_rate": 9.613945578231293e-06, "loss": 30.3508, "step": 17498 }, { "epoch": 416.644776119403, "grad_norm": 34.838714599609375, "learning_rate": 9.613378684807257e-06, "loss": 30.8629, "step": 17499 }, { "epoch": 416.6686567164179, "grad_norm": 35.80812072753906, "learning_rate": 9.61281179138322e-06, "loss": 31.4608, "step": 17500 }, { "epoch": 416.6925373134328, "grad_norm": 29.86716651916504, "learning_rate": 9.612244897959185e-06, "loss": 32.0256, "step": 17501 }, { "epoch": 416.7164179104478, "grad_norm": 36.53352355957031, "learning_rate": 9.611678004535148e-06, "loss": 31.7998, "step": 17502 }, { "epoch": 416.7402985074627, "grad_norm": 33.87714385986328, "learning_rate": 9.611111111111112e-06, "loss": 31.5293, "step": 17503 }, { "epoch": 416.7641791044776, "grad_norm": 34.24754333496094, "learning_rate": 9.610544217687075e-06, "loss": 30.5463, "step": 17504 }, { "epoch": 416.78805970149256, "grad_norm": 26.537628173828125, "learning_rate": 9.609977324263038e-06, "loss": 31.6937, "step": 17505 }, { "epoch": 416.81194029850747, "grad_norm": 35.77519989013672, "learning_rate": 9.609410430839003e-06, "loss": 33.0816, "step": 17506 }, { "epoch": 416.8358208955224, "grad_norm": 32.720829010009766, "learning_rate": 9.608843537414967e-06, "loss": 32.7679, "step": 17507 }, { "epoch": 416.85970149253734, "grad_norm": 40.37900161743164, "learning_rate": 9.60827664399093e-06, "loss": 31.6627, "step": 17508 }, { "epoch": 416.88358208955225, "grad_norm": 39.46938705444336, "learning_rate": 9.607709750566893e-06, "loss": 33.3254, "step": 17509 }, { "epoch": 416.90746268656716, "grad_norm": 33.46904754638672, "learning_rate": 9.607142857142858e-06, "loss": 31.2637, "step": 17510 }, { "epoch": 416.93134328358207, "grad_norm": 34.10995101928711, "learning_rate": 9.606575963718822e-06, "loss": 31.9394, "step": 17511 }, { "epoch": 416.95522388059703, "grad_norm": 29.791414260864258, "learning_rate": 9.606009070294785e-06, "loss": 31.2692, "step": 17512 }, { "epoch": 416.97910447761194, "grad_norm": 26.669784545898438, "learning_rate": 9.60544217687075e-06, "loss": 31.9486, "step": 17513 }, { "epoch": 417.0, "grad_norm": 28.887584686279297, "learning_rate": 9.604875283446713e-06, "loss": 27.0846, "step": 17514 }, { "epoch": 417.0238805970149, "grad_norm": 26.53117561340332, "learning_rate": 9.604308390022676e-06, "loss": 32.2599, "step": 17515 }, { "epoch": 417.0477611940299, "grad_norm": 30.18800163269043, "learning_rate": 9.60374149659864e-06, "loss": 32.1433, "step": 17516 }, { "epoch": 417.0716417910448, "grad_norm": 25.092082977294922, "learning_rate": 9.603174603174605e-06, "loss": 31.7247, "step": 17517 }, { "epoch": 417.0955223880597, "grad_norm": 31.795455932617188, "learning_rate": 9.602607709750568e-06, "loss": 30.6835, "step": 17518 }, { "epoch": 417.1194029850746, "grad_norm": 22.675058364868164, "learning_rate": 9.60204081632653e-06, "loss": 32.5906, "step": 17519 }, { "epoch": 417.14328358208957, "grad_norm": 35.6273307800293, "learning_rate": 9.601473922902495e-06, "loss": 31.4875, "step": 17520 }, { "epoch": 417.1671641791045, "grad_norm": 32.19929122924805, "learning_rate": 9.600907029478458e-06, "loss": 30.7397, "step": 17521 }, { "epoch": 417.1910447761194, "grad_norm": 31.30029296875, "learning_rate": 9.600340136054423e-06, "loss": 32.4994, "step": 17522 }, { "epoch": 417.21492537313435, "grad_norm": 27.148357391357422, "learning_rate": 9.599773242630386e-06, "loss": 32.9034, "step": 17523 }, { "epoch": 417.23880597014926, "grad_norm": 29.497865676879883, "learning_rate": 9.59920634920635e-06, "loss": 31.6199, "step": 17524 }, { "epoch": 417.26268656716417, "grad_norm": 24.3176326751709, "learning_rate": 9.598639455782313e-06, "loss": 33.2101, "step": 17525 }, { "epoch": 417.28656716417913, "grad_norm": 31.636337280273438, "learning_rate": 9.598072562358278e-06, "loss": 30.4667, "step": 17526 }, { "epoch": 417.31044776119404, "grad_norm": 24.830875396728516, "learning_rate": 9.597505668934242e-06, "loss": 32.5534, "step": 17527 }, { "epoch": 417.33432835820895, "grad_norm": 31.565763473510742, "learning_rate": 9.596938775510205e-06, "loss": 30.5732, "step": 17528 }, { "epoch": 417.35820895522386, "grad_norm": 27.42149543762207, "learning_rate": 9.596371882086168e-06, "loss": 31.6573, "step": 17529 }, { "epoch": 417.3820895522388, "grad_norm": 29.571035385131836, "learning_rate": 9.595804988662131e-06, "loss": 32.4568, "step": 17530 }, { "epoch": 417.40597014925373, "grad_norm": 27.271560668945312, "learning_rate": 9.595238095238096e-06, "loss": 32.0478, "step": 17531 }, { "epoch": 417.42985074626864, "grad_norm": 25.561492919921875, "learning_rate": 9.59467120181406e-06, "loss": 32.1874, "step": 17532 }, { "epoch": 417.4537313432836, "grad_norm": 25.692110061645508, "learning_rate": 9.594104308390023e-06, "loss": 31.4429, "step": 17533 }, { "epoch": 417.4776119402985, "grad_norm": 26.35320472717285, "learning_rate": 9.593537414965988e-06, "loss": 31.4279, "step": 17534 }, { "epoch": 417.5014925373134, "grad_norm": 25.281557083129883, "learning_rate": 9.59297052154195e-06, "loss": 33.0092, "step": 17535 }, { "epoch": 417.52537313432833, "grad_norm": 26.904966354370117, "learning_rate": 9.592403628117914e-06, "loss": 31.7924, "step": 17536 }, { "epoch": 417.5492537313433, "grad_norm": 22.45372772216797, "learning_rate": 9.591836734693878e-06, "loss": 32.2811, "step": 17537 }, { "epoch": 417.5731343283582, "grad_norm": 24.859397888183594, "learning_rate": 9.591269841269843e-06, "loss": 31.921, "step": 17538 }, { "epoch": 417.5970149253731, "grad_norm": 23.877729415893555, "learning_rate": 9.590702947845806e-06, "loss": 32.5846, "step": 17539 }, { "epoch": 417.6208955223881, "grad_norm": 18.87540626525879, "learning_rate": 9.590136054421769e-06, "loss": 33.1223, "step": 17540 }, { "epoch": 417.644776119403, "grad_norm": 23.214746475219727, "learning_rate": 9.589569160997733e-06, "loss": 33.1679, "step": 17541 }, { "epoch": 417.6686567164179, "grad_norm": 21.429454803466797, "learning_rate": 9.589002267573698e-06, "loss": 31.8201, "step": 17542 }, { "epoch": 417.6925373134328, "grad_norm": 21.93021011352539, "learning_rate": 9.58843537414966e-06, "loss": 29.5693, "step": 17543 }, { "epoch": 417.7164179104478, "grad_norm": 16.797672271728516, "learning_rate": 9.587868480725624e-06, "loss": 31.6084, "step": 17544 }, { "epoch": 417.7402985074627, "grad_norm": 29.99384880065918, "learning_rate": 9.587301587301588e-06, "loss": 31.7087, "step": 17545 }, { "epoch": 417.7641791044776, "grad_norm": 22.556325912475586, "learning_rate": 9.586734693877551e-06, "loss": 32.217, "step": 17546 }, { "epoch": 417.78805970149256, "grad_norm": 25.481576919555664, "learning_rate": 9.586167800453516e-06, "loss": 31.3533, "step": 17547 }, { "epoch": 417.81194029850747, "grad_norm": 25.59836769104004, "learning_rate": 9.58560090702948e-06, "loss": 31.2115, "step": 17548 }, { "epoch": 417.8358208955224, "grad_norm": 19.862539291381836, "learning_rate": 9.585034013605443e-06, "loss": 31.1257, "step": 17549 }, { "epoch": 417.85970149253734, "grad_norm": 22.07025146484375, "learning_rate": 9.584467120181406e-06, "loss": 32.9178, "step": 17550 }, { "epoch": 417.88358208955225, "grad_norm": 24.650352478027344, "learning_rate": 9.58390022675737e-06, "loss": 32.2181, "step": 17551 }, { "epoch": 417.90746268656716, "grad_norm": 17.13593292236328, "learning_rate": 9.583333333333335e-06, "loss": 32.0811, "step": 17552 }, { "epoch": 417.93134328358207, "grad_norm": 22.94606590270996, "learning_rate": 9.582766439909298e-06, "loss": 31.1929, "step": 17553 }, { "epoch": 417.95522388059703, "grad_norm": 24.301525115966797, "learning_rate": 9.582199546485261e-06, "loss": 32.6324, "step": 17554 }, { "epoch": 417.97910447761194, "grad_norm": 18.32398796081543, "learning_rate": 9.581632653061226e-06, "loss": 31.1325, "step": 17555 }, { "epoch": 418.0, "grad_norm": 15.554393768310547, "learning_rate": 9.581065759637189e-06, "loss": 27.5425, "step": 17556 }, { "epoch": 418.0238805970149, "grad_norm": 25.79958152770996, "learning_rate": 9.580498866213153e-06, "loss": 32.2033, "step": 17557 }, { "epoch": 418.0477611940299, "grad_norm": 22.983232498168945, "learning_rate": 9.579931972789116e-06, "loss": 31.7035, "step": 17558 }, { "epoch": 418.0716417910448, "grad_norm": 15.403076171875, "learning_rate": 9.57936507936508e-06, "loss": 32.0717, "step": 17559 }, { "epoch": 418.0955223880597, "grad_norm": 18.59060287475586, "learning_rate": 9.578798185941044e-06, "loss": 30.2652, "step": 17560 }, { "epoch": 418.1194029850746, "grad_norm": 24.422931671142578, "learning_rate": 9.578231292517007e-06, "loss": 32.4902, "step": 17561 }, { "epoch": 418.14328358208957, "grad_norm": 16.37408447265625, "learning_rate": 9.577664399092971e-06, "loss": 30.9031, "step": 17562 }, { "epoch": 418.1671641791045, "grad_norm": 22.187536239624023, "learning_rate": 9.577097505668936e-06, "loss": 31.2213, "step": 17563 }, { "epoch": 418.1910447761194, "grad_norm": NaN, "learning_rate": 9.576530612244899e-06, "loss": 45.3476, "step": 17564 }, { "epoch": 418.21492537313435, "grad_norm": 22.222942352294922, "learning_rate": 9.576530612244899e-06, "loss": 31.6182, "step": 17565 }, { "epoch": 418.23880597014926, "grad_norm": 48.21788024902344, "learning_rate": 9.575963718820862e-06, "loss": 32.9053, "step": 17566 }, { "epoch": 418.26268656716417, "grad_norm": 38.83982849121094, "learning_rate": 9.575396825396826e-06, "loss": 33.7198, "step": 17567 }, { "epoch": 418.28656716417913, "grad_norm": 40.27055740356445, "learning_rate": 9.57482993197279e-06, "loss": 31.2131, "step": 17568 }, { "epoch": 418.31044776119404, "grad_norm": 31.61357307434082, "learning_rate": 9.574263038548754e-06, "loss": 32.2099, "step": 17569 }, { "epoch": 418.33432835820895, "grad_norm": 40.3348388671875, "learning_rate": 9.573696145124717e-06, "loss": 31.9725, "step": 17570 }, { "epoch": 418.35820895522386, "grad_norm": 31.171201705932617, "learning_rate": 9.573129251700681e-06, "loss": 33.3625, "step": 17571 }, { "epoch": 418.3820895522388, "grad_norm": 44.89733123779297, "learning_rate": 9.572562358276644e-06, "loss": 32.2646, "step": 17572 }, { "epoch": 418.40597014925373, "grad_norm": 38.07866668701172, "learning_rate": 9.571995464852609e-06, "loss": 32.5972, "step": 17573 }, { "epoch": 418.42985074626864, "grad_norm": 49.71159362792969, "learning_rate": 9.571428571428573e-06, "loss": 33.2418, "step": 17574 }, { "epoch": 418.4537313432836, "grad_norm": 41.45730972290039, "learning_rate": 9.570861678004536e-06, "loss": 33.2561, "step": 17575 }, { "epoch": 418.4776119402985, "grad_norm": 41.64115905761719, "learning_rate": 9.570294784580499e-06, "loss": 32.1524, "step": 17576 }, { "epoch": 418.5014925373134, "grad_norm": 37.11415481567383, "learning_rate": 9.569727891156464e-06, "loss": 33.4341, "step": 17577 }, { "epoch": 418.52537313432833, "grad_norm": 41.89173889160156, "learning_rate": 9.569160997732427e-06, "loss": 32.5726, "step": 17578 }, { "epoch": 418.5492537313433, "grad_norm": 34.52882385253906, "learning_rate": 9.568594104308391e-06, "loss": 33.1209, "step": 17579 }, { "epoch": 418.5731343283582, "grad_norm": 40.373287200927734, "learning_rate": 9.568027210884354e-06, "loss": 33.3066, "step": 17580 }, { "epoch": 418.5970149253731, "grad_norm": 32.57235336303711, "learning_rate": 9.567460317460319e-06, "loss": 32.788, "step": 17581 }, { "epoch": 418.6208955223881, "grad_norm": 48.87415313720703, "learning_rate": 9.566893424036282e-06, "loss": 33.2523, "step": 17582 }, { "epoch": 418.644776119403, "grad_norm": 39.380393981933594, "learning_rate": 9.566326530612246e-06, "loss": 33.0736, "step": 17583 }, { "epoch": 418.6686567164179, "grad_norm": 36.41569900512695, "learning_rate": 9.565759637188209e-06, "loss": 33.0713, "step": 17584 }, { "epoch": 418.6925373134328, "grad_norm": 33.75836181640625, "learning_rate": 9.565192743764174e-06, "loss": 32.9451, "step": 17585 }, { "epoch": 418.7164179104478, "grad_norm": 36.547908782958984, "learning_rate": 9.564625850340137e-06, "loss": 31.6862, "step": 17586 }, { "epoch": 418.7402985074627, "grad_norm": 32.715938568115234, "learning_rate": 9.5640589569161e-06, "loss": 32.9217, "step": 17587 }, { "epoch": 418.7641791044776, "grad_norm": 44.12977981567383, "learning_rate": 9.563492063492064e-06, "loss": 32.9309, "step": 17588 }, { "epoch": 418.78805970149256, "grad_norm": 38.17497634887695, "learning_rate": 9.562925170068029e-06, "loss": 32.5588, "step": 17589 }, { "epoch": 418.81194029850747, "grad_norm": 41.1082649230957, "learning_rate": 9.562358276643991e-06, "loss": 33.0644, "step": 17590 }, { "epoch": 418.8358208955224, "grad_norm": 35.24775314331055, "learning_rate": 9.561791383219954e-06, "loss": 33.8196, "step": 17591 }, { "epoch": 418.85970149253734, "grad_norm": 37.03262710571289, "learning_rate": 9.561224489795919e-06, "loss": 32.1539, "step": 17592 }, { "epoch": 418.88358208955225, "grad_norm": 33.93702697753906, "learning_rate": 9.560657596371884e-06, "loss": 32.4094, "step": 17593 }, { "epoch": 418.90746268656716, "grad_norm": 44.84053421020508, "learning_rate": 9.560090702947846e-06, "loss": 33.1483, "step": 17594 }, { "epoch": 418.93134328358207, "grad_norm": 42.49712371826172, "learning_rate": 9.559523809523811e-06, "loss": 33.1746, "step": 17595 }, { "epoch": 418.95522388059703, "grad_norm": 36.922584533691406, "learning_rate": 9.558956916099774e-06, "loss": 33.5949, "step": 17596 }, { "epoch": 418.97910447761194, "grad_norm": 36.12288284301758, "learning_rate": 9.558390022675737e-06, "loss": 32.6467, "step": 17597 }, { "epoch": 419.0, "grad_norm": 37.515892028808594, "learning_rate": 9.557823129251701e-06, "loss": 28.5415, "step": 17598 }, { "epoch": 419.0238805970149, "grad_norm": 34.370445251464844, "learning_rate": 9.557256235827666e-06, "loss": 33.8195, "step": 17599 }, { "epoch": 419.0477611940299, "grad_norm": 41.703155517578125, "learning_rate": 9.556689342403629e-06, "loss": 32.9765, "step": 17600 }, { "epoch": 419.0716417910448, "grad_norm": 39.18743896484375, "learning_rate": 9.556122448979592e-06, "loss": 31.2492, "step": 17601 }, { "epoch": 419.0955223880597, "grad_norm": 39.28944778442383, "learning_rate": 9.555555555555556e-06, "loss": 32.1479, "step": 17602 }, { "epoch": 419.1194029850746, "grad_norm": 35.05729675292969, "learning_rate": 9.55498866213152e-06, "loss": 32.1249, "step": 17603 }, { "epoch": 419.14328358208957, "grad_norm": 40.646728515625, "learning_rate": 9.554421768707484e-06, "loss": 33.3527, "step": 17604 }, { "epoch": 419.1671641791045, "grad_norm": 35.161800384521484, "learning_rate": 9.553854875283447e-06, "loss": 31.7491, "step": 17605 }, { "epoch": 419.1910447761194, "grad_norm": 41.97065353393555, "learning_rate": 9.553287981859411e-06, "loss": 33.5065, "step": 17606 }, { "epoch": 419.21492537313435, "grad_norm": 39.23545455932617, "learning_rate": 9.552721088435374e-06, "loss": 33.4734, "step": 17607 }, { "epoch": 419.23880597014926, "grad_norm": 43.604305267333984, "learning_rate": 9.552154195011339e-06, "loss": 32.548, "step": 17608 }, { "epoch": 419.26268656716417, "grad_norm": 36.96836853027344, "learning_rate": 9.551587301587304e-06, "loss": 33.0143, "step": 17609 }, { "epoch": 419.28656716417913, "grad_norm": 40.2878532409668, "learning_rate": 9.551020408163266e-06, "loss": 31.9072, "step": 17610 }, { "epoch": 419.31044776119404, "grad_norm": 35.68461990356445, "learning_rate": 9.55045351473923e-06, "loss": 32.882, "step": 17611 }, { "epoch": 419.33432835820895, "grad_norm": 41.77913284301758, "learning_rate": 9.549886621315192e-06, "loss": 31.5042, "step": 17612 }, { "epoch": 419.35820895522386, "grad_norm": 35.19708251953125, "learning_rate": 9.549319727891157e-06, "loss": 32.4653, "step": 17613 }, { "epoch": 419.3820895522388, "grad_norm": 41.49123764038086, "learning_rate": 9.548752834467121e-06, "loss": 31.4312, "step": 17614 }, { "epoch": 419.40597014925373, "grad_norm": 32.89275360107422, "learning_rate": 9.548185941043084e-06, "loss": 33.208, "step": 17615 }, { "epoch": 419.42985074626864, "grad_norm": 40.672149658203125, "learning_rate": 9.547619047619049e-06, "loss": 34.5644, "step": 17616 }, { "epoch": 419.4537313432836, "grad_norm": 35.49092483520508, "learning_rate": 9.547052154195012e-06, "loss": 33.4505, "step": 17617 }, { "epoch": 419.4776119402985, "grad_norm": 43.761478424072266, "learning_rate": 9.546485260770976e-06, "loss": 32.4576, "step": 17618 }, { "epoch": 419.5014925373134, "grad_norm": 41.06398010253906, "learning_rate": 9.54591836734694e-06, "loss": 32.5994, "step": 17619 }, { "epoch": 419.52537313432833, "grad_norm": 40.87247085571289, "learning_rate": 9.545351473922904e-06, "loss": 33.1939, "step": 17620 }, { "epoch": 419.5492537313433, "grad_norm": 38.59591293334961, "learning_rate": 9.544784580498867e-06, "loss": 33.1224, "step": 17621 }, { "epoch": 419.5731343283582, "grad_norm": 39.23366928100586, "learning_rate": 9.54421768707483e-06, "loss": 33.0341, "step": 17622 }, { "epoch": 419.5970149253731, "grad_norm": 36.526763916015625, "learning_rate": 9.543650793650794e-06, "loss": 31.8997, "step": 17623 }, { "epoch": 419.6208955223881, "grad_norm": 40.675716400146484, "learning_rate": 9.543083900226759e-06, "loss": 32.2546, "step": 17624 }, { "epoch": 419.644776119403, "grad_norm": 37.90499496459961, "learning_rate": 9.542517006802722e-06, "loss": 32.4246, "step": 17625 }, { "epoch": 419.6686567164179, "grad_norm": NaN, "learning_rate": 9.541950113378685e-06, "loss": 51.5688, "step": 17626 }, { "epoch": 419.6925373134328, "grad_norm": 23.699739456176758, "learning_rate": 9.541950113378685e-06, "loss": 33.5107, "step": 17627 }, { "epoch": 419.7164179104478, "grad_norm": 49.75596237182617, "learning_rate": 9.54138321995465e-06, "loss": 33.7643, "step": 17628 }, { "epoch": 419.7402985074627, "grad_norm": 34.00912094116211, "learning_rate": 9.540816326530612e-06, "loss": 34.7164, "step": 17629 }, { "epoch": 419.7641791044776, "grad_norm": 36.16143035888672, "learning_rate": 9.540249433106577e-06, "loss": 34.8689, "step": 17630 }, { "epoch": 419.78805970149256, "grad_norm": 30.307167053222656, "learning_rate": 9.539682539682541e-06, "loss": 33.6437, "step": 17631 }, { "epoch": 419.81194029850747, "grad_norm": 29.870691299438477, "learning_rate": 9.539115646258504e-06, "loss": 33.9267, "step": 17632 }, { "epoch": 419.8358208955224, "grad_norm": 30.42253875732422, "learning_rate": 9.538548752834467e-06, "loss": 33.6009, "step": 17633 }, { "epoch": 419.85970149253734, "grad_norm": 26.70201873779297, "learning_rate": 9.537981859410432e-06, "loss": 34.1938, "step": 17634 }, { "epoch": 419.88358208955225, "grad_norm": 38.59564208984375, "learning_rate": 9.537414965986396e-06, "loss": 33.16, "step": 17635 }, { "epoch": 419.90746268656716, "grad_norm": 25.168344497680664, "learning_rate": 9.53684807256236e-06, "loss": 34.469, "step": 17636 }, { "epoch": 419.93134328358207, "grad_norm": 52.04273986816406, "learning_rate": 9.536281179138322e-06, "loss": 33.9904, "step": 17637 }, { "epoch": 419.95522388059703, "grad_norm": 37.60790252685547, "learning_rate": 9.535714285714287e-06, "loss": 34.7973, "step": 17638 }, { "epoch": 419.97910447761194, "grad_norm": 55.68663024902344, "learning_rate": 9.53514739229025e-06, "loss": 35.7397, "step": 17639 }, { "epoch": 420.0, "grad_norm": 51.74850845336914, "learning_rate": 9.534580498866214e-06, "loss": 31.1879, "step": 17640 }, { "epoch": 420.0, "step": 17640, "total_flos": 8.671153071633885e+17, "train_loss": 1.4952584476427697, "train_runtime": 25672.4343, "train_samples_per_second": 87.559, "train_steps_per_second": 0.687 }, { "epoch": 420.0238805970149, "grad_norm": 112.91419219970703, "learning_rate": 1e-05, "loss": 33.9151, "step": 17641 }, { "epoch": 420.0477611940299, "grad_norm": Infinity, "learning_rate": 9.999458874458874e-06, "loss": 40.5489, "step": 17642 }, { "epoch": 420.0716417910448, "grad_norm": Infinity, "learning_rate": 9.999458874458874e-06, "loss": 39.5182, "step": 17643 }, { "epoch": 420.0955223880597, "grad_norm": 402.99609375, "learning_rate": 9.999458874458874e-06, "loss": 38.8643, "step": 17644 }, { "epoch": 420.1194029850746, "grad_norm": 222.9229736328125, "learning_rate": 9.99891774891775e-06, "loss": 36.7847, "step": 17645 }, { "epoch": 420.14328358208957, "grad_norm": 73.6364974975586, "learning_rate": 9.998376623376625e-06, "loss": 34.5185, "step": 17646 }, { "epoch": 420.1671641791045, "grad_norm": 98.36174011230469, "learning_rate": 9.997835497835499e-06, "loss": 34.3682, "step": 17647 }, { "epoch": 420.1910447761194, "grad_norm": 56.974830627441406, "learning_rate": 9.997294372294372e-06, "loss": 33.9145, "step": 17648 }, { "epoch": 420.21492537313435, "grad_norm": 47.68419647216797, "learning_rate": 9.996753246753248e-06, "loss": 32.737, "step": 17649 }, { "epoch": 420.23880597014926, "grad_norm": 38.63371276855469, "learning_rate": 9.996212121212123e-06, "loss": 32.4193, "step": 17650 }, { "epoch": 420.26268656716417, "grad_norm": 35.25867462158203, "learning_rate": 9.995670995670996e-06, "loss": 33.1348, "step": 17651 }, { "epoch": 420.28656716417913, "grad_norm": 31.282548904418945, "learning_rate": 9.99512987012987e-06, "loss": 32.6214, "step": 17652 }, { "epoch": 420.31044776119404, "grad_norm": 29.480207443237305, "learning_rate": 9.994588744588745e-06, "loss": 31.8834, "step": 17653 }, { "epoch": 420.33432835820895, "grad_norm": 33.77840042114258, "learning_rate": 9.99404761904762e-06, "loss": 31.733, "step": 17654 }, { "epoch": 420.35820895522386, "grad_norm": 34.8023567199707, "learning_rate": 9.993506493506494e-06, "loss": 31.583, "step": 17655 }, { "epoch": 420.3820895522388, "grad_norm": 23.891918182373047, "learning_rate": 9.99296536796537e-06, "loss": 31.7572, "step": 17656 }, { "epoch": 420.40597014925373, "grad_norm": 24.45165252685547, "learning_rate": 9.992424242424243e-06, "loss": 30.9273, "step": 17657 }, { "epoch": 420.42985074626864, "grad_norm": 31.686626434326172, "learning_rate": 9.991883116883118e-06, "loss": 31.7009, "step": 17658 }, { "epoch": 420.4537313432836, "grad_norm": 28.03240203857422, "learning_rate": 9.991341991341992e-06, "loss": 32.3674, "step": 17659 }, { "epoch": 420.4776119402985, "grad_norm": 19.185731887817383, "learning_rate": 9.990800865800867e-06, "loss": 31.2072, "step": 17660 }, { "epoch": 420.5014925373134, "grad_norm": 21.57698631286621, "learning_rate": 9.990259740259741e-06, "loss": 30.589, "step": 17661 }, { "epoch": 420.52537313432833, "grad_norm": 26.052366256713867, "learning_rate": 9.989718614718616e-06, "loss": 31.491, "step": 17662 }, { "epoch": 420.5492537313433, "grad_norm": 17.546358108520508, "learning_rate": 9.98917748917749e-06, "loss": 30.1266, "step": 17663 }, { "epoch": 420.5731343283582, "grad_norm": 20.863521575927734, "learning_rate": 9.988636363636365e-06, "loss": 31.4594, "step": 17664 }, { "epoch": 420.5970149253731, "grad_norm": 21.719436645507812, "learning_rate": 9.988095238095239e-06, "loss": 30.7668, "step": 17665 }, { "epoch": 420.6208955223881, "grad_norm": 20.507556915283203, "learning_rate": 9.987554112554112e-06, "loss": 31.5237, "step": 17666 }, { "epoch": 420.644776119403, "grad_norm": 16.809471130371094, "learning_rate": 9.987012987012988e-06, "loss": 31.3119, "step": 17667 }, { "epoch": 420.6686567164179, "grad_norm": 25.894765853881836, "learning_rate": 9.986471861471863e-06, "loss": 31.2617, "step": 17668 }, { "epoch": 420.6925373134328, "grad_norm": 22.232994079589844, "learning_rate": 9.985930735930737e-06, "loss": 30.2337, "step": 17669 }, { "epoch": 420.7164179104478, "grad_norm": 19.489809036254883, "learning_rate": 9.98538961038961e-06, "loss": 32.0539, "step": 17670 }, { "epoch": 420.7402985074627, "grad_norm": 21.0067195892334, "learning_rate": 9.984848484848485e-06, "loss": 30.9518, "step": 17671 }, { "epoch": 420.7641791044776, "grad_norm": 25.96683120727539, "learning_rate": 9.98430735930736e-06, "loss": 30.3011, "step": 17672 }, { "epoch": 420.78805970149256, "grad_norm": 18.228660583496094, "learning_rate": 9.983766233766234e-06, "loss": 30.681, "step": 17673 }, { "epoch": 420.81194029850747, "grad_norm": 20.957002639770508, "learning_rate": 9.983225108225108e-06, "loss": 31.5611, "step": 17674 }, { "epoch": 420.8358208955224, "grad_norm": 19.175439834594727, "learning_rate": 9.982683982683983e-06, "loss": 31.0533, "step": 17675 }, { "epoch": 420.85970149253734, "grad_norm": 20.846750259399414, "learning_rate": 9.982142857142858e-06, "loss": 30.1827, "step": 17676 }, { "epoch": 420.88358208955225, "grad_norm": 17.5195369720459, "learning_rate": 9.981601731601732e-06, "loss": 30.4414, "step": 17677 }, { "epoch": 420.90746268656716, "grad_norm": 18.69231605529785, "learning_rate": 9.981060606060606e-06, "loss": 30.6528, "step": 17678 }, { "epoch": 420.93134328358207, "grad_norm": 19.49618911743164, "learning_rate": 9.980519480519481e-06, "loss": 30.482, "step": 17679 }, { "epoch": 420.95522388059703, "grad_norm": 19.205490112304688, "learning_rate": 9.979978354978356e-06, "loss": 29.6978, "step": 17680 }, { "epoch": 420.97910447761194, "grad_norm": 19.25636100769043, "learning_rate": 9.97943722943723e-06, "loss": 30.7216, "step": 17681 }, { "epoch": 421.0, "grad_norm": 17.168264389038086, "learning_rate": 9.978896103896105e-06, "loss": 28.0752, "step": 17682 }, { "epoch": 421.0238805970149, "grad_norm": 21.812082290649414, "learning_rate": 9.978354978354979e-06, "loss": 31.1343, "step": 17683 }, { "epoch": 421.0477611940299, "grad_norm": 17.323999404907227, "learning_rate": 9.977813852813854e-06, "loss": 31.3371, "step": 17684 }, { "epoch": 421.0716417910448, "grad_norm": 19.48267936706543, "learning_rate": 9.977272727272728e-06, "loss": 30.5882, "step": 17685 }, { "epoch": 421.0955223880597, "grad_norm": 17.70973014831543, "learning_rate": 9.976731601731603e-06, "loss": 31.1488, "step": 17686 }, { "epoch": 421.1194029850746, "grad_norm": 19.419677734375, "learning_rate": 9.976190476190477e-06, "loss": 29.9038, "step": 17687 }, { "epoch": 421.14328358208957, "grad_norm": 18.19388198852539, "learning_rate": 9.975649350649352e-06, "loss": 30.389, "step": 17688 }, { "epoch": 421.1671641791045, "grad_norm": 16.10342025756836, "learning_rate": 9.975108225108225e-06, "loss": 30.582, "step": 17689 }, { "epoch": 421.1910447761194, "grad_norm": 19.07910919189453, "learning_rate": 9.9745670995671e-06, "loss": 29.9752, "step": 17690 }, { "epoch": 421.21492537313435, "grad_norm": 18.458965301513672, "learning_rate": 9.974025974025974e-06, "loss": 31.2172, "step": 17691 }, { "epoch": 421.23880597014926, "grad_norm": 18.080291748046875, "learning_rate": 9.97348484848485e-06, "loss": 30.7753, "step": 17692 }, { "epoch": 421.26268656716417, "grad_norm": 23.084182739257812, "learning_rate": 9.972943722943725e-06, "loss": 31.4107, "step": 17693 }, { "epoch": 421.28656716417913, "grad_norm": 16.90616798400879, "learning_rate": 9.972402597402599e-06, "loss": 30.7728, "step": 17694 }, { "epoch": 421.31044776119404, "grad_norm": 19.12290382385254, "learning_rate": 9.971861471861472e-06, "loss": 31.1052, "step": 17695 }, { "epoch": 421.33432835820895, "grad_norm": 22.74007225036621, "learning_rate": 9.971320346320347e-06, "loss": 31.7474, "step": 17696 }, { "epoch": 421.35820895522386, "grad_norm": 17.926822662353516, "learning_rate": 9.970779220779223e-06, "loss": 29.8698, "step": 17697 }, { "epoch": 421.3820895522388, "grad_norm": 20.645263671875, "learning_rate": 9.970238095238096e-06, "loss": 30.3212, "step": 17698 }, { "epoch": 421.40597014925373, "grad_norm": 22.62677574157715, "learning_rate": 9.96969696969697e-06, "loss": 31.3098, "step": 17699 }, { "epoch": 421.42985074626864, "grad_norm": 16.30235481262207, "learning_rate": 9.969155844155845e-06, "loss": 30.6176, "step": 17700 }, { "epoch": 421.4537313432836, "grad_norm": 16.102262496948242, "learning_rate": 9.96861471861472e-06, "loss": 30.7111, "step": 17701 }, { "epoch": 421.4776119402985, "grad_norm": 16.76392936706543, "learning_rate": 9.968073593073594e-06, "loss": 31.0407, "step": 17702 }, { "epoch": 421.5014925373134, "grad_norm": 20.659801483154297, "learning_rate": 9.967532467532468e-06, "loss": 31.4066, "step": 17703 }, { "epoch": 421.52537313432833, "grad_norm": 20.50784683227539, "learning_rate": 9.966991341991343e-06, "loss": 29.9563, "step": 17704 }, { "epoch": 421.5492537313433, "grad_norm": 19.23495864868164, "learning_rate": 9.966450216450217e-06, "loss": 31.179, "step": 17705 }, { "epoch": 421.5731343283582, "grad_norm": 18.347591400146484, "learning_rate": 9.965909090909092e-06, "loss": 30.8491, "step": 17706 }, { "epoch": 421.5970149253731, "grad_norm": 17.33802604675293, "learning_rate": 9.965367965367966e-06, "loss": 31.1655, "step": 17707 }, { "epoch": 421.6208955223881, "grad_norm": 17.760305404663086, "learning_rate": 9.964826839826841e-06, "loss": 30.5194, "step": 17708 }, { "epoch": 421.644776119403, "grad_norm": 23.50550079345703, "learning_rate": 9.964285714285714e-06, "loss": 31.2406, "step": 17709 }, { "epoch": 421.6686567164179, "grad_norm": 19.194673538208008, "learning_rate": 9.96374458874459e-06, "loss": 30.7916, "step": 17710 }, { "epoch": 421.6925373134328, "grad_norm": 17.148178100585938, "learning_rate": 9.963203463203463e-06, "loss": 28.8889, "step": 17711 }, { "epoch": 421.7164179104478, "grad_norm": 20.253847122192383, "learning_rate": 9.962662337662339e-06, "loss": 30.4494, "step": 17712 }, { "epoch": 421.7402985074627, "grad_norm": 21.947084426879883, "learning_rate": 9.962121212121212e-06, "loss": 31.4266, "step": 17713 }, { "epoch": 421.7641791044776, "grad_norm": 20.87897491455078, "learning_rate": 9.961580086580088e-06, "loss": 31.3864, "step": 17714 }, { "epoch": 421.78805970149256, "grad_norm": 17.126819610595703, "learning_rate": 9.961038961038963e-06, "loss": 30.0833, "step": 17715 }, { "epoch": 421.81194029850747, "grad_norm": 19.545730590820312, "learning_rate": 9.960497835497836e-06, "loss": 30.3197, "step": 17716 }, { "epoch": 421.8358208955224, "grad_norm": 20.319454193115234, "learning_rate": 9.95995670995671e-06, "loss": 30.0552, "step": 17717 }, { "epoch": 421.85970149253734, "grad_norm": 17.654926300048828, "learning_rate": 9.959415584415585e-06, "loss": 30.7586, "step": 17718 }, { "epoch": 421.88358208955225, "grad_norm": 15.611412048339844, "learning_rate": 9.95887445887446e-06, "loss": 30.4199, "step": 17719 }, { "epoch": 421.90746268656716, "grad_norm": NaN, "learning_rate": 9.958333333333334e-06, "loss": 29.7574, "step": 17720 }, { "epoch": 421.93134328358207, "grad_norm": 15.934208869934082, "learning_rate": 9.958333333333334e-06, "loss": 30.6046, "step": 17721 }, { "epoch": 421.95522388059703, "grad_norm": 17.66332244873047, "learning_rate": 9.957792207792208e-06, "loss": 30.948, "step": 17722 }, { "epoch": 421.97910447761194, "grad_norm": 15.901494979858398, "learning_rate": 9.957251082251083e-06, "loss": 31.1169, "step": 17723 }, { "epoch": 422.0, "grad_norm": 18.879749298095703, "learning_rate": 9.956709956709958e-06, "loss": 27.1391, "step": 17724 }, { "epoch": 422.0238805970149, "grad_norm": 22.32988739013672, "learning_rate": 9.956168831168832e-06, "loss": 31.3596, "step": 17725 }, { "epoch": 422.0477611940299, "grad_norm": 20.225967407226562, "learning_rate": 9.955627705627706e-06, "loss": 31.0335, "step": 17726 }, { "epoch": 422.0716417910448, "grad_norm": 19.20271110534668, "learning_rate": 9.955086580086581e-06, "loss": 30.2488, "step": 17727 }, { "epoch": 422.0955223880597, "grad_norm": 19.8067626953125, "learning_rate": 9.954545454545456e-06, "loss": 31.0425, "step": 17728 }, { "epoch": 422.1194029850746, "grad_norm": 22.654584884643555, "learning_rate": 9.95400432900433e-06, "loss": 30.037, "step": 17729 }, { "epoch": 422.14328358208957, "grad_norm": 19.72269058227539, "learning_rate": 9.953463203463203e-06, "loss": 31.6525, "step": 17730 }, { "epoch": 422.1671641791045, "grad_norm": 18.585247039794922, "learning_rate": 9.952922077922079e-06, "loss": 30.8078, "step": 17731 }, { "epoch": 422.1910447761194, "grad_norm": 16.761127471923828, "learning_rate": 9.952380952380954e-06, "loss": 31.3979, "step": 17732 }, { "epoch": 422.21492537313435, "grad_norm": 20.538742065429688, "learning_rate": 9.951839826839828e-06, "loss": 29.8246, "step": 17733 }, { "epoch": 422.23880597014926, "grad_norm": 20.14104461669922, "learning_rate": 9.951298701298701e-06, "loss": 29.3158, "step": 17734 }, { "epoch": 422.26268656716417, "grad_norm": 15.591094017028809, "learning_rate": 9.950757575757577e-06, "loss": 31.2207, "step": 17735 }, { "epoch": 422.28656716417913, "grad_norm": 16.661462783813477, "learning_rate": 9.950216450216452e-06, "loss": 32.3801, "step": 17736 }, { "epoch": 422.31044776119404, "grad_norm": 16.15082359313965, "learning_rate": 9.949675324675325e-06, "loss": 30.3467, "step": 17737 }, { "epoch": 422.33432835820895, "grad_norm": 18.46958351135254, "learning_rate": 9.949134199134199e-06, "loss": 29.7602, "step": 17738 }, { "epoch": 422.35820895522386, "grad_norm": 20.684654235839844, "learning_rate": 9.948593073593074e-06, "loss": 31.3437, "step": 17739 }, { "epoch": 422.3820895522388, "grad_norm": 16.216947555541992, "learning_rate": 9.94805194805195e-06, "loss": 29.3093, "step": 17740 }, { "epoch": 422.40597014925373, "grad_norm": 18.468616485595703, "learning_rate": 9.947510822510823e-06, "loss": 30.6165, "step": 17741 }, { "epoch": 422.42985074626864, "grad_norm": 17.252256393432617, "learning_rate": 9.946969696969699e-06, "loss": 31.1957, "step": 17742 }, { "epoch": 422.4537313432836, "grad_norm": 18.978557586669922, "learning_rate": 9.946428571428572e-06, "loss": 30.4812, "step": 17743 }, { "epoch": 422.4776119402985, "grad_norm": 23.810266494750977, "learning_rate": 9.945887445887446e-06, "loss": 31.4226, "step": 17744 }, { "epoch": 422.5014925373134, "grad_norm": 21.530662536621094, "learning_rate": 9.945346320346321e-06, "loss": 29.7931, "step": 17745 }, { "epoch": 422.52537313432833, "grad_norm": 17.577211380004883, "learning_rate": 9.944805194805196e-06, "loss": 30.1175, "step": 17746 }, { "epoch": 422.5492537313433, "grad_norm": 17.01324462890625, "learning_rate": 9.94426406926407e-06, "loss": 30.0424, "step": 17747 }, { "epoch": 422.5731343283582, "grad_norm": NaN, "learning_rate": 9.943722943722944e-06, "loss": 38.9605, "step": 17748 }, { "epoch": 422.5970149253731, "grad_norm": 22.119617462158203, "learning_rate": 9.943722943722944e-06, "loss": 30.4234, "step": 17749 }, { "epoch": 422.6208955223881, "grad_norm": 19.939777374267578, "learning_rate": 9.943181818181819e-06, "loss": 30.3345, "step": 17750 }, { "epoch": 422.644776119403, "grad_norm": 15.90744400024414, "learning_rate": 9.942640692640694e-06, "loss": 31.2696, "step": 17751 }, { "epoch": 422.6686567164179, "grad_norm": 19.290239334106445, "learning_rate": 9.942099567099568e-06, "loss": 30.276, "step": 17752 }, { "epoch": 422.6925373134328, "grad_norm": 30.07827377319336, "learning_rate": 9.941558441558441e-06, "loss": 30.3613, "step": 17753 }, { "epoch": 422.7164179104478, "grad_norm": 17.388504028320312, "learning_rate": 9.941017316017317e-06, "loss": 30.0253, "step": 17754 }, { "epoch": 422.7402985074627, "grad_norm": 21.055727005004883, "learning_rate": 9.940476190476192e-06, "loss": 30.8306, "step": 17755 }, { "epoch": 422.7641791044776, "grad_norm": 25.98221206665039, "learning_rate": 9.939935064935066e-06, "loss": 30.4435, "step": 17756 }, { "epoch": 422.78805970149256, "grad_norm": 17.11972427368164, "learning_rate": 9.939393939393939e-06, "loss": 30.8077, "step": 17757 }, { "epoch": 422.81194029850747, "grad_norm": 20.359371185302734, "learning_rate": 9.938852813852814e-06, "loss": 29.795, "step": 17758 }, { "epoch": 422.8358208955224, "grad_norm": 25.94600486755371, "learning_rate": 9.93831168831169e-06, "loss": 31.2549, "step": 17759 }, { "epoch": 422.85970149253734, "grad_norm": 16.56966781616211, "learning_rate": 9.937770562770563e-06, "loss": 29.1916, "step": 17760 }, { "epoch": 422.88358208955225, "grad_norm": 31.18387222290039, "learning_rate": 9.937229437229437e-06, "loss": 30.9677, "step": 17761 }, { "epoch": 422.90746268656716, "grad_norm": 18.86530303955078, "learning_rate": 9.936688311688312e-06, "loss": 29.9417, "step": 17762 }, { "epoch": 422.93134328358207, "grad_norm": 30.287748336791992, "learning_rate": 9.936147186147188e-06, "loss": 31.2346, "step": 17763 }, { "epoch": 422.95522388059703, "grad_norm": 22.606487274169922, "learning_rate": 9.935606060606061e-06, "loss": 30.5415, "step": 17764 }, { "epoch": 422.97910447761194, "grad_norm": 22.81735610961914, "learning_rate": 9.935064935064936e-06, "loss": 30.0585, "step": 17765 }, { "epoch": 423.0, "grad_norm": 24.056591033935547, "learning_rate": 9.93452380952381e-06, "loss": 26.9734, "step": 17766 }, { "epoch": 423.0238805970149, "grad_norm": 19.35657501220703, "learning_rate": 9.933982683982685e-06, "loss": 29.8204, "step": 17767 }, { "epoch": 423.0477611940299, "grad_norm": 30.62700843811035, "learning_rate": 9.933441558441559e-06, "loss": 31.759, "step": 17768 }, { "epoch": 423.0716417910448, "grad_norm": 22.67262840270996, "learning_rate": 9.932900432900434e-06, "loss": 29.912, "step": 17769 }, { "epoch": 423.0955223880597, "grad_norm": 19.989831924438477, "learning_rate": 9.932359307359308e-06, "loss": 30.7858, "step": 17770 }, { "epoch": 423.1194029850746, "grad_norm": 31.967315673828125, "learning_rate": 9.931818181818183e-06, "loss": 30.787, "step": 17771 }, { "epoch": 423.14328358208957, "grad_norm": 21.397275924682617, "learning_rate": 9.931277056277057e-06, "loss": 30.4314, "step": 17772 }, { "epoch": 423.1671641791045, "grad_norm": 35.429481506347656, "learning_rate": 9.930735930735932e-06, "loss": 29.9019, "step": 17773 }, { "epoch": 423.1910447761194, "grad_norm": 25.119787216186523, "learning_rate": 9.930194805194806e-06, "loss": 30.2204, "step": 17774 }, { "epoch": 423.21492537313435, "grad_norm": 28.95484733581543, "learning_rate": 9.929653679653681e-06, "loss": 30.3761, "step": 17775 }, { "epoch": 423.23880597014926, "grad_norm": 27.173364639282227, "learning_rate": 9.929112554112556e-06, "loss": 29.5577, "step": 17776 }, { "epoch": 423.26268656716417, "grad_norm": 28.711408615112305, "learning_rate": 9.92857142857143e-06, "loss": 31.2918, "step": 17777 }, { "epoch": 423.28656716417913, "grad_norm": 31.441675186157227, "learning_rate": 9.928030303030303e-06, "loss": 31.5406, "step": 17778 }, { "epoch": 423.31044776119404, "grad_norm": 23.445634841918945, "learning_rate": 9.927489177489179e-06, "loss": 30.5159, "step": 17779 }, { "epoch": 423.33432835820895, "grad_norm": 32.985877990722656, "learning_rate": 9.926948051948054e-06, "loss": 30.7025, "step": 17780 }, { "epoch": 423.35820895522386, "grad_norm": 22.22218894958496, "learning_rate": 9.926406926406928e-06, "loss": 28.941, "step": 17781 }, { "epoch": 423.3820895522388, "grad_norm": 34.304080963134766, "learning_rate": 9.925865800865801e-06, "loss": 30.4497, "step": 17782 }, { "epoch": 423.40597014925373, "grad_norm": 25.405498504638672, "learning_rate": 9.925324675324677e-06, "loss": 31.6932, "step": 17783 }, { "epoch": 423.42985074626864, "grad_norm": 32.971370697021484, "learning_rate": 9.92478354978355e-06, "loss": 30.6665, "step": 17784 }, { "epoch": 423.4537313432836, "grad_norm": 21.468421936035156, "learning_rate": 9.924242424242425e-06, "loss": 29.4264, "step": 17785 }, { "epoch": 423.4776119402985, "grad_norm": 31.87540626525879, "learning_rate": 9.923701298701299e-06, "loss": 30.6439, "step": 17786 }, { "epoch": 423.5014925373134, "grad_norm": 21.964067459106445, "learning_rate": 9.923160173160173e-06, "loss": 31.0809, "step": 17787 }, { "epoch": 423.52537313432833, "grad_norm": 34.96088409423828, "learning_rate": 9.922619047619048e-06, "loss": 29.8497, "step": 17788 }, { "epoch": 423.5492537313433, "grad_norm": 24.022890090942383, "learning_rate": 9.922077922077923e-06, "loss": 30.354, "step": 17789 }, { "epoch": 423.5731343283582, "grad_norm": 39.60622787475586, "learning_rate": 9.921536796536797e-06, "loss": 29.1475, "step": 17790 }, { "epoch": 423.5970149253731, "grad_norm": 28.349515914916992, "learning_rate": 9.920995670995672e-06, "loss": 31.3577, "step": 17791 }, { "epoch": 423.6208955223881, "grad_norm": 33.882591247558594, "learning_rate": 9.920454545454546e-06, "loss": 29.6474, "step": 17792 }, { "epoch": 423.644776119403, "grad_norm": 27.70705223083496, "learning_rate": 9.919913419913421e-06, "loss": 31.2927, "step": 17793 }, { "epoch": 423.6686567164179, "grad_norm": 35.434532165527344, "learning_rate": 9.919372294372295e-06, "loss": 29.8524, "step": 17794 }, { "epoch": 423.6925373134328, "grad_norm": NaN, "learning_rate": 9.91883116883117e-06, "loss": 52.2435, "step": 17795 }, { "epoch": 423.7164179104478, "grad_norm": 26.123586654663086, "learning_rate": 9.91883116883117e-06, "loss": 31.4211, "step": 17796 }, { "epoch": 423.7402985074627, "grad_norm": 38.33211135864258, "learning_rate": 9.918290043290044e-06, "loss": 30.4635, "step": 17797 }, { "epoch": 423.7641791044776, "grad_norm": 30.718141555786133, "learning_rate": 9.917748917748919e-06, "loss": 29.4099, "step": 17798 }, { "epoch": 423.78805970149256, "grad_norm": 34.1279411315918, "learning_rate": 9.917207792207792e-06, "loss": 30.2232, "step": 17799 }, { "epoch": 423.81194029850747, "grad_norm": 30.384740829467773, "learning_rate": 9.916666666666668e-06, "loss": 30.7991, "step": 17800 }, { "epoch": 423.8358208955224, "grad_norm": 29.308504104614258, "learning_rate": 9.916125541125541e-06, "loss": 31.0372, "step": 17801 }, { "epoch": 423.85970149253734, "grad_norm": 27.05010223388672, "learning_rate": 9.915584415584417e-06, "loss": 30.4624, "step": 17802 }, { "epoch": 423.88358208955225, "grad_norm": 26.148622512817383, "learning_rate": 9.915043290043292e-06, "loss": 30.948, "step": 17803 }, { "epoch": 423.90746268656716, "grad_norm": 26.995044708251953, "learning_rate": 9.914502164502166e-06, "loss": 30.5287, "step": 17804 }, { "epoch": 423.93134328358207, "grad_norm": 18.846092224121094, "learning_rate": 9.913961038961039e-06, "loss": 30.8986, "step": 17805 }, { "epoch": 423.95522388059703, "grad_norm": 37.53959274291992, "learning_rate": 9.913419913419914e-06, "loss": 30.9469, "step": 17806 }, { "epoch": 423.97910447761194, "grad_norm": 27.74464988708496, "learning_rate": 9.91287878787879e-06, "loss": 30.7162, "step": 17807 }, { "epoch": 424.0, "grad_norm": 35.00090789794922, "learning_rate": 9.912337662337663e-06, "loss": 27.6168, "step": 17808 }, { "epoch": 424.0238805970149, "grad_norm": 33.48642349243164, "learning_rate": 9.911796536796537e-06, "loss": 29.8008, "step": 17809 }, { "epoch": 424.0477611940299, "grad_norm": 35.65671157836914, "learning_rate": 9.911255411255412e-06, "loss": 29.1094, "step": 17810 }, { "epoch": 424.0716417910448, "grad_norm": 30.726531982421875, "learning_rate": 9.910714285714288e-06, "loss": 31.0795, "step": 17811 }, { "epoch": 424.0955223880597, "grad_norm": 35.1904411315918, "learning_rate": 9.910173160173161e-06, "loss": 30.6741, "step": 17812 }, { "epoch": 424.1194029850746, "grad_norm": 26.820903778076172, "learning_rate": 9.909632034632035e-06, "loss": 31.4292, "step": 17813 }, { "epoch": 424.14328358208957, "grad_norm": 39.136512756347656, "learning_rate": 9.90909090909091e-06, "loss": 30.6442, "step": 17814 }, { "epoch": 424.1671641791045, "grad_norm": 33.135902404785156, "learning_rate": 9.908549783549785e-06, "loss": 31.1797, "step": 17815 }, { "epoch": 424.1910447761194, "grad_norm": 33.562984466552734, "learning_rate": 9.908008658008659e-06, "loss": 31.4799, "step": 17816 }, { "epoch": 424.21492537313435, "grad_norm": 31.27215576171875, "learning_rate": 9.907467532467533e-06, "loss": 30.1781, "step": 17817 }, { "epoch": 424.23880597014926, "grad_norm": 31.56283950805664, "learning_rate": 9.906926406926408e-06, "loss": 30.6477, "step": 17818 }, { "epoch": 424.26268656716417, "grad_norm": 29.7825927734375, "learning_rate": 9.906385281385283e-06, "loss": 28.9348, "step": 17819 }, { "epoch": 424.28656716417913, "grad_norm": 35.68899917602539, "learning_rate": 9.905844155844157e-06, "loss": 30.0048, "step": 17820 }, { "epoch": 424.31044776119404, "grad_norm": 30.14948844909668, "learning_rate": 9.90530303030303e-06, "loss": 30.2308, "step": 17821 }, { "epoch": 424.33432835820895, "grad_norm": 38.211917877197266, "learning_rate": 9.904761904761906e-06, "loss": 31.5832, "step": 17822 }, { "epoch": 424.35820895522386, "grad_norm": 34.006839752197266, "learning_rate": 9.904220779220781e-06, "loss": 29.7783, "step": 17823 }, { "epoch": 424.3820895522388, "grad_norm": 34.897071838378906, "learning_rate": 9.903679653679655e-06, "loss": 29.6908, "step": 17824 }, { "epoch": 424.40597014925373, "grad_norm": 34.3736572265625, "learning_rate": 9.90313852813853e-06, "loss": 30.3572, "step": 17825 }, { "epoch": 424.42985074626864, "grad_norm": 31.942798614501953, "learning_rate": 9.902597402597403e-06, "loss": 30.1564, "step": 17826 }, { "epoch": 424.4537313432836, "grad_norm": 28.030567169189453, "learning_rate": 9.902056277056277e-06, "loss": 31.4503, "step": 17827 }, { "epoch": 424.4776119402985, "grad_norm": 34.852108001708984, "learning_rate": 9.901515151515152e-06, "loss": 30.7671, "step": 17828 }, { "epoch": 424.5014925373134, "grad_norm": 31.828214645385742, "learning_rate": 9.900974025974028e-06, "loss": 30.4066, "step": 17829 }, { "epoch": 424.52537313432833, "grad_norm": 35.93915557861328, "learning_rate": 9.900432900432901e-06, "loss": 30.2484, "step": 17830 }, { "epoch": 424.5492537313433, "grad_norm": 31.057619094848633, "learning_rate": 9.899891774891775e-06, "loss": 29.3562, "step": 17831 }, { "epoch": 424.5731343283582, "grad_norm": 32.44815444946289, "learning_rate": 9.89935064935065e-06, "loss": 30.7158, "step": 17832 }, { "epoch": 424.5970149253731, "grad_norm": 29.92171859741211, "learning_rate": 9.898809523809525e-06, "loss": 31.3564, "step": 17833 }, { "epoch": 424.6208955223881, "grad_norm": 35.34208679199219, "learning_rate": 9.898268398268399e-06, "loss": 30.372, "step": 17834 }, { "epoch": 424.644776119403, "grad_norm": 32.69564437866211, "learning_rate": 9.897727272727273e-06, "loss": 31.1831, "step": 17835 }, { "epoch": 424.6686567164179, "grad_norm": 32.14631652832031, "learning_rate": 9.897186147186148e-06, "loss": 30.3639, "step": 17836 }, { "epoch": 424.6925373134328, "grad_norm": 28.59247398376465, "learning_rate": 9.896645021645023e-06, "loss": 30.2063, "step": 17837 }, { "epoch": 424.7164179104478, "grad_norm": 37.333717346191406, "learning_rate": 9.896103896103897e-06, "loss": 30.7617, "step": 17838 }, { "epoch": 424.7402985074627, "grad_norm": NaN, "learning_rate": 9.89556277056277e-06, "loss": 52.2572, "step": 17839 }, { "epoch": 424.7641791044776, "grad_norm": 30.809633255004883, "learning_rate": 9.89556277056277e-06, "loss": 29.9285, "step": 17840 }, { "epoch": 424.78805970149256, "grad_norm": 31.57054901123047, "learning_rate": 9.895021645021646e-06, "loss": 30.6814, "step": 17841 }, { "epoch": 424.81194029850747, "grad_norm": 27.44460105895996, "learning_rate": 9.894480519480521e-06, "loss": 29.5906, "step": 17842 }, { "epoch": 424.8358208955224, "grad_norm": 35.57521057128906, "learning_rate": 9.893939393939395e-06, "loss": 29.9667, "step": 17843 }, { "epoch": 424.85970149253734, "grad_norm": 31.476011276245117, "learning_rate": 9.893398268398268e-06, "loss": 30.0742, "step": 17844 }, { "epoch": 424.88358208955225, "grad_norm": 33.79240036010742, "learning_rate": 9.892857142857143e-06, "loss": 31.2717, "step": 17845 }, { "epoch": 424.90746268656716, "grad_norm": 29.751890182495117, "learning_rate": 9.892316017316019e-06, "loss": 30.2951, "step": 17846 }, { "epoch": 424.93134328358207, "grad_norm": 32.82600402832031, "learning_rate": 9.891774891774892e-06, "loss": 31.0297, "step": 17847 }, { "epoch": 424.95522388059703, "grad_norm": 29.788423538208008, "learning_rate": 9.891233766233766e-06, "loss": 31.3126, "step": 17848 }, { "epoch": 424.97910447761194, "grad_norm": 32.77175521850586, "learning_rate": 9.890692640692641e-06, "loss": 29.8603, "step": 17849 }, { "epoch": 425.0, "grad_norm": 26.2161922454834, "learning_rate": 9.890151515151517e-06, "loss": 26.7776, "step": 17850 }, { "epoch": 425.0238805970149, "grad_norm": 34.68898391723633, "learning_rate": 9.88961038961039e-06, "loss": 30.9171, "step": 17851 }, { "epoch": 425.0477611940299, "grad_norm": 30.301311492919922, "learning_rate": 9.889069264069265e-06, "loss": 30.7368, "step": 17852 }, { "epoch": 425.0716417910448, "grad_norm": 37.42303466796875, "learning_rate": 9.888528138528139e-06, "loss": 30.7461, "step": 17853 }, { "epoch": 425.0955223880597, "grad_norm": 34.15085983276367, "learning_rate": 9.887987012987014e-06, "loss": 30.8722, "step": 17854 }, { "epoch": 425.1194029850746, "grad_norm": 28.95452880859375, "learning_rate": 9.887445887445888e-06, "loss": 29.8098, "step": 17855 }, { "epoch": 425.14328358208957, "grad_norm": 30.111637115478516, "learning_rate": 9.886904761904763e-06, "loss": 30.7687, "step": 17856 }, { "epoch": 425.1671641791045, "grad_norm": 31.200292587280273, "learning_rate": 9.886363636363637e-06, "loss": 29.3525, "step": 17857 }, { "epoch": 425.1910447761194, "grad_norm": 25.285808563232422, "learning_rate": 9.885822510822512e-06, "loss": 29.628, "step": 17858 }, { "epoch": 425.21492537313435, "grad_norm": 36.19496536254883, "learning_rate": 9.885281385281386e-06, "loss": 30.054, "step": 17859 }, { "epoch": 425.23880597014926, "grad_norm": 29.875398635864258, "learning_rate": 9.884740259740261e-06, "loss": 31.2073, "step": 17860 }, { "epoch": 425.26268656716417, "grad_norm": 33.963462829589844, "learning_rate": 9.884199134199135e-06, "loss": 30.4521, "step": 17861 }, { "epoch": 425.28656716417913, "grad_norm": 33.140625, "learning_rate": 9.88365800865801e-06, "loss": 30.9789, "step": 17862 }, { "epoch": 425.31044776119404, "grad_norm": 33.85870361328125, "learning_rate": 9.883116883116885e-06, "loss": 30.4421, "step": 17863 }, { "epoch": 425.33432835820895, "grad_norm": 29.22146987915039, "learning_rate": 9.882575757575759e-06, "loss": 30.8357, "step": 17864 }, { "epoch": 425.35820895522386, "grad_norm": 30.87940788269043, "learning_rate": 9.882034632034632e-06, "loss": 29.9467, "step": 17865 }, { "epoch": 425.3820895522388, "grad_norm": 27.88400650024414, "learning_rate": 9.881493506493506e-06, "loss": 29.1242, "step": 17866 }, { "epoch": 425.40597014925373, "grad_norm": 32.36547088623047, "learning_rate": 9.880952380952381e-06, "loss": 30.4286, "step": 17867 }, { "epoch": 425.42985074626864, "grad_norm": 31.14236068725586, "learning_rate": 9.880411255411257e-06, "loss": 30.7921, "step": 17868 }, { "epoch": 425.4537313432836, "grad_norm": 34.59557342529297, "learning_rate": 9.87987012987013e-06, "loss": 30.5107, "step": 17869 }, { "epoch": 425.4776119402985, "grad_norm": 29.180438995361328, "learning_rate": 9.879329004329004e-06, "loss": 30.3648, "step": 17870 }, { "epoch": 425.5014925373134, "grad_norm": 39.44462966918945, "learning_rate": 9.87878787878788e-06, "loss": 30.5324, "step": 17871 }, { "epoch": 425.52537313432833, "grad_norm": 34.72891616821289, "learning_rate": 9.878246753246754e-06, "loss": 30.7565, "step": 17872 }, { "epoch": 425.5492537313433, "grad_norm": 30.731353759765625, "learning_rate": 9.877705627705628e-06, "loss": 30.7171, "step": 17873 }, { "epoch": 425.5731343283582, "grad_norm": 30.493850708007812, "learning_rate": 9.877164502164503e-06, "loss": 30.4339, "step": 17874 }, { "epoch": 425.5970149253731, "grad_norm": 32.21706008911133, "learning_rate": 9.876623376623377e-06, "loss": 30.1464, "step": 17875 }, { "epoch": 425.6208955223881, "grad_norm": 30.330780029296875, "learning_rate": 9.876082251082252e-06, "loss": 30.5009, "step": 17876 }, { "epoch": 425.644776119403, "grad_norm": 32.3734016418457, "learning_rate": 9.875541125541126e-06, "loss": 30.3951, "step": 17877 }, { "epoch": 425.6686567164179, "grad_norm": 29.343202590942383, "learning_rate": 9.875000000000001e-06, "loss": 30.2334, "step": 17878 }, { "epoch": 425.6925373134328, "grad_norm": 29.87128257751465, "learning_rate": 9.874458874458875e-06, "loss": 30.1809, "step": 17879 }, { "epoch": 425.7164179104478, "grad_norm": 27.3814754486084, "learning_rate": 9.87391774891775e-06, "loss": 30.4723, "step": 17880 }, { "epoch": 425.7402985074627, "grad_norm": 33.278907775878906, "learning_rate": 9.873376623376624e-06, "loss": 29.45, "step": 17881 }, { "epoch": 425.7641791044776, "grad_norm": 28.267608642578125, "learning_rate": 9.872835497835499e-06, "loss": 30.5861, "step": 17882 }, { "epoch": 425.78805970149256, "grad_norm": 33.01785659790039, "learning_rate": 9.872294372294373e-06, "loss": 30.4724, "step": 17883 }, { "epoch": 425.81194029850747, "grad_norm": 29.440507888793945, "learning_rate": 9.871753246753248e-06, "loss": 29.078, "step": 17884 }, { "epoch": 425.8358208955224, "grad_norm": 31.783506393432617, "learning_rate": 9.871212121212121e-06, "loss": 30.31, "step": 17885 }, { "epoch": 425.85970149253734, "grad_norm": 27.963035583496094, "learning_rate": 9.870670995670997e-06, "loss": 31.2888, "step": 17886 }, { "epoch": 425.88358208955225, "grad_norm": 32.07109069824219, "learning_rate": 9.87012987012987e-06, "loss": 31.0465, "step": 17887 }, { "epoch": 425.90746268656716, "grad_norm": 27.297204971313477, "learning_rate": 9.869588744588746e-06, "loss": 30.2904, "step": 17888 }, { "epoch": 425.93134328358207, "grad_norm": 38.54228210449219, "learning_rate": 9.869047619047621e-06, "loss": 31.458, "step": 17889 }, { "epoch": 425.95522388059703, "grad_norm": 31.75337028503418, "learning_rate": 9.868506493506495e-06, "loss": 29.7941, "step": 17890 }, { "epoch": 425.97910447761194, "grad_norm": 34.617706298828125, "learning_rate": 9.867965367965368e-06, "loss": 30.7023, "step": 17891 }, { "epoch": 426.0, "grad_norm": 26.294721603393555, "learning_rate": 9.867424242424243e-06, "loss": 26.6909, "step": 17892 }, { "epoch": 426.0238805970149, "grad_norm": 30.75958824157715, "learning_rate": 9.866883116883119e-06, "loss": 31.2395, "step": 17893 }, { "epoch": 426.0477611940299, "grad_norm": 25.127758026123047, "learning_rate": 9.866341991341992e-06, "loss": 29.0264, "step": 17894 }, { "epoch": 426.0716417910448, "grad_norm": 33.18397903442383, "learning_rate": 9.865800865800866e-06, "loss": 30.1247, "step": 17895 }, { "epoch": 426.0955223880597, "grad_norm": 29.049959182739258, "learning_rate": 9.865259740259741e-06, "loss": 30.354, "step": 17896 }, { "epoch": 426.1194029850746, "grad_norm": 31.164453506469727, "learning_rate": 9.864718614718617e-06, "loss": 30.3468, "step": 17897 }, { "epoch": 426.14328358208957, "grad_norm": 28.106958389282227, "learning_rate": 9.86417748917749e-06, "loss": 30.1245, "step": 17898 }, { "epoch": 426.1671641791045, "grad_norm": 36.262516021728516, "learning_rate": 9.863636363636364e-06, "loss": 30.0848, "step": 17899 }, { "epoch": 426.1910447761194, "grad_norm": 28.52217674255371, "learning_rate": 9.863095238095239e-06, "loss": 30.2682, "step": 17900 }, { "epoch": 426.21492537313435, "grad_norm": 34.440555572509766, "learning_rate": 9.862554112554114e-06, "loss": 29.7051, "step": 17901 }, { "epoch": 426.23880597014926, "grad_norm": 28.033153533935547, "learning_rate": 9.862012987012988e-06, "loss": 30.5083, "step": 17902 }, { "epoch": 426.26268656716417, "grad_norm": 34.20317459106445, "learning_rate": 9.861471861471862e-06, "loss": 31.9734, "step": 17903 }, { "epoch": 426.28656716417913, "grad_norm": 27.240867614746094, "learning_rate": 9.860930735930737e-06, "loss": 30.4648, "step": 17904 }, { "epoch": 426.31044776119404, "grad_norm": 33.89701461791992, "learning_rate": 9.86038961038961e-06, "loss": 31.649, "step": 17905 }, { "epoch": 426.33432835820895, "grad_norm": 31.373794555664062, "learning_rate": 9.859848484848486e-06, "loss": 29.3744, "step": 17906 }, { "epoch": 426.35820895522386, "grad_norm": 34.37152099609375, "learning_rate": 9.85930735930736e-06, "loss": 29.9243, "step": 17907 }, { "epoch": 426.3820895522388, "grad_norm": 28.36467170715332, "learning_rate": 9.858766233766235e-06, "loss": 31.0966, "step": 17908 }, { "epoch": 426.40597014925373, "grad_norm": 30.59943962097168, "learning_rate": 9.858225108225108e-06, "loss": 29.8116, "step": 17909 }, { "epoch": 426.42985074626864, "grad_norm": 26.82291030883789, "learning_rate": 9.857683982683984e-06, "loss": 30.8499, "step": 17910 }, { "epoch": 426.4537313432836, "grad_norm": 31.674299240112305, "learning_rate": 9.857142857142859e-06, "loss": 30.462, "step": 17911 }, { "epoch": 426.4776119402985, "grad_norm": 19.16553497314453, "learning_rate": 9.856601731601732e-06, "loss": 29.5794, "step": 17912 }, { "epoch": 426.5014925373134, "grad_norm": 34.04121780395508, "learning_rate": 9.856060606060606e-06, "loss": 30.4825, "step": 17913 }, { "epoch": 426.52537313432833, "grad_norm": 25.557682037353516, "learning_rate": 9.855519480519481e-06, "loss": 30.9062, "step": 17914 }, { "epoch": 426.5492537313433, "grad_norm": 36.60789108276367, "learning_rate": 9.854978354978357e-06, "loss": 29.3358, "step": 17915 }, { "epoch": 426.5731343283582, "grad_norm": 37.37985610961914, "learning_rate": 9.85443722943723e-06, "loss": 30.7294, "step": 17916 }, { "epoch": 426.5970149253731, "grad_norm": 26.243133544921875, "learning_rate": 9.853896103896104e-06, "loss": 30.779, "step": 17917 }, { "epoch": 426.6208955223881, "grad_norm": 27.727338790893555, "learning_rate": 9.853354978354979e-06, "loss": 30.1231, "step": 17918 }, { "epoch": 426.644776119403, "grad_norm": 29.845748901367188, "learning_rate": 9.852813852813854e-06, "loss": 29.3638, "step": 17919 }, { "epoch": 426.6686567164179, "grad_norm": 23.25604820251465, "learning_rate": 9.852272727272728e-06, "loss": 30.5057, "step": 17920 }, { "epoch": 426.6925373134328, "grad_norm": 30.109285354614258, "learning_rate": 9.851731601731602e-06, "loss": 31.1297, "step": 17921 }, { "epoch": 426.7164179104478, "grad_norm": 22.888425827026367, "learning_rate": 9.851190476190477e-06, "loss": 30.2449, "step": 17922 }, { "epoch": 426.7402985074627, "grad_norm": 28.549480438232422, "learning_rate": 9.850649350649352e-06, "loss": 29.7814, "step": 17923 }, { "epoch": 426.7641791044776, "grad_norm": 26.971939086914062, "learning_rate": 9.850108225108226e-06, "loss": 30.6323, "step": 17924 }, { "epoch": 426.78805970149256, "grad_norm": 31.966575622558594, "learning_rate": 9.8495670995671e-06, "loss": 30.7119, "step": 17925 }, { "epoch": 426.81194029850747, "grad_norm": 27.02059555053711, "learning_rate": 9.849025974025975e-06, "loss": 31.3519, "step": 17926 }, { "epoch": 426.8358208955224, "grad_norm": 27.787981033325195, "learning_rate": 9.84848484848485e-06, "loss": 30.2269, "step": 17927 }, { "epoch": 426.85970149253734, "grad_norm": 26.651683807373047, "learning_rate": 9.847943722943724e-06, "loss": 31.4751, "step": 17928 }, { "epoch": 426.88358208955225, "grad_norm": 24.847896575927734, "learning_rate": 9.847402597402597e-06, "loss": 29.7752, "step": 17929 }, { "epoch": 426.90746268656716, "grad_norm": 19.923368453979492, "learning_rate": 9.846861471861473e-06, "loss": 30.3849, "step": 17930 }, { "epoch": 426.93134328358207, "grad_norm": 19.710582733154297, "learning_rate": 9.846320346320348e-06, "loss": 30.8312, "step": 17931 }, { "epoch": 426.95522388059703, "grad_norm": 23.509376525878906, "learning_rate": 9.845779220779221e-06, "loss": 30.1011, "step": 17932 }, { "epoch": 426.97910447761194, "grad_norm": 20.309120178222656, "learning_rate": 9.845238095238097e-06, "loss": 30.4874, "step": 17933 }, { "epoch": 427.0, "grad_norm": 17.632957458496094, "learning_rate": 9.84469696969697e-06, "loss": 26.1494, "step": 17934 }, { "epoch": 427.0238805970149, "grad_norm": 22.372276306152344, "learning_rate": 9.844155844155846e-06, "loss": 30.3935, "step": 17935 }, { "epoch": 427.0477611940299, "grad_norm": 15.630749702453613, "learning_rate": 9.84361471861472e-06, "loss": 29.0292, "step": 17936 }, { "epoch": 427.0716417910448, "grad_norm": 22.172382354736328, "learning_rate": 9.843073593073595e-06, "loss": 31.2715, "step": 17937 }, { "epoch": 427.0955223880597, "grad_norm": 23.19870376586914, "learning_rate": 9.842532467532468e-06, "loss": 30.2725, "step": 17938 }, { "epoch": 427.1194029850746, "grad_norm": 18.850271224975586, "learning_rate": 9.841991341991343e-06, "loss": 30.0919, "step": 17939 }, { "epoch": 427.14328358208957, "grad_norm": 25.05748748779297, "learning_rate": 9.841450216450217e-06, "loss": 30.5763, "step": 17940 }, { "epoch": 427.1671641791045, "grad_norm": 16.73858070373535, "learning_rate": 9.840909090909092e-06, "loss": 29.7672, "step": 17941 }, { "epoch": 427.1910447761194, "grad_norm": 28.96164894104004, "learning_rate": 9.840367965367966e-06, "loss": 30.946, "step": 17942 }, { "epoch": 427.21492537313435, "grad_norm": 22.287185668945312, "learning_rate": 9.839826839826841e-06, "loss": 29.6005, "step": 17943 }, { "epoch": 427.23880597014926, "grad_norm": 27.769054412841797, "learning_rate": 9.839285714285715e-06, "loss": 30.3361, "step": 17944 }, { "epoch": 427.26268656716417, "grad_norm": 24.134944915771484, "learning_rate": 9.83874458874459e-06, "loss": 30.3562, "step": 17945 }, { "epoch": 427.28656716417913, "grad_norm": 22.095304489135742, "learning_rate": 9.838203463203464e-06, "loss": 29.4175, "step": 17946 }, { "epoch": 427.31044776119404, "grad_norm": 23.975479125976562, "learning_rate": 9.837662337662337e-06, "loss": 30.8257, "step": 17947 }, { "epoch": 427.33432835820895, "grad_norm": 20.785980224609375, "learning_rate": 9.837121212121213e-06, "loss": 29.4275, "step": 17948 }, { "epoch": 427.35820895522386, "grad_norm": 22.19607162475586, "learning_rate": 9.836580086580088e-06, "loss": 29.9369, "step": 17949 }, { "epoch": 427.3820895522388, "grad_norm": 21.125015258789062, "learning_rate": 9.836038961038962e-06, "loss": 31.9682, "step": 17950 }, { "epoch": 427.40597014925373, "grad_norm": 21.566129684448242, "learning_rate": 9.835497835497835e-06, "loss": 29.7118, "step": 17951 }, { "epoch": 427.42985074626864, "grad_norm": 18.172327041625977, "learning_rate": 9.83495670995671e-06, "loss": 29.7498, "step": 17952 }, { "epoch": 427.4537313432836, "grad_norm": 24.794818878173828, "learning_rate": 9.834415584415586e-06, "loss": 30.1251, "step": 17953 }, { "epoch": 427.4776119402985, "grad_norm": 18.634353637695312, "learning_rate": 9.83387445887446e-06, "loss": 30.5711, "step": 17954 }, { "epoch": 427.5014925373134, "grad_norm": 22.368017196655273, "learning_rate": 9.833333333333333e-06, "loss": 30.6314, "step": 17955 }, { "epoch": 427.52537313432833, "grad_norm": 21.320226669311523, "learning_rate": 9.832792207792208e-06, "loss": 31.0989, "step": 17956 }, { "epoch": 427.5492537313433, "grad_norm": 18.811485290527344, "learning_rate": 9.832251082251084e-06, "loss": 29.3738, "step": 17957 }, { "epoch": 427.5731343283582, "grad_norm": 19.80241584777832, "learning_rate": 9.831709956709957e-06, "loss": 30.3017, "step": 17958 }, { "epoch": 427.5970149253731, "grad_norm": 18.42851448059082, "learning_rate": 9.831168831168832e-06, "loss": 30.0681, "step": 17959 }, { "epoch": 427.6208955223881, "grad_norm": 22.667186737060547, "learning_rate": 9.830627705627706e-06, "loss": 31.2866, "step": 17960 }, { "epoch": 427.644776119403, "grad_norm": 16.108610153198242, "learning_rate": 9.830086580086581e-06, "loss": 29.6498, "step": 17961 }, { "epoch": 427.6686567164179, "grad_norm": 22.690263748168945, "learning_rate": 9.829545454545455e-06, "loss": 30.835, "step": 17962 }, { "epoch": 427.6925373134328, "grad_norm": 18.636512756347656, "learning_rate": 9.82900432900433e-06, "loss": 30.6828, "step": 17963 }, { "epoch": 427.7164179104478, "grad_norm": 19.8270263671875, "learning_rate": 9.828463203463204e-06, "loss": 30.089, "step": 17964 }, { "epoch": 427.7402985074627, "grad_norm": 20.09429359436035, "learning_rate": 9.827922077922079e-06, "loss": 30.4413, "step": 17965 }, { "epoch": 427.7641791044776, "grad_norm": 22.37393569946289, "learning_rate": 9.827380952380953e-06, "loss": 30.876, "step": 17966 }, { "epoch": 427.78805970149256, "grad_norm": 18.90312957763672, "learning_rate": 9.826839826839828e-06, "loss": 30.6051, "step": 17967 }, { "epoch": 427.81194029850747, "grad_norm": 23.74074935913086, "learning_rate": 9.826298701298702e-06, "loss": 30.7434, "step": 17968 }, { "epoch": 427.8358208955224, "grad_norm": 20.368953704833984, "learning_rate": 9.825757575757577e-06, "loss": 29.9398, "step": 17969 }, { "epoch": 427.85970149253734, "grad_norm": 20.267969131469727, "learning_rate": 9.825216450216452e-06, "loss": 30.475, "step": 17970 }, { "epoch": 427.88358208955225, "grad_norm": 20.809078216552734, "learning_rate": 9.824675324675326e-06, "loss": 31.1628, "step": 17971 }, { "epoch": 427.90746268656716, "grad_norm": 22.63227653503418, "learning_rate": 9.8241341991342e-06, "loss": 30.6279, "step": 17972 }, { "epoch": 427.93134328358207, "grad_norm": 22.542566299438477, "learning_rate": 9.823593073593075e-06, "loss": 29.9502, "step": 17973 }, { "epoch": 427.95522388059703, "grad_norm": 18.110464096069336, "learning_rate": 9.82305194805195e-06, "loss": 31.4847, "step": 17974 }, { "epoch": 427.97910447761194, "grad_norm": 25.007631301879883, "learning_rate": 9.822510822510824e-06, "loss": 31.5311, "step": 17975 }, { "epoch": 428.0, "grad_norm": 18.795978546142578, "learning_rate": 9.821969696969697e-06, "loss": 26.3636, "step": 17976 }, { "epoch": 428.0238805970149, "grad_norm": 18.240968704223633, "learning_rate": 9.821428571428573e-06, "loss": 31.3659, "step": 17977 }, { "epoch": 428.0477611940299, "grad_norm": 17.354169845581055, "learning_rate": 9.820887445887448e-06, "loss": 30.6544, "step": 17978 }, { "epoch": 428.0716417910448, "grad_norm": 18.9918270111084, "learning_rate": 9.820346320346321e-06, "loss": 30.1045, "step": 17979 }, { "epoch": 428.0955223880597, "grad_norm": 16.270517349243164, "learning_rate": 9.819805194805195e-06, "loss": 30.5998, "step": 17980 }, { "epoch": 428.1194029850746, "grad_norm": 16.979324340820312, "learning_rate": 9.81926406926407e-06, "loss": 30.3645, "step": 17981 }, { "epoch": 428.14328358208957, "grad_norm": 19.20657730102539, "learning_rate": 9.818722943722946e-06, "loss": 31.0501, "step": 17982 }, { "epoch": 428.1671641791045, "grad_norm": 18.77733612060547, "learning_rate": 9.81818181818182e-06, "loss": 30.3856, "step": 17983 }, { "epoch": 428.1910447761194, "grad_norm": 18.703338623046875, "learning_rate": 9.817640692640693e-06, "loss": 29.618, "step": 17984 }, { "epoch": 428.21492537313435, "grad_norm": 20.7010440826416, "learning_rate": 9.817099567099568e-06, "loss": 30.8886, "step": 17985 }, { "epoch": 428.23880597014926, "grad_norm": 17.985546112060547, "learning_rate": 9.816558441558442e-06, "loss": 30.6722, "step": 17986 }, { "epoch": 428.26268656716417, "grad_norm": 19.62334632873535, "learning_rate": 9.816017316017317e-06, "loss": 29.9925, "step": 17987 }, { "epoch": 428.28656716417913, "grad_norm": 18.24506950378418, "learning_rate": 9.81547619047619e-06, "loss": 31.1573, "step": 17988 }, { "epoch": 428.31044776119404, "grad_norm": 18.331899642944336, "learning_rate": 9.814935064935066e-06, "loss": 30.4798, "step": 17989 }, { "epoch": 428.33432835820895, "grad_norm": 20.825767517089844, "learning_rate": 9.81439393939394e-06, "loss": 30.6141, "step": 17990 }, { "epoch": 428.35820895522386, "grad_norm": 17.477787017822266, "learning_rate": 9.813852813852815e-06, "loss": 29.8966, "step": 17991 }, { "epoch": 428.3820895522388, "grad_norm": 17.410789489746094, "learning_rate": 9.813311688311688e-06, "loss": 30.443, "step": 17992 }, { "epoch": 428.40597014925373, "grad_norm": 21.99690818786621, "learning_rate": 9.812770562770564e-06, "loss": 30.2403, "step": 17993 }, { "epoch": 428.42985074626864, "grad_norm": 18.052631378173828, "learning_rate": 9.812229437229437e-06, "loss": 30.7037, "step": 17994 }, { "epoch": 428.4537313432836, "grad_norm": 19.822654724121094, "learning_rate": 9.811688311688313e-06, "loss": 29.4578, "step": 17995 }, { "epoch": 428.4776119402985, "grad_norm": 18.63534164428711, "learning_rate": 9.811147186147188e-06, "loss": 30.2552, "step": 17996 }, { "epoch": 428.5014925373134, "grad_norm": 22.506925582885742, "learning_rate": 9.810606060606061e-06, "loss": 30.118, "step": 17997 }, { "epoch": 428.52537313432833, "grad_norm": 19.12542152404785, "learning_rate": 9.810064935064935e-06, "loss": 29.1208, "step": 17998 }, { "epoch": 428.5492537313433, "grad_norm": 20.821836471557617, "learning_rate": 9.80952380952381e-06, "loss": 30.1457, "step": 17999 }, { "epoch": 428.5731343283582, "grad_norm": 18.658109664916992, "learning_rate": 9.808982683982686e-06, "loss": 28.8632, "step": 18000 }, { "epoch": 428.5970149253731, "grad_norm": 22.922346115112305, "learning_rate": 9.80844155844156e-06, "loss": 30.2293, "step": 18001 }, { "epoch": 428.6208955223881, "grad_norm": 16.36787223815918, "learning_rate": 9.807900432900433e-06, "loss": 29.8542, "step": 18002 }, { "epoch": 428.644776119403, "grad_norm": 22.274993896484375, "learning_rate": 9.807359307359308e-06, "loss": 30.4504, "step": 18003 }, { "epoch": 428.6686567164179, "grad_norm": NaN, "learning_rate": 9.806818181818183e-06, "loss": 30.018, "step": 18004 }, { "epoch": 428.6925373134328, "grad_norm": 17.916748046875, "learning_rate": 9.806818181818183e-06, "loss": 30.5479, "step": 18005 }, { "epoch": 428.7164179104478, "grad_norm": 23.17523765563965, "learning_rate": 9.806277056277057e-06, "loss": 30.4771, "step": 18006 }, { "epoch": 428.7402985074627, "grad_norm": 19.569843292236328, "learning_rate": 9.80573593073593e-06, "loss": 30.4651, "step": 18007 }, { "epoch": 428.7641791044776, "grad_norm": 21.045124053955078, "learning_rate": 9.805194805194806e-06, "loss": 29.7906, "step": 18008 }, { "epoch": 428.78805970149256, "grad_norm": 22.470186233520508, "learning_rate": 9.804653679653681e-06, "loss": 31.1689, "step": 18009 }, { "epoch": 428.81194029850747, "grad_norm": 18.39364242553711, "learning_rate": 9.804112554112555e-06, "loss": 30.7684, "step": 18010 }, { "epoch": 428.8358208955224, "grad_norm": 20.105195999145508, "learning_rate": 9.803571428571428e-06, "loss": 30.9044, "step": 18011 }, { "epoch": 428.85970149253734, "grad_norm": 16.686826705932617, "learning_rate": 9.803030303030304e-06, "loss": 30.3485, "step": 18012 }, { "epoch": 428.88358208955225, "grad_norm": 21.4024600982666, "learning_rate": 9.802489177489179e-06, "loss": 30.5889, "step": 18013 }, { "epoch": 428.90746268656716, "grad_norm": NaN, "learning_rate": 9.801948051948053e-06, "loss": 26.1148, "step": 18014 }, { "epoch": 428.93134328358207, "grad_norm": 20.984010696411133, "learning_rate": 9.801948051948053e-06, "loss": 29.6254, "step": 18015 }, { "epoch": 428.95522388059703, "grad_norm": 19.516313552856445, "learning_rate": 9.801406926406926e-06, "loss": 31.5041, "step": 18016 }, { "epoch": 428.97910447761194, "grad_norm": 22.676307678222656, "learning_rate": 9.800865800865802e-06, "loss": 30.2982, "step": 18017 }, { "epoch": 429.0, "grad_norm": 16.253156661987305, "learning_rate": 9.800324675324677e-06, "loss": 26.3313, "step": 18018 }, { "epoch": 429.0238805970149, "grad_norm": 18.35772705078125, "learning_rate": 9.79978354978355e-06, "loss": 28.7806, "step": 18019 }, { "epoch": 429.0477611940299, "grad_norm": 20.161684036254883, "learning_rate": 9.799242424242426e-06, "loss": 30.2068, "step": 18020 }, { "epoch": 429.0716417910448, "grad_norm": 18.514429092407227, "learning_rate": 9.7987012987013e-06, "loss": 30.3083, "step": 18021 }, { "epoch": 429.0955223880597, "grad_norm": 21.278491973876953, "learning_rate": 9.798160173160175e-06, "loss": 31.3024, "step": 18022 }, { "epoch": 429.1194029850746, "grad_norm": 19.238842010498047, "learning_rate": 9.797619047619048e-06, "loss": 30.5968, "step": 18023 }, { "epoch": 429.14328358208957, "grad_norm": 19.597755432128906, "learning_rate": 9.797077922077924e-06, "loss": 30.1577, "step": 18024 }, { "epoch": 429.1671641791045, "grad_norm": 18.52017593383789, "learning_rate": 9.796536796536797e-06, "loss": 31.0317, "step": 18025 }, { "epoch": 429.1910447761194, "grad_norm": 19.330345153808594, "learning_rate": 9.79599567099567e-06, "loss": 30.1967, "step": 18026 }, { "epoch": 429.21492537313435, "grad_norm": 20.696184158325195, "learning_rate": 9.795454545454546e-06, "loss": 29.8116, "step": 18027 }, { "epoch": 429.23880597014926, "grad_norm": 23.277179718017578, "learning_rate": 9.794913419913421e-06, "loss": 31.3263, "step": 18028 }, { "epoch": 429.26268656716417, "grad_norm": 17.244430541992188, "learning_rate": 9.794372294372295e-06, "loss": 29.495, "step": 18029 }, { "epoch": 429.28656716417913, "grad_norm": 28.546722412109375, "learning_rate": 9.793831168831169e-06, "loss": 30.5444, "step": 18030 }, { "epoch": 429.31044776119404, "grad_norm": 21.881196975708008, "learning_rate": 9.793290043290044e-06, "loss": 30.0764, "step": 18031 }, { "epoch": 429.33432835820895, "grad_norm": 20.317873001098633, "learning_rate": 9.79274891774892e-06, "loss": 30.2842, "step": 18032 }, { "epoch": 429.35820895522386, "grad_norm": 25.365812301635742, "learning_rate": 9.792207792207793e-06, "loss": 29.8889, "step": 18033 }, { "epoch": 429.3820895522388, "grad_norm": 21.7113037109375, "learning_rate": 9.791666666666666e-06, "loss": 31.2148, "step": 18034 }, { "epoch": 429.40597014925373, "grad_norm": 19.957426071166992, "learning_rate": 9.791125541125542e-06, "loss": 30.9801, "step": 18035 }, { "epoch": 429.42985074626864, "grad_norm": 28.227706909179688, "learning_rate": 9.790584415584417e-06, "loss": 29.9109, "step": 18036 }, { "epoch": 429.4537313432836, "grad_norm": 16.694480895996094, "learning_rate": 9.79004329004329e-06, "loss": 30.586, "step": 18037 }, { "epoch": 429.4776119402985, "grad_norm": 25.612768173217773, "learning_rate": 9.789502164502164e-06, "loss": 30.0729, "step": 18038 }, { "epoch": 429.5014925373134, "grad_norm": 19.783058166503906, "learning_rate": 9.78896103896104e-06, "loss": 30.5687, "step": 18039 }, { "epoch": 429.52537313432833, "grad_norm": 21.934432983398438, "learning_rate": 9.788419913419915e-06, "loss": 30.241, "step": 18040 }, { "epoch": 429.5492537313433, "grad_norm": 22.333999633789062, "learning_rate": 9.787878787878788e-06, "loss": 30.1466, "step": 18041 }, { "epoch": 429.5731343283582, "grad_norm": 22.895530700683594, "learning_rate": 9.787337662337664e-06, "loss": 29.9201, "step": 18042 }, { "epoch": 429.5970149253731, "grad_norm": 17.72591209411621, "learning_rate": 9.786796536796537e-06, "loss": 30.3627, "step": 18043 }, { "epoch": 429.6208955223881, "grad_norm": 28.418760299682617, "learning_rate": 9.786255411255413e-06, "loss": 30.4524, "step": 18044 }, { "epoch": 429.644776119403, "grad_norm": 20.38718032836914, "learning_rate": 9.785714285714286e-06, "loss": 29.7648, "step": 18045 }, { "epoch": 429.6686567164179, "grad_norm": 22.938461303710938, "learning_rate": 9.785173160173161e-06, "loss": 29.9673, "step": 18046 }, { "epoch": 429.6925373134328, "grad_norm": 27.806175231933594, "learning_rate": 9.784632034632035e-06, "loss": 29.2941, "step": 18047 }, { "epoch": 429.7164179104478, "grad_norm": 19.876920700073242, "learning_rate": 9.78409090909091e-06, "loss": 30.2551, "step": 18048 }, { "epoch": 429.7402985074627, "grad_norm": 27.202096939086914, "learning_rate": 9.783549783549784e-06, "loss": 29.598, "step": 18049 }, { "epoch": 429.7641791044776, "grad_norm": 22.663503646850586, "learning_rate": 9.78300865800866e-06, "loss": 30.6346, "step": 18050 }, { "epoch": 429.78805970149256, "grad_norm": 19.912015914916992, "learning_rate": 9.782467532467533e-06, "loss": 30.3092, "step": 18051 }, { "epoch": 429.81194029850747, "grad_norm": 25.106313705444336, "learning_rate": 9.781926406926408e-06, "loss": 30.9421, "step": 18052 }, { "epoch": 429.8358208955224, "grad_norm": 20.329668045043945, "learning_rate": 9.781385281385282e-06, "loss": 30.6523, "step": 18053 }, { "epoch": 429.85970149253734, "grad_norm": 18.845617294311523, "learning_rate": 9.780844155844157e-06, "loss": 30.7676, "step": 18054 }, { "epoch": 429.88358208955225, "grad_norm": 26.7338809967041, "learning_rate": 9.78030303030303e-06, "loss": 30.6298, "step": 18055 }, { "epoch": 429.90746268656716, "grad_norm": 17.853437423706055, "learning_rate": 9.779761904761906e-06, "loss": 30.2678, "step": 18056 }, { "epoch": 429.93134328358207, "grad_norm": 20.483516693115234, "learning_rate": 9.779220779220781e-06, "loss": 29.3205, "step": 18057 }, { "epoch": 429.95522388059703, "grad_norm": 21.360971450805664, "learning_rate": 9.778679653679655e-06, "loss": 30.1287, "step": 18058 }, { "epoch": 429.97910447761194, "grad_norm": 21.779895782470703, "learning_rate": 9.778138528138528e-06, "loss": 30.0274, "step": 18059 }, { "epoch": 430.0, "grad_norm": 21.29844093322754, "learning_rate": 9.777597402597404e-06, "loss": 27.3231, "step": 18060 }, { "epoch": 430.0238805970149, "grad_norm": 19.95744514465332, "learning_rate": 9.777056277056279e-06, "loss": 29.8417, "step": 18061 }, { "epoch": 430.0477611940299, "grad_norm": 18.681852340698242, "learning_rate": 9.776515151515153e-06, "loss": 29.8086, "step": 18062 }, { "epoch": 430.0716417910448, "grad_norm": 27.81563377380371, "learning_rate": 9.775974025974026e-06, "loss": 30.0835, "step": 18063 }, { "epoch": 430.0955223880597, "grad_norm": 20.64076805114746, "learning_rate": 9.775432900432902e-06, "loss": 31.3017, "step": 18064 }, { "epoch": 430.1194029850746, "grad_norm": 17.689733505249023, "learning_rate": 9.774891774891775e-06, "loss": 29.8934, "step": 18065 }, { "epoch": 430.14328358208957, "grad_norm": 18.394147872924805, "learning_rate": 9.77435064935065e-06, "loss": 30.7361, "step": 18066 }, { "epoch": 430.1671641791045, "grad_norm": 22.4614315032959, "learning_rate": 9.773809523809524e-06, "loss": 30.3341, "step": 18067 }, { "epoch": 430.1910447761194, "grad_norm": 19.07718849182129, "learning_rate": 9.7732683982684e-06, "loss": 29.995, "step": 18068 }, { "epoch": 430.21492537313435, "grad_norm": 19.843769073486328, "learning_rate": 9.772727272727273e-06, "loss": 29.9518, "step": 18069 }, { "epoch": 430.23880597014926, "grad_norm": 19.379444122314453, "learning_rate": 9.772186147186148e-06, "loss": 31.0508, "step": 18070 }, { "epoch": 430.26268656716417, "grad_norm": 25.515151977539062, "learning_rate": 9.771645021645022e-06, "loss": 30.3901, "step": 18071 }, { "epoch": 430.28656716417913, "grad_norm": 19.96921157836914, "learning_rate": 9.771103896103897e-06, "loss": 29.9685, "step": 18072 }, { "epoch": 430.31044776119404, "grad_norm": 17.278278350830078, "learning_rate": 9.77056277056277e-06, "loss": 30.2587, "step": 18073 }, { "epoch": 430.33432835820895, "grad_norm": 18.309537887573242, "learning_rate": 9.770021645021646e-06, "loss": 29.8807, "step": 18074 }, { "epoch": 430.35820895522386, "grad_norm": 21.972036361694336, "learning_rate": 9.76948051948052e-06, "loss": 29.7726, "step": 18075 }, { "epoch": 430.3820895522388, "grad_norm": 17.125043869018555, "learning_rate": 9.768939393939395e-06, "loss": 30.4803, "step": 18076 }, { "epoch": 430.40597014925373, "grad_norm": 20.584535598754883, "learning_rate": 9.768398268398269e-06, "loss": 29.1054, "step": 18077 }, { "epoch": 430.42985074626864, "grad_norm": 21.71653938293457, "learning_rate": 9.767857142857144e-06, "loss": 30.7956, "step": 18078 }, { "epoch": 430.4537313432836, "grad_norm": 22.659194946289062, "learning_rate": 9.767316017316019e-06, "loss": 31.0582, "step": 18079 }, { "epoch": 430.4776119402985, "grad_norm": 23.04308319091797, "learning_rate": 9.766774891774893e-06, "loss": 30.7528, "step": 18080 }, { "epoch": 430.5014925373134, "grad_norm": 18.38223648071289, "learning_rate": 9.766233766233766e-06, "loss": 29.7243, "step": 18081 }, { "epoch": 430.52537313432833, "grad_norm": 21.17744255065918, "learning_rate": 9.765692640692642e-06, "loss": 29.9287, "step": 18082 }, { "epoch": 430.5492537313433, "grad_norm": 23.022775650024414, "learning_rate": 9.765151515151517e-06, "loss": 30.4872, "step": 18083 }, { "epoch": 430.5731343283582, "grad_norm": 17.25739860534668, "learning_rate": 9.76461038961039e-06, "loss": 30.0265, "step": 18084 }, { "epoch": 430.5970149253731, "grad_norm": 19.320589065551758, "learning_rate": 9.764069264069264e-06, "loss": 29.8083, "step": 18085 }, { "epoch": 430.6208955223881, "grad_norm": 18.31188201904297, "learning_rate": 9.76352813852814e-06, "loss": 29.8401, "step": 18086 }, { "epoch": 430.644776119403, "grad_norm": 21.063247680664062, "learning_rate": 9.762987012987015e-06, "loss": 30.7492, "step": 18087 }, { "epoch": 430.6686567164179, "grad_norm": 17.298439025878906, "learning_rate": 9.762445887445888e-06, "loss": 31.1181, "step": 18088 }, { "epoch": 430.6925373134328, "grad_norm": 19.951534271240234, "learning_rate": 9.761904761904762e-06, "loss": 30.5114, "step": 18089 }, { "epoch": 430.7164179104478, "grad_norm": 17.378904342651367, "learning_rate": 9.761363636363637e-06, "loss": 30.5523, "step": 18090 }, { "epoch": 430.7402985074627, "grad_norm": 19.628141403198242, "learning_rate": 9.760822510822513e-06, "loss": 30.5202, "step": 18091 }, { "epoch": 430.7641791044776, "grad_norm": 18.593860626220703, "learning_rate": 9.760281385281386e-06, "loss": 30.9547, "step": 18092 }, { "epoch": 430.78805970149256, "grad_norm": 21.787429809570312, "learning_rate": 9.75974025974026e-06, "loss": 30.2461, "step": 18093 }, { "epoch": 430.81194029850747, "grad_norm": 19.232810974121094, "learning_rate": 9.759199134199135e-06, "loss": 29.9098, "step": 18094 }, { "epoch": 430.8358208955224, "grad_norm": 20.132171630859375, "learning_rate": 9.75865800865801e-06, "loss": 30.2782, "step": 18095 }, { "epoch": 430.85970149253734, "grad_norm": 21.953615188598633, "learning_rate": 9.758116883116884e-06, "loss": 30.2047, "step": 18096 }, { "epoch": 430.88358208955225, "grad_norm": 19.692167282104492, "learning_rate": 9.757575757575758e-06, "loss": 29.5148, "step": 18097 }, { "epoch": 430.90746268656716, "grad_norm": 19.286014556884766, "learning_rate": 9.757034632034633e-06, "loss": 30.6875, "step": 18098 }, { "epoch": 430.93134328358207, "grad_norm": 22.44078254699707, "learning_rate": 9.756493506493508e-06, "loss": 30.1331, "step": 18099 }, { "epoch": 430.95522388059703, "grad_norm": 25.140562057495117, "learning_rate": 9.755952380952382e-06, "loss": 30.417, "step": 18100 }, { "epoch": 430.97910447761194, "grad_norm": 19.807554244995117, "learning_rate": 9.755411255411255e-06, "loss": 29.6433, "step": 18101 }, { "epoch": 431.0, "grad_norm": 15.321440696716309, "learning_rate": 9.75487012987013e-06, "loss": 26.0771, "step": 18102 }, { "epoch": 431.0238805970149, "grad_norm": 21.101791381835938, "learning_rate": 9.754329004329006e-06, "loss": 29.9268, "step": 18103 }, { "epoch": 431.0477611940299, "grad_norm": 19.69845199584961, "learning_rate": 9.75378787878788e-06, "loss": 29.4053, "step": 18104 }, { "epoch": 431.0716417910448, "grad_norm": 22.732641220092773, "learning_rate": 9.753246753246755e-06, "loss": 30.7583, "step": 18105 }, { "epoch": 431.0955223880597, "grad_norm": 15.956716537475586, "learning_rate": 9.752705627705628e-06, "loss": 30.2716, "step": 18106 }, { "epoch": 431.1194029850746, "grad_norm": 28.313772201538086, "learning_rate": 9.752164502164502e-06, "loss": 31.3226, "step": 18107 }, { "epoch": 431.14328358208957, "grad_norm": 24.60181999206543, "learning_rate": 9.751623376623377e-06, "loss": 31.0822, "step": 18108 }, { "epoch": 431.1671641791045, "grad_norm": 18.626888275146484, "learning_rate": 9.751082251082253e-06, "loss": 30.4575, "step": 18109 }, { "epoch": 431.1910447761194, "grad_norm": 20.24835968017578, "learning_rate": 9.750541125541126e-06, "loss": 30.0248, "step": 18110 }, { "epoch": 431.21492537313435, "grad_norm": 22.693408966064453, "learning_rate": 9.75e-06, "loss": 31.5596, "step": 18111 }, { "epoch": 431.23880597014926, "grad_norm": 17.155376434326172, "learning_rate": 9.749458874458875e-06, "loss": 30.8917, "step": 18112 }, { "epoch": 431.26268656716417, "grad_norm": 21.3492431640625, "learning_rate": 9.74891774891775e-06, "loss": 29.6096, "step": 18113 }, { "epoch": 431.28656716417913, "grad_norm": 19.785463333129883, "learning_rate": 9.748376623376624e-06, "loss": 31.0222, "step": 18114 }, { "epoch": 431.31044776119404, "grad_norm": 20.90851402282715, "learning_rate": 9.747835497835498e-06, "loss": 30.3411, "step": 18115 }, { "epoch": 431.33432835820895, "grad_norm": 19.563947677612305, "learning_rate": 9.747294372294373e-06, "loss": 30.4188, "step": 18116 }, { "epoch": 431.35820895522386, "grad_norm": 16.971725463867188, "learning_rate": 9.746753246753248e-06, "loss": 29.6125, "step": 18117 }, { "epoch": 431.3820895522388, "grad_norm": 30.47445297241211, "learning_rate": 9.746212121212122e-06, "loss": 30.3827, "step": 18118 }, { "epoch": 431.40597014925373, "grad_norm": 19.661272048950195, "learning_rate": 9.745670995670995e-06, "loss": 30.1986, "step": 18119 }, { "epoch": 431.42985074626864, "grad_norm": 24.344816207885742, "learning_rate": 9.74512987012987e-06, "loss": 30.7661, "step": 18120 }, { "epoch": 431.4537313432836, "grad_norm": 28.426176071166992, "learning_rate": 9.744588744588746e-06, "loss": 30.1947, "step": 18121 }, { "epoch": 431.4776119402985, "grad_norm": 17.48659324645996, "learning_rate": 9.74404761904762e-06, "loss": 30.8871, "step": 18122 }, { "epoch": 431.5014925373134, "grad_norm": 29.077180862426758, "learning_rate": 9.743506493506493e-06, "loss": 29.2371, "step": 18123 }, { "epoch": 431.52537313432833, "grad_norm": 25.1240177154541, "learning_rate": 9.742965367965369e-06, "loss": 30.2949, "step": 18124 }, { "epoch": 431.5492537313433, "grad_norm": 20.506816864013672, "learning_rate": 9.742424242424244e-06, "loss": 30.0029, "step": 18125 }, { "epoch": 431.5731343283582, "grad_norm": 29.383607864379883, "learning_rate": 9.741883116883117e-06, "loss": 30.1708, "step": 18126 }, { "epoch": 431.5970149253731, "grad_norm": 21.535627365112305, "learning_rate": 9.741341991341993e-06, "loss": 29.6401, "step": 18127 }, { "epoch": 431.6208955223881, "grad_norm": 26.823631286621094, "learning_rate": 9.740800865800866e-06, "loss": 29.6246, "step": 18128 }, { "epoch": 431.644776119403, "grad_norm": 23.506425857543945, "learning_rate": 9.740259740259742e-06, "loss": 29.7555, "step": 18129 }, { "epoch": 431.6686567164179, "grad_norm": 26.477182388305664, "learning_rate": 9.739718614718615e-06, "loss": 29.9764, "step": 18130 }, { "epoch": 431.6925373134328, "grad_norm": 27.343353271484375, "learning_rate": 9.73917748917749e-06, "loss": 30.8947, "step": 18131 }, { "epoch": 431.7164179104478, "grad_norm": 20.136842727661133, "learning_rate": 9.738636363636364e-06, "loss": 29.4448, "step": 18132 }, { "epoch": 431.7402985074627, "grad_norm": 23.874711990356445, "learning_rate": 9.73809523809524e-06, "loss": 30.4971, "step": 18133 }, { "epoch": 431.7641791044776, "grad_norm": 22.99772071838379, "learning_rate": 9.737554112554113e-06, "loss": 28.7115, "step": 18134 }, { "epoch": 431.78805970149256, "grad_norm": 17.651897430419922, "learning_rate": 9.737012987012988e-06, "loss": 28.9517, "step": 18135 }, { "epoch": 431.81194029850747, "grad_norm": 23.423057556152344, "learning_rate": 9.736471861471862e-06, "loss": 30.3671, "step": 18136 }, { "epoch": 431.8358208955224, "grad_norm": 22.226015090942383, "learning_rate": 9.735930735930737e-06, "loss": 31.7865, "step": 18137 }, { "epoch": 431.85970149253734, "grad_norm": 19.68935203552246, "learning_rate": 9.735389610389612e-06, "loss": 29.694, "step": 18138 }, { "epoch": 431.88358208955225, "grad_norm": 18.63748550415039, "learning_rate": 9.734848484848486e-06, "loss": 29.9531, "step": 18139 }, { "epoch": 431.90746268656716, "grad_norm": 24.25529670715332, "learning_rate": 9.73430735930736e-06, "loss": 29.8732, "step": 18140 }, { "epoch": 431.93134328358207, "grad_norm": 19.358558654785156, "learning_rate": 9.733766233766235e-06, "loss": 30.0011, "step": 18141 }, { "epoch": 431.95522388059703, "grad_norm": 18.492074966430664, "learning_rate": 9.73322510822511e-06, "loss": 30.1113, "step": 18142 }, { "epoch": 431.97910447761194, "grad_norm": 23.94791030883789, "learning_rate": 9.732683982683984e-06, "loss": 30.768, "step": 18143 }, { "epoch": 432.0, "grad_norm": 18.57393455505371, "learning_rate": 9.732142857142858e-06, "loss": 26.8599, "step": 18144 }, { "epoch": 432.0238805970149, "grad_norm": 18.835695266723633, "learning_rate": 9.731601731601731e-06, "loss": 29.5854, "step": 18145 }, { "epoch": 432.0477611940299, "grad_norm": 26.365015029907227, "learning_rate": 9.731060606060606e-06, "loss": 30.2654, "step": 18146 }, { "epoch": 432.0716417910448, "grad_norm": 18.96073341369629, "learning_rate": 9.730519480519482e-06, "loss": 29.6751, "step": 18147 }, { "epoch": 432.0955223880597, "grad_norm": 19.330810546875, "learning_rate": 9.729978354978355e-06, "loss": 30.1745, "step": 18148 }, { "epoch": 432.1194029850746, "grad_norm": 20.16083526611328, "learning_rate": 9.729437229437229e-06, "loss": 29.659, "step": 18149 }, { "epoch": 432.14328358208957, "grad_norm": 21.80440330505371, "learning_rate": 9.728896103896104e-06, "loss": 29.8091, "step": 18150 }, { "epoch": 432.1671641791045, "grad_norm": 17.65022850036621, "learning_rate": 9.72835497835498e-06, "loss": 30.9192, "step": 18151 }, { "epoch": 432.1910447761194, "grad_norm": 17.394853591918945, "learning_rate": 9.727813852813853e-06, "loss": 29.6661, "step": 18152 }, { "epoch": 432.21492537313435, "grad_norm": 18.526281356811523, "learning_rate": 9.727272727272728e-06, "loss": 30.1869, "step": 18153 }, { "epoch": 432.23880597014926, "grad_norm": 16.71364974975586, "learning_rate": 9.726731601731602e-06, "loss": 29.687, "step": 18154 }, { "epoch": 432.26268656716417, "grad_norm": 23.47662925720215, "learning_rate": 9.726190476190477e-06, "loss": 29.7638, "step": 18155 }, { "epoch": 432.28656716417913, "grad_norm": 17.683979034423828, "learning_rate": 9.725649350649351e-06, "loss": 29.8452, "step": 18156 }, { "epoch": 432.31044776119404, "grad_norm": 21.617971420288086, "learning_rate": 9.725108225108226e-06, "loss": 30.203, "step": 18157 }, { "epoch": 432.33432835820895, "grad_norm": 19.951866149902344, "learning_rate": 9.7245670995671e-06, "loss": 30.7912, "step": 18158 }, { "epoch": 432.35820895522386, "grad_norm": 19.99698257446289, "learning_rate": 9.724025974025975e-06, "loss": 30.8737, "step": 18159 }, { "epoch": 432.3820895522388, "grad_norm": 24.1381893157959, "learning_rate": 9.723484848484849e-06, "loss": 30.2432, "step": 18160 }, { "epoch": 432.40597014925373, "grad_norm": 19.578128814697266, "learning_rate": 9.722943722943724e-06, "loss": 29.848, "step": 18161 }, { "epoch": 432.42985074626864, "grad_norm": 21.668193817138672, "learning_rate": 9.722402597402598e-06, "loss": 31.3999, "step": 18162 }, { "epoch": 432.4537313432836, "grad_norm": 20.279748916625977, "learning_rate": 9.721861471861473e-06, "loss": 31.1406, "step": 18163 }, { "epoch": 432.4776119402985, "grad_norm": 20.21373748779297, "learning_rate": 9.721320346320348e-06, "loss": 30.5899, "step": 18164 }, { "epoch": 432.5014925373134, "grad_norm": 18.572547912597656, "learning_rate": 9.720779220779222e-06, "loss": 29.4998, "step": 18165 }, { "epoch": 432.52537313432833, "grad_norm": 19.156526565551758, "learning_rate": 9.720238095238095e-06, "loss": 30.9866, "step": 18166 }, { "epoch": 432.5492537313433, "grad_norm": 20.265737533569336, "learning_rate": 9.71969696969697e-06, "loss": 30.1894, "step": 18167 }, { "epoch": 432.5731343283582, "grad_norm": 19.298349380493164, "learning_rate": 9.719155844155846e-06, "loss": 30.6742, "step": 18168 }, { "epoch": 432.5970149253731, "grad_norm": 20.337852478027344, "learning_rate": 9.71861471861472e-06, "loss": 30.027, "step": 18169 }, { "epoch": 432.6208955223881, "grad_norm": 18.822757720947266, "learning_rate": 9.718073593073593e-06, "loss": 29.6242, "step": 18170 }, { "epoch": 432.644776119403, "grad_norm": 17.615934371948242, "learning_rate": 9.717532467532468e-06, "loss": 29.7349, "step": 18171 }, { "epoch": 432.6686567164179, "grad_norm": 19.46221923828125, "learning_rate": 9.716991341991344e-06, "loss": 29.5274, "step": 18172 }, { "epoch": 432.6925373134328, "grad_norm": 23.778234481811523, "learning_rate": 9.716450216450217e-06, "loss": 30.3217, "step": 18173 }, { "epoch": 432.7164179104478, "grad_norm": 19.636152267456055, "learning_rate": 9.715909090909091e-06, "loss": 31.0492, "step": 18174 }, { "epoch": 432.7402985074627, "grad_norm": 17.6784725189209, "learning_rate": 9.715367965367966e-06, "loss": 29.4998, "step": 18175 }, { "epoch": 432.7641791044776, "grad_norm": 19.695819854736328, "learning_rate": 9.714826839826842e-06, "loss": 30.7269, "step": 18176 }, { "epoch": 432.78805970149256, "grad_norm": 19.45430564880371, "learning_rate": 9.714285714285715e-06, "loss": 30.1602, "step": 18177 }, { "epoch": 432.81194029850747, "grad_norm": 21.0908260345459, "learning_rate": 9.713744588744589e-06, "loss": 29.7193, "step": 18178 }, { "epoch": 432.8358208955224, "grad_norm": 19.038637161254883, "learning_rate": 9.713203463203464e-06, "loss": 30.0591, "step": 18179 }, { "epoch": 432.85970149253734, "grad_norm": 17.280475616455078, "learning_rate": 9.71266233766234e-06, "loss": 29.9562, "step": 18180 }, { "epoch": 432.88358208955225, "grad_norm": 18.024349212646484, "learning_rate": 9.712121212121213e-06, "loss": 30.2917, "step": 18181 }, { "epoch": 432.90746268656716, "grad_norm": 17.661876678466797, "learning_rate": 9.711580086580087e-06, "loss": 30.0967, "step": 18182 }, { "epoch": 432.93134328358207, "grad_norm": 18.027271270751953, "learning_rate": 9.711038961038962e-06, "loss": 29.4842, "step": 18183 }, { "epoch": 432.95522388059703, "grad_norm": 18.41741943359375, "learning_rate": 9.710497835497835e-06, "loss": 30.4195, "step": 18184 }, { "epoch": 432.97910447761194, "grad_norm": 16.7603816986084, "learning_rate": 9.70995670995671e-06, "loss": 30.8362, "step": 18185 }, { "epoch": 433.0, "grad_norm": 16.588382720947266, "learning_rate": 9.709415584415586e-06, "loss": 26.8038, "step": 18186 }, { "epoch": 433.0238805970149, "grad_norm": 15.491820335388184, "learning_rate": 9.70887445887446e-06, "loss": 29.728, "step": 18187 }, { "epoch": 433.0477611940299, "grad_norm": 18.844192504882812, "learning_rate": 9.708333333333333e-06, "loss": 29.4772, "step": 18188 }, { "epoch": 433.0716417910448, "grad_norm": 18.78489112854004, "learning_rate": 9.707792207792209e-06, "loss": 29.0074, "step": 18189 }, { "epoch": 433.0955223880597, "grad_norm": 19.88336181640625, "learning_rate": 9.707251082251084e-06, "loss": 29.2426, "step": 18190 }, { "epoch": 433.1194029850746, "grad_norm": 24.643024444580078, "learning_rate": 9.706709956709957e-06, "loss": 29.6507, "step": 18191 }, { "epoch": 433.14328358208957, "grad_norm": 16.000728607177734, "learning_rate": 9.706168831168831e-06, "loss": 30.8033, "step": 18192 }, { "epoch": 433.1671641791045, "grad_norm": 25.131465911865234, "learning_rate": 9.705627705627706e-06, "loss": 28.96, "step": 18193 }, { "epoch": 433.1910447761194, "grad_norm": 23.845787048339844, "learning_rate": 9.705086580086582e-06, "loss": 29.8061, "step": 18194 }, { "epoch": 433.21492537313435, "grad_norm": 17.705398559570312, "learning_rate": 9.704545454545455e-06, "loss": 29.806, "step": 18195 }, { "epoch": 433.23880597014926, "grad_norm": 18.651830673217773, "learning_rate": 9.704004329004329e-06, "loss": 30.2466, "step": 18196 }, { "epoch": 433.26268656716417, "grad_norm": 21.60057830810547, "learning_rate": 9.703463203463204e-06, "loss": 31.1439, "step": 18197 }, { "epoch": 433.28656716417913, "grad_norm": 19.81477928161621, "learning_rate": 9.70292207792208e-06, "loss": 31.1098, "step": 18198 }, { "epoch": 433.31044776119404, "grad_norm": 18.393125534057617, "learning_rate": 9.702380952380953e-06, "loss": 29.9146, "step": 18199 }, { "epoch": 433.33432835820895, "grad_norm": 17.704788208007812, "learning_rate": 9.701839826839827e-06, "loss": 29.6932, "step": 18200 }, { "epoch": 433.35820895522386, "grad_norm": 19.540067672729492, "learning_rate": 9.701298701298702e-06, "loss": 29.1834, "step": 18201 }, { "epoch": 433.3820895522388, "grad_norm": 24.0918025970459, "learning_rate": 9.700757575757577e-06, "loss": 30.6327, "step": 18202 }, { "epoch": 433.40597014925373, "grad_norm": 17.470829010009766, "learning_rate": 9.700216450216451e-06, "loss": 29.4752, "step": 18203 }, { "epoch": 433.42985074626864, "grad_norm": 22.728925704956055, "learning_rate": 9.699675324675324e-06, "loss": 30.8575, "step": 18204 }, { "epoch": 433.4537313432836, "grad_norm": 23.7744140625, "learning_rate": 9.6991341991342e-06, "loss": 29.8983, "step": 18205 }, { "epoch": 433.4776119402985, "grad_norm": 19.178041458129883, "learning_rate": 9.698593073593075e-06, "loss": 30.1342, "step": 18206 }, { "epoch": 433.5014925373134, "grad_norm": 20.79680061340332, "learning_rate": 9.698051948051949e-06, "loss": 30.2384, "step": 18207 }, { "epoch": 433.52537313432833, "grad_norm": 22.13588523864746, "learning_rate": 9.697510822510822e-06, "loss": 30.4178, "step": 18208 }, { "epoch": 433.5492537313433, "grad_norm": 22.770479202270508, "learning_rate": 9.696969696969698e-06, "loss": 30.1464, "step": 18209 }, { "epoch": 433.5731343283582, "grad_norm": 20.7957706451416, "learning_rate": 9.696428571428573e-06, "loss": 30.8754, "step": 18210 }, { "epoch": 433.5970149253731, "grad_norm": 18.539743423461914, "learning_rate": 9.695887445887446e-06, "loss": 30.1513, "step": 18211 }, { "epoch": 433.6208955223881, "grad_norm": 29.045120239257812, "learning_rate": 9.695346320346322e-06, "loss": 30.3363, "step": 18212 }, { "epoch": 433.644776119403, "grad_norm": 20.481473922729492, "learning_rate": 9.694805194805195e-06, "loss": 29.9034, "step": 18213 }, { "epoch": 433.6686567164179, "grad_norm": 21.91567611694336, "learning_rate": 9.69426406926407e-06, "loss": 30.7069, "step": 18214 }, { "epoch": 433.6925373134328, "grad_norm": 27.261484146118164, "learning_rate": 9.693722943722944e-06, "loss": 30.8677, "step": 18215 }, { "epoch": 433.7164179104478, "grad_norm": 18.287307739257812, "learning_rate": 9.69318181818182e-06, "loss": 29.6516, "step": 18216 }, { "epoch": 433.7402985074627, "grad_norm": 27.005582809448242, "learning_rate": 9.692640692640693e-06, "loss": 30.5841, "step": 18217 }, { "epoch": 433.7641791044776, "grad_norm": 23.46379852294922, "learning_rate": 9.692099567099568e-06, "loss": 29.9727, "step": 18218 }, { "epoch": 433.78805970149256, "grad_norm": 19.012388229370117, "learning_rate": 9.691558441558442e-06, "loss": 29.4702, "step": 18219 }, { "epoch": 433.81194029850747, "grad_norm": 21.882553100585938, "learning_rate": 9.691017316017317e-06, "loss": 31.2094, "step": 18220 }, { "epoch": 433.8358208955224, "grad_norm": 19.79618263244629, "learning_rate": 9.690476190476191e-06, "loss": 30.3362, "step": 18221 }, { "epoch": 433.85970149253734, "grad_norm": 23.287639617919922, "learning_rate": 9.689935064935066e-06, "loss": 31.2182, "step": 18222 }, { "epoch": 433.88358208955225, "grad_norm": 19.262718200683594, "learning_rate": 9.68939393939394e-06, "loss": 29.0019, "step": 18223 }, { "epoch": 433.90746268656716, "grad_norm": 18.80190658569336, "learning_rate": 9.688852813852815e-06, "loss": 31.3132, "step": 18224 }, { "epoch": 433.93134328358207, "grad_norm": 20.80198860168457, "learning_rate": 9.688311688311689e-06, "loss": 30.3108, "step": 18225 }, { "epoch": 433.95522388059703, "grad_norm": 22.15380859375, "learning_rate": 9.687770562770562e-06, "loss": 31.2515, "step": 18226 }, { "epoch": 433.97910447761194, "grad_norm": 25.34805679321289, "learning_rate": 9.687229437229438e-06, "loss": 29.3708, "step": 18227 }, { "epoch": 434.0, "grad_norm": 19.4454402923584, "learning_rate": 9.686688311688313e-06, "loss": 26.7535, "step": 18228 }, { "epoch": 434.0238805970149, "grad_norm": 16.465877532958984, "learning_rate": 9.686147186147187e-06, "loss": 30.0039, "step": 18229 }, { "epoch": 434.0477611940299, "grad_norm": 22.68767547607422, "learning_rate": 9.68560606060606e-06, "loss": 29.7874, "step": 18230 }, { "epoch": 434.0716417910448, "grad_norm": 16.684471130371094, "learning_rate": 9.685064935064935e-06, "loss": 29.8318, "step": 18231 }, { "epoch": 434.0955223880597, "grad_norm": 23.836278915405273, "learning_rate": 9.68452380952381e-06, "loss": 30.5512, "step": 18232 }, { "epoch": 434.1194029850746, "grad_norm": 19.90594482421875, "learning_rate": 9.683982683982684e-06, "loss": 30.1523, "step": 18233 }, { "epoch": 434.14328358208957, "grad_norm": 18.027976989746094, "learning_rate": 9.68344155844156e-06, "loss": 30.4587, "step": 18234 }, { "epoch": 434.1671641791045, "grad_norm": 20.156003952026367, "learning_rate": 9.682900432900433e-06, "loss": 30.6802, "step": 18235 }, { "epoch": 434.1910447761194, "grad_norm": 16.125776290893555, "learning_rate": 9.682359307359309e-06, "loss": 30.2559, "step": 18236 }, { "epoch": 434.21492537313435, "grad_norm": 22.57579231262207, "learning_rate": 9.681818181818182e-06, "loss": 28.9034, "step": 18237 }, { "epoch": 434.23880597014926, "grad_norm": 18.740280151367188, "learning_rate": 9.681277056277057e-06, "loss": 31.3436, "step": 18238 }, { "epoch": 434.26268656716417, "grad_norm": 15.311004638671875, "learning_rate": 9.680735930735931e-06, "loss": 29.4365, "step": 18239 }, { "epoch": 434.28656716417913, "grad_norm": 19.666488647460938, "learning_rate": 9.680194805194806e-06, "loss": 31.0185, "step": 18240 }, { "epoch": 434.31044776119404, "grad_norm": 17.850126266479492, "learning_rate": 9.67965367965368e-06, "loss": 30.8535, "step": 18241 }, { "epoch": 434.33432835820895, "grad_norm": 21.057401657104492, "learning_rate": 9.679112554112555e-06, "loss": 30.4327, "step": 18242 }, { "epoch": 434.35820895522386, "grad_norm": 23.181318283081055, "learning_rate": 9.678571428571429e-06, "loss": 30.5596, "step": 18243 }, { "epoch": 434.3820895522388, "grad_norm": 22.088865280151367, "learning_rate": 9.678030303030304e-06, "loss": 29.6078, "step": 18244 }, { "epoch": 434.40597014925373, "grad_norm": 21.577547073364258, "learning_rate": 9.67748917748918e-06, "loss": 29.4521, "step": 18245 }, { "epoch": 434.42985074626864, "grad_norm": 17.609233856201172, "learning_rate": 9.676948051948053e-06, "loss": 31.0021, "step": 18246 }, { "epoch": 434.4537313432836, "grad_norm": 21.627946853637695, "learning_rate": 9.676406926406927e-06, "loss": 29.8607, "step": 18247 }, { "epoch": 434.4776119402985, "grad_norm": 17.885469436645508, "learning_rate": 9.675865800865802e-06, "loss": 30.6589, "step": 18248 }, { "epoch": 434.5014925373134, "grad_norm": 20.681833267211914, "learning_rate": 9.675324675324677e-06, "loss": 29.7241, "step": 18249 }, { "epoch": 434.52537313432833, "grad_norm": 18.545494079589844, "learning_rate": 9.67478354978355e-06, "loss": 29.7875, "step": 18250 }, { "epoch": 434.5492537313433, "grad_norm": 19.230512619018555, "learning_rate": 9.674242424242424e-06, "loss": 30.675, "step": 18251 }, { "epoch": 434.5731343283582, "grad_norm": 20.685983657836914, "learning_rate": 9.6737012987013e-06, "loss": 30.303, "step": 18252 }, { "epoch": 434.5970149253731, "grad_norm": 21.225440979003906, "learning_rate": 9.673160173160175e-06, "loss": 29.8067, "step": 18253 }, { "epoch": 434.6208955223881, "grad_norm": 22.626157760620117, "learning_rate": 9.672619047619049e-06, "loss": 29.4272, "step": 18254 }, { "epoch": 434.644776119403, "grad_norm": 19.518709182739258, "learning_rate": 9.672077922077922e-06, "loss": 29.5052, "step": 18255 }, { "epoch": 434.6686567164179, "grad_norm": 20.40300750732422, "learning_rate": 9.671536796536798e-06, "loss": 30.9477, "step": 18256 }, { "epoch": 434.6925373134328, "grad_norm": 23.23400115966797, "learning_rate": 9.670995670995673e-06, "loss": 31.575, "step": 18257 }, { "epoch": 434.7164179104478, "grad_norm": 19.318143844604492, "learning_rate": 9.670454545454546e-06, "loss": 31.069, "step": 18258 }, { "epoch": 434.7402985074627, "grad_norm": 21.579816818237305, "learning_rate": 9.66991341991342e-06, "loss": 30.5433, "step": 18259 }, { "epoch": 434.7641791044776, "grad_norm": 20.505245208740234, "learning_rate": 9.669372294372295e-06, "loss": 29.7707, "step": 18260 }, { "epoch": 434.78805970149256, "grad_norm": 18.020780563354492, "learning_rate": 9.66883116883117e-06, "loss": 30.7001, "step": 18261 }, { "epoch": 434.81194029850747, "grad_norm": 19.13140869140625, "learning_rate": 9.668290043290044e-06, "loss": 29.109, "step": 18262 }, { "epoch": 434.8358208955224, "grad_norm": 20.555435180664062, "learning_rate": 9.667748917748918e-06, "loss": 29.5558, "step": 18263 }, { "epoch": 434.85970149253734, "grad_norm": 18.302505493164062, "learning_rate": 9.667207792207793e-06, "loss": 29.3802, "step": 18264 }, { "epoch": 434.88358208955225, "grad_norm": 17.136194229125977, "learning_rate": 9.666666666666667e-06, "loss": 30.112, "step": 18265 }, { "epoch": 434.90746268656716, "grad_norm": 20.43988609313965, "learning_rate": 9.666125541125542e-06, "loss": 29.7273, "step": 18266 }, { "epoch": 434.93134328358207, "grad_norm": 21.64653968811035, "learning_rate": 9.665584415584416e-06, "loss": 29.9953, "step": 18267 }, { "epoch": 434.95522388059703, "grad_norm": 19.094741821289062, "learning_rate": 9.665043290043291e-06, "loss": 28.9775, "step": 18268 }, { "epoch": 434.97910447761194, "grad_norm": 20.54570960998535, "learning_rate": 9.664502164502165e-06, "loss": 30.3792, "step": 18269 }, { "epoch": 435.0, "grad_norm": 14.584463119506836, "learning_rate": 9.66396103896104e-06, "loss": 24.9574, "step": 18270 }, { "epoch": 435.0238805970149, "grad_norm": 15.791644096374512, "learning_rate": 9.663419913419915e-06, "loss": 30.1044, "step": 18271 }, { "epoch": 435.0477611940299, "grad_norm": 18.99878692626953, "learning_rate": 9.662878787878789e-06, "loss": 30.5295, "step": 18272 }, { "epoch": 435.0716417910448, "grad_norm": 17.93902587890625, "learning_rate": 9.662337662337662e-06, "loss": 31.205, "step": 18273 }, { "epoch": 435.0955223880597, "grad_norm": 20.62776756286621, "learning_rate": 9.661796536796538e-06, "loss": 30.331, "step": 18274 }, { "epoch": 435.1194029850746, "grad_norm": 24.08873176574707, "learning_rate": 9.661255411255413e-06, "loss": 30.398, "step": 18275 }, { "epoch": 435.14328358208957, "grad_norm": 19.241039276123047, "learning_rate": 9.660714285714287e-06, "loss": 29.3642, "step": 18276 }, { "epoch": 435.1671641791045, "grad_norm": 20.880123138427734, "learning_rate": 9.66017316017316e-06, "loss": 29.915, "step": 18277 }, { "epoch": 435.1910447761194, "grad_norm": 22.662073135375977, "learning_rate": 9.659632034632035e-06, "loss": 31.5605, "step": 18278 }, { "epoch": 435.21492537313435, "grad_norm": 18.814071655273438, "learning_rate": 9.65909090909091e-06, "loss": 29.1728, "step": 18279 }, { "epoch": 435.23880597014926, "grad_norm": 15.919159889221191, "learning_rate": 9.658549783549784e-06, "loss": 29.6376, "step": 18280 }, { "epoch": 435.26268656716417, "grad_norm": 17.640968322753906, "learning_rate": 9.658008658008658e-06, "loss": 30.3631, "step": 18281 }, { "epoch": 435.28656716417913, "grad_norm": 25.971193313598633, "learning_rate": 9.657467532467533e-06, "loss": 29.6467, "step": 18282 }, { "epoch": 435.31044776119404, "grad_norm": 20.070676803588867, "learning_rate": 9.656926406926409e-06, "loss": 30.4805, "step": 18283 }, { "epoch": 435.33432835820895, "grad_norm": 17.20748519897461, "learning_rate": 9.656385281385282e-06, "loss": 30.0635, "step": 18284 }, { "epoch": 435.35820895522386, "grad_norm": 20.692230224609375, "learning_rate": 9.655844155844156e-06, "loss": 30.4306, "step": 18285 }, { "epoch": 435.3820895522388, "grad_norm": 14.956451416015625, "learning_rate": 9.655303030303031e-06, "loss": 29.3072, "step": 18286 }, { "epoch": 435.40597014925373, "grad_norm": 18.895109176635742, "learning_rate": 9.654761904761906e-06, "loss": 29.1034, "step": 18287 }, { "epoch": 435.42985074626864, "grad_norm": 22.296415328979492, "learning_rate": 9.65422077922078e-06, "loss": 30.1436, "step": 18288 }, { "epoch": 435.4537313432836, "grad_norm": 21.924699783325195, "learning_rate": 9.653679653679654e-06, "loss": 30.0738, "step": 18289 }, { "epoch": 435.4776119402985, "grad_norm": 27.33721160888672, "learning_rate": 9.653138528138529e-06, "loss": 29.8749, "step": 18290 }, { "epoch": 435.5014925373134, "grad_norm": 18.547534942626953, "learning_rate": 9.652597402597404e-06, "loss": 28.7964, "step": 18291 }, { "epoch": 435.52537313432833, "grad_norm": 17.842880249023438, "learning_rate": 9.652056277056278e-06, "loss": 30.13, "step": 18292 }, { "epoch": 435.5492537313433, "grad_norm": 16.652873992919922, "learning_rate": 9.651515151515153e-06, "loss": 29.7252, "step": 18293 }, { "epoch": 435.5731343283582, "grad_norm": 17.452665328979492, "learning_rate": 9.650974025974027e-06, "loss": 29.0173, "step": 18294 }, { "epoch": 435.5970149253731, "grad_norm": 16.44731330871582, "learning_rate": 9.650432900432902e-06, "loss": 29.189, "step": 18295 }, { "epoch": 435.6208955223881, "grad_norm": 16.534452438354492, "learning_rate": 9.649891774891776e-06, "loss": 31.1884, "step": 18296 }, { "epoch": 435.644776119403, "grad_norm": 17.389270782470703, "learning_rate": 9.64935064935065e-06, "loss": 30.8596, "step": 18297 }, { "epoch": 435.6686567164179, "grad_norm": 23.34886360168457, "learning_rate": 9.648809523809524e-06, "loss": 30.7862, "step": 18298 }, { "epoch": 435.6925373134328, "grad_norm": 23.544044494628906, "learning_rate": 9.6482683982684e-06, "loss": 29.7926, "step": 18299 }, { "epoch": 435.7164179104478, "grad_norm": 17.028202056884766, "learning_rate": 9.647727272727273e-06, "loss": 30.1643, "step": 18300 }, { "epoch": 435.7402985074627, "grad_norm": 19.95630645751953, "learning_rate": 9.647186147186149e-06, "loss": 29.3429, "step": 18301 }, { "epoch": 435.7641791044776, "grad_norm": 25.058000564575195, "learning_rate": 9.646645021645022e-06, "loss": 30.7787, "step": 18302 }, { "epoch": 435.78805970149256, "grad_norm": 22.101835250854492, "learning_rate": 9.646103896103896e-06, "loss": 30.2931, "step": 18303 }, { "epoch": 435.81194029850747, "grad_norm": 22.743885040283203, "learning_rate": 9.645562770562771e-06, "loss": 30.3098, "step": 18304 }, { "epoch": 435.8358208955224, "grad_norm": 27.39186668395996, "learning_rate": 9.645021645021646e-06, "loss": 29.4458, "step": 18305 }, { "epoch": 435.85970149253734, "grad_norm": 20.444841384887695, "learning_rate": 9.64448051948052e-06, "loss": 29.3536, "step": 18306 }, { "epoch": 435.88358208955225, "grad_norm": 17.648893356323242, "learning_rate": 9.643939393939394e-06, "loss": 30.3493, "step": 18307 }, { "epoch": 435.90746268656716, "grad_norm": 20.359098434448242, "learning_rate": 9.643398268398269e-06, "loss": 30.1989, "step": 18308 }, { "epoch": 435.93134328358207, "grad_norm": 20.681459426879883, "learning_rate": 9.642857142857144e-06, "loss": 30.9317, "step": 18309 }, { "epoch": 435.95522388059703, "grad_norm": 20.243064880371094, "learning_rate": 9.642316017316018e-06, "loss": 30.7878, "step": 18310 }, { "epoch": 435.97910447761194, "grad_norm": 16.853389739990234, "learning_rate": 9.641774891774891e-06, "loss": 30.89, "step": 18311 }, { "epoch": 436.0, "grad_norm": 18.18767547607422, "learning_rate": 9.641233766233767e-06, "loss": 25.6831, "step": 18312 }, { "epoch": 436.0238805970149, "grad_norm": 20.59931182861328, "learning_rate": 9.640692640692642e-06, "loss": 30.2498, "step": 18313 }, { "epoch": 436.0477611940299, "grad_norm": 22.943437576293945, "learning_rate": 9.640151515151516e-06, "loss": 29.6802, "step": 18314 }, { "epoch": 436.0716417910448, "grad_norm": 18.298830032348633, "learning_rate": 9.63961038961039e-06, "loss": 29.9832, "step": 18315 }, { "epoch": 436.0955223880597, "grad_norm": 20.217575073242188, "learning_rate": 9.639069264069264e-06, "loss": 30.7649, "step": 18316 }, { "epoch": 436.1194029850746, "grad_norm": 20.93951416015625, "learning_rate": 9.63852813852814e-06, "loss": 30.559, "step": 18317 }, { "epoch": 436.14328358208957, "grad_norm": 23.427631378173828, "learning_rate": 9.637987012987013e-06, "loss": 28.9487, "step": 18318 }, { "epoch": 436.1671641791045, "grad_norm": 20.74120330810547, "learning_rate": 9.637445887445889e-06, "loss": 28.8305, "step": 18319 }, { "epoch": 436.1910447761194, "grad_norm": 20.74155616760254, "learning_rate": 9.636904761904762e-06, "loss": 30.583, "step": 18320 }, { "epoch": 436.21492537313435, "grad_norm": 29.1689453125, "learning_rate": 9.636363636363638e-06, "loss": 29.8797, "step": 18321 }, { "epoch": 436.23880597014926, "grad_norm": 22.114608764648438, "learning_rate": 9.635822510822511e-06, "loss": 29.4548, "step": 18322 }, { "epoch": 436.26268656716417, "grad_norm": 18.739072799682617, "learning_rate": 9.635281385281386e-06, "loss": 30.5758, "step": 18323 }, { "epoch": 436.28656716417913, "grad_norm": 21.38836669921875, "learning_rate": 9.63474025974026e-06, "loss": 29.4342, "step": 18324 }, { "epoch": 436.31044776119404, "grad_norm": 19.67865753173828, "learning_rate": 9.634199134199135e-06, "loss": 30.2517, "step": 18325 }, { "epoch": 436.33432835820895, "grad_norm": 19.225650787353516, "learning_rate": 9.633658008658009e-06, "loss": 30.7116, "step": 18326 }, { "epoch": 436.35820895522386, "grad_norm": 22.08695411682129, "learning_rate": 9.633116883116884e-06, "loss": 30.3946, "step": 18327 }, { "epoch": 436.3820895522388, "grad_norm": 26.342357635498047, "learning_rate": 9.632575757575758e-06, "loss": 30.4322, "step": 18328 }, { "epoch": 436.40597014925373, "grad_norm": 18.675661087036133, "learning_rate": 9.632034632034633e-06, "loss": 30.3804, "step": 18329 }, { "epoch": 436.42985074626864, "grad_norm": 22.840425491333008, "learning_rate": 9.631493506493508e-06, "loss": 29.9916, "step": 18330 }, { "epoch": 436.4537313432836, "grad_norm": 18.92214584350586, "learning_rate": 9.630952380952382e-06, "loss": 30.1596, "step": 18331 }, { "epoch": 436.4776119402985, "grad_norm": 18.427722930908203, "learning_rate": 9.630411255411256e-06, "loss": 30.9355, "step": 18332 }, { "epoch": 436.5014925373134, "grad_norm": 23.86881446838379, "learning_rate": 9.629870129870131e-06, "loss": 30.8778, "step": 18333 }, { "epoch": 436.52537313432833, "grad_norm": 22.45794105529785, "learning_rate": 9.629329004329006e-06, "loss": 30.3567, "step": 18334 }, { "epoch": 436.5492537313433, "grad_norm": 21.06729507446289, "learning_rate": 9.62878787878788e-06, "loss": 29.3477, "step": 18335 }, { "epoch": 436.5731343283582, "grad_norm": 18.070526123046875, "learning_rate": 9.628246753246753e-06, "loss": 30.2371, "step": 18336 }, { "epoch": 436.5970149253731, "grad_norm": 21.866718292236328, "learning_rate": 9.627705627705629e-06, "loss": 30.5973, "step": 18337 }, { "epoch": 436.6208955223881, "grad_norm": 20.23525619506836, "learning_rate": 9.627164502164504e-06, "loss": 29.9108, "step": 18338 }, { "epoch": 436.644776119403, "grad_norm": 20.61037826538086, "learning_rate": 9.626623376623378e-06, "loss": 29.1173, "step": 18339 }, { "epoch": 436.6686567164179, "grad_norm": 16.306133270263672, "learning_rate": 9.626082251082251e-06, "loss": 30.1601, "step": 18340 }, { "epoch": 436.6925373134328, "grad_norm": 19.701372146606445, "learning_rate": 9.625541125541127e-06, "loss": 29.3894, "step": 18341 }, { "epoch": 436.7164179104478, "grad_norm": 17.36764144897461, "learning_rate": 9.625e-06, "loss": 28.3988, "step": 18342 }, { "epoch": 436.7402985074627, "grad_norm": 23.294689178466797, "learning_rate": 9.624458874458875e-06, "loss": 30.0732, "step": 18343 }, { "epoch": 436.7641791044776, "grad_norm": 18.175935745239258, "learning_rate": 9.623917748917749e-06, "loss": 30.4979, "step": 18344 }, { "epoch": 436.78805970149256, "grad_norm": 18.192787170410156, "learning_rate": 9.623376623376624e-06, "loss": 30.0431, "step": 18345 }, { "epoch": 436.81194029850747, "grad_norm": 21.695138931274414, "learning_rate": 9.622835497835498e-06, "loss": 29.922, "step": 18346 }, { "epoch": 436.8358208955224, "grad_norm": 19.197233200073242, "learning_rate": 9.622294372294373e-06, "loss": 30.2292, "step": 18347 }, { "epoch": 436.85970149253734, "grad_norm": 19.522125244140625, "learning_rate": 9.621753246753247e-06, "loss": 29.7332, "step": 18348 }, { "epoch": 436.88358208955225, "grad_norm": 17.038297653198242, "learning_rate": 9.621212121212122e-06, "loss": 29.5148, "step": 18349 }, { "epoch": 436.90746268656716, "grad_norm": 21.62288475036621, "learning_rate": 9.620670995670996e-06, "loss": 29.949, "step": 18350 }, { "epoch": 436.93134328358207, "grad_norm": 19.052358627319336, "learning_rate": 9.620129870129871e-06, "loss": 29.7353, "step": 18351 }, { "epoch": 436.95522388059703, "grad_norm": 24.851343154907227, "learning_rate": 9.619588744588746e-06, "loss": 30.6338, "step": 18352 }, { "epoch": 436.97910447761194, "grad_norm": 19.704662322998047, "learning_rate": 9.61904761904762e-06, "loss": 31.0321, "step": 18353 }, { "epoch": 437.0, "grad_norm": 19.7940673828125, "learning_rate": 9.618506493506494e-06, "loss": 26.4347, "step": 18354 }, { "epoch": 437.0238805970149, "grad_norm": 21.809568405151367, "learning_rate": 9.617965367965369e-06, "loss": 30.4459, "step": 18355 }, { "epoch": 437.0477611940299, "grad_norm": 21.622299194335938, "learning_rate": 9.617424242424244e-06, "loss": 30.5263, "step": 18356 }, { "epoch": 437.0716417910448, "grad_norm": 19.945377349853516, "learning_rate": 9.616883116883118e-06, "loss": 30.7836, "step": 18357 }, { "epoch": 437.0955223880597, "grad_norm": 19.13798713684082, "learning_rate": 9.616341991341991e-06, "loss": 29.5093, "step": 18358 }, { "epoch": 437.1194029850746, "grad_norm": 19.33329200744629, "learning_rate": 9.615800865800867e-06, "loss": 31.2368, "step": 18359 }, { "epoch": 437.14328358208957, "grad_norm": 23.51718521118164, "learning_rate": 9.615259740259742e-06, "loss": 30.6811, "step": 18360 }, { "epoch": 437.1671641791045, "grad_norm": 22.49342918395996, "learning_rate": 9.614718614718616e-06, "loss": 30.6678, "step": 18361 }, { "epoch": 437.1910447761194, "grad_norm": 18.669591903686523, "learning_rate": 9.61417748917749e-06, "loss": 30.1849, "step": 18362 }, { "epoch": 437.21492537313435, "grad_norm": 19.08563995361328, "learning_rate": 9.613636363636364e-06, "loss": 30.6485, "step": 18363 }, { "epoch": 437.23880597014926, "grad_norm": 16.72998809814453, "learning_rate": 9.61309523809524e-06, "loss": 30.2999, "step": 18364 }, { "epoch": 437.26268656716417, "grad_norm": 15.711103439331055, "learning_rate": 9.612554112554113e-06, "loss": 31.2417, "step": 18365 }, { "epoch": 437.28656716417913, "grad_norm": 18.274826049804688, "learning_rate": 9.612012987012987e-06, "loss": 29.0119, "step": 18366 }, { "epoch": 437.31044776119404, "grad_norm": 16.25044059753418, "learning_rate": 9.611471861471862e-06, "loss": 29.3701, "step": 18367 }, { "epoch": 437.33432835820895, "grad_norm": 19.717958450317383, "learning_rate": 9.610930735930738e-06, "loss": 29.8097, "step": 18368 }, { "epoch": 437.35820895522386, "grad_norm": 24.66046714782715, "learning_rate": 9.610389610389611e-06, "loss": 29.3774, "step": 18369 }, { "epoch": 437.3820895522388, "grad_norm": 22.514968872070312, "learning_rate": 9.609848484848485e-06, "loss": 30.8877, "step": 18370 }, { "epoch": 437.40597014925373, "grad_norm": 16.56135368347168, "learning_rate": 9.60930735930736e-06, "loss": 29.161, "step": 18371 }, { "epoch": 437.42985074626864, "grad_norm": 22.930313110351562, "learning_rate": 9.608766233766235e-06, "loss": 30.1345, "step": 18372 }, { "epoch": 437.4537313432836, "grad_norm": 21.518224716186523, "learning_rate": 9.608225108225109e-06, "loss": 29.7609, "step": 18373 }, { "epoch": 437.4776119402985, "grad_norm": 18.309608459472656, "learning_rate": 9.607683982683983e-06, "loss": 30.1276, "step": 18374 }, { "epoch": 437.5014925373134, "grad_norm": 16.86956787109375, "learning_rate": 9.607142857142858e-06, "loss": 29.9542, "step": 18375 }, { "epoch": 437.52537313432833, "grad_norm": 16.955245971679688, "learning_rate": 9.606601731601733e-06, "loss": 28.8259, "step": 18376 }, { "epoch": 437.5492537313433, "grad_norm": 15.334726333618164, "learning_rate": 9.606060606060607e-06, "loss": 29.7862, "step": 18377 }, { "epoch": 437.5731343283582, "grad_norm": 16.968448638916016, "learning_rate": 9.605519480519482e-06, "loss": 28.543, "step": 18378 }, { "epoch": 437.5970149253731, "grad_norm": 18.797088623046875, "learning_rate": 9.604978354978356e-06, "loss": 29.9734, "step": 18379 }, { "epoch": 437.6208955223881, "grad_norm": 20.425338745117188, "learning_rate": 9.604437229437231e-06, "loss": 30.6261, "step": 18380 }, { "epoch": 437.644776119403, "grad_norm": 23.71156120300293, "learning_rate": 9.603896103896105e-06, "loss": 29.6452, "step": 18381 }, { "epoch": 437.6686567164179, "grad_norm": 19.44552230834961, "learning_rate": 9.60335497835498e-06, "loss": 29.8348, "step": 18382 }, { "epoch": 437.6925373134328, "grad_norm": 19.09593963623047, "learning_rate": 9.602813852813853e-06, "loss": 30.0302, "step": 18383 }, { "epoch": 437.7164179104478, "grad_norm": 16.287437438964844, "learning_rate": 9.602272727272727e-06, "loss": 30.1343, "step": 18384 }, { "epoch": 437.7402985074627, "grad_norm": 22.482288360595703, "learning_rate": 9.601731601731602e-06, "loss": 30.1525, "step": 18385 }, { "epoch": 437.7641791044776, "grad_norm": 21.083412170410156, "learning_rate": 9.601190476190478e-06, "loss": 30.0119, "step": 18386 }, { "epoch": 437.78805970149256, "grad_norm": 16.497600555419922, "learning_rate": 9.600649350649351e-06, "loss": 29.4542, "step": 18387 }, { "epoch": 437.81194029850747, "grad_norm": 19.62682342529297, "learning_rate": 9.600108225108225e-06, "loss": 28.9712, "step": 18388 }, { "epoch": 437.8358208955224, "grad_norm": 24.665424346923828, "learning_rate": 9.5995670995671e-06, "loss": 29.6004, "step": 18389 }, { "epoch": 437.85970149253734, "grad_norm": 24.325775146484375, "learning_rate": 9.599025974025975e-06, "loss": 29.7811, "step": 18390 }, { "epoch": 437.88358208955225, "grad_norm": 14.164239883422852, "learning_rate": 9.598484848484849e-06, "loss": 29.9186, "step": 18391 }, { "epoch": 437.90746268656716, "grad_norm": 20.377151489257812, "learning_rate": 9.597943722943723e-06, "loss": 30.8108, "step": 18392 }, { "epoch": 437.93134328358207, "grad_norm": 19.745681762695312, "learning_rate": 9.597402597402598e-06, "loss": 30.8965, "step": 18393 }, { "epoch": 437.95522388059703, "grad_norm": 23.006057739257812, "learning_rate": 9.596861471861473e-06, "loss": 30.3527, "step": 18394 }, { "epoch": 437.97910447761194, "grad_norm": 21.118085861206055, "learning_rate": 9.596320346320347e-06, "loss": 29.401, "step": 18395 }, { "epoch": 438.0, "grad_norm": 16.79197120666504, "learning_rate": 9.59577922077922e-06, "loss": 26.6836, "step": 18396 }, { "epoch": 438.0238805970149, "grad_norm": 24.08824348449707, "learning_rate": 9.595238095238096e-06, "loss": 29.4007, "step": 18397 }, { "epoch": 438.0477611940299, "grad_norm": 21.36703109741211, "learning_rate": 9.594696969696971e-06, "loss": 29.648, "step": 18398 }, { "epoch": 438.0716417910448, "grad_norm": 23.28516387939453, "learning_rate": 9.594155844155845e-06, "loss": 30.2528, "step": 18399 }, { "epoch": 438.0955223880597, "grad_norm": 19.036104202270508, "learning_rate": 9.59361471861472e-06, "loss": 30.0436, "step": 18400 }, { "epoch": 438.1194029850746, "grad_norm": 21.310630798339844, "learning_rate": 9.593073593073594e-06, "loss": 28.7011, "step": 18401 }, { "epoch": 438.14328358208957, "grad_norm": 17.626867294311523, "learning_rate": 9.592532467532469e-06, "loss": 29.9864, "step": 18402 }, { "epoch": 438.1671641791045, "grad_norm": 25.529451370239258, "learning_rate": 9.591991341991342e-06, "loss": 29.9949, "step": 18403 }, { "epoch": 438.1910447761194, "grad_norm": 20.48516845703125, "learning_rate": 9.591450216450218e-06, "loss": 29.5868, "step": 18404 }, { "epoch": 438.21492537313435, "grad_norm": 21.072383880615234, "learning_rate": 9.590909090909091e-06, "loss": 29.6007, "step": 18405 }, { "epoch": 438.23880597014926, "grad_norm": 18.2504825592041, "learning_rate": 9.590367965367967e-06, "loss": 30.4394, "step": 18406 }, { "epoch": 438.26268656716417, "grad_norm": 21.870798110961914, "learning_rate": 9.58982683982684e-06, "loss": 29.6911, "step": 18407 }, { "epoch": 438.28656716417913, "grad_norm": 21.054988861083984, "learning_rate": 9.589285714285716e-06, "loss": 31.2226, "step": 18408 }, { "epoch": 438.31044776119404, "grad_norm": 23.002134323120117, "learning_rate": 9.588744588744589e-06, "loss": 30.6077, "step": 18409 }, { "epoch": 438.33432835820895, "grad_norm": 20.597837448120117, "learning_rate": 9.588203463203464e-06, "loss": 30.642, "step": 18410 }, { "epoch": 438.35820895522386, "grad_norm": 20.26851463317871, "learning_rate": 9.587662337662338e-06, "loss": 31.7231, "step": 18411 }, { "epoch": 438.3820895522388, "grad_norm": 22.513456344604492, "learning_rate": 9.587121212121213e-06, "loss": 30.6977, "step": 18412 }, { "epoch": 438.40597014925373, "grad_norm": 24.793535232543945, "learning_rate": 9.586580086580087e-06, "loss": 31.4542, "step": 18413 }, { "epoch": 438.42985074626864, "grad_norm": 20.16357421875, "learning_rate": 9.586038961038962e-06, "loss": 30.6742, "step": 18414 }, { "epoch": 438.4537313432836, "grad_norm": 24.922685623168945, "learning_rate": 9.585497835497838e-06, "loss": 29.9368, "step": 18415 }, { "epoch": 438.4776119402985, "grad_norm": 26.20820426940918, "learning_rate": 9.584956709956711e-06, "loss": 30.7537, "step": 18416 }, { "epoch": 438.5014925373134, "grad_norm": 19.87091827392578, "learning_rate": 9.584415584415585e-06, "loss": 29.8322, "step": 18417 }, { "epoch": 438.52537313432833, "grad_norm": 21.4395809173584, "learning_rate": 9.58387445887446e-06, "loss": 29.5324, "step": 18418 }, { "epoch": 438.5492537313433, "grad_norm": 19.096750259399414, "learning_rate": 9.583333333333335e-06, "loss": 29.9812, "step": 18419 }, { "epoch": 438.5731343283582, "grad_norm": 18.143898010253906, "learning_rate": 9.582792207792209e-06, "loss": 29.1294, "step": 18420 }, { "epoch": 438.5970149253731, "grad_norm": 19.896244049072266, "learning_rate": 9.582251082251083e-06, "loss": 30.8624, "step": 18421 }, { "epoch": 438.6208955223881, "grad_norm": 16.79835319519043, "learning_rate": 9.581709956709956e-06, "loss": 30.0049, "step": 18422 }, { "epoch": 438.644776119403, "grad_norm": 24.06325912475586, "learning_rate": 9.581168831168831e-06, "loss": 29.8186, "step": 18423 }, { "epoch": 438.6686567164179, "grad_norm": 19.912395477294922, "learning_rate": 9.580627705627707e-06, "loss": 30.1886, "step": 18424 }, { "epoch": 438.6925373134328, "grad_norm": 23.333765029907227, "learning_rate": 9.58008658008658e-06, "loss": 28.5251, "step": 18425 }, { "epoch": 438.7164179104478, "grad_norm": 18.128990173339844, "learning_rate": 9.579545454545456e-06, "loss": 29.2279, "step": 18426 }, { "epoch": 438.7402985074627, "grad_norm": 23.521347045898438, "learning_rate": 9.57900432900433e-06, "loss": 29.4037, "step": 18427 }, { "epoch": 438.7641791044776, "grad_norm": 21.70577621459961, "learning_rate": 9.578463203463205e-06, "loss": 29.4827, "step": 18428 }, { "epoch": 438.78805970149256, "grad_norm": 19.254405975341797, "learning_rate": 9.577922077922078e-06, "loss": 29.448, "step": 18429 }, { "epoch": 438.81194029850747, "grad_norm": 18.917478561401367, "learning_rate": 9.577380952380953e-06, "loss": 28.6075, "step": 18430 }, { "epoch": 438.8358208955224, "grad_norm": 21.376449584960938, "learning_rate": 9.576839826839827e-06, "loss": 29.4114, "step": 18431 }, { "epoch": 438.85970149253734, "grad_norm": 21.97930908203125, "learning_rate": 9.576298701298702e-06, "loss": 30.9656, "step": 18432 }, { "epoch": 438.88358208955225, "grad_norm": 16.989635467529297, "learning_rate": 9.575757575757576e-06, "loss": 30.2065, "step": 18433 }, { "epoch": 438.90746268656716, "grad_norm": 19.979799270629883, "learning_rate": 9.575216450216451e-06, "loss": 29.4274, "step": 18434 }, { "epoch": 438.93134328358207, "grad_norm": 20.198780059814453, "learning_rate": 9.574675324675325e-06, "loss": 29.6797, "step": 18435 }, { "epoch": 438.95522388059703, "grad_norm": 23.670854568481445, "learning_rate": 9.5741341991342e-06, "loss": 29.9383, "step": 18436 }, { "epoch": 438.97910447761194, "grad_norm": 20.571134567260742, "learning_rate": 9.573593073593075e-06, "loss": 30.4454, "step": 18437 }, { "epoch": 439.0, "grad_norm": 19.257028579711914, "learning_rate": 9.573051948051949e-06, "loss": 26.2818, "step": 18438 }, { "epoch": 439.0238805970149, "grad_norm": 17.581405639648438, "learning_rate": 9.572510822510823e-06, "loss": 31.1575, "step": 18439 }, { "epoch": 439.0477611940299, "grad_norm": 19.168819427490234, "learning_rate": 9.571969696969698e-06, "loss": 30.432, "step": 18440 }, { "epoch": 439.0716417910448, "grad_norm": 25.43934440612793, "learning_rate": 9.571428571428573e-06, "loss": 29.1514, "step": 18441 }, { "epoch": 439.0955223880597, "grad_norm": 22.299837112426758, "learning_rate": 9.570887445887447e-06, "loss": 28.0818, "step": 18442 }, { "epoch": 439.1194029850746, "grad_norm": 19.19082260131836, "learning_rate": 9.57034632034632e-06, "loss": 30.8908, "step": 18443 }, { "epoch": 439.14328358208957, "grad_norm": 23.17669105529785, "learning_rate": 9.569805194805196e-06, "loss": 29.4613, "step": 18444 }, { "epoch": 439.1671641791045, "grad_norm": 23.474390029907227, "learning_rate": 9.569264069264071e-06, "loss": 30.4825, "step": 18445 }, { "epoch": 439.1910447761194, "grad_norm": 17.151443481445312, "learning_rate": 9.568722943722945e-06, "loss": 30.5358, "step": 18446 }, { "epoch": 439.21492537313435, "grad_norm": 21.566646575927734, "learning_rate": 9.568181818181818e-06, "loss": 30.2673, "step": 18447 }, { "epoch": 439.23880597014926, "grad_norm": 24.813852310180664, "learning_rate": 9.567640692640694e-06, "loss": 31.1043, "step": 18448 }, { "epoch": 439.26268656716417, "grad_norm": 20.139890670776367, "learning_rate": 9.567099567099569e-06, "loss": 29.8795, "step": 18449 }, { "epoch": 439.28656716417913, "grad_norm": 17.006986618041992, "learning_rate": 9.566558441558442e-06, "loss": 29.6531, "step": 18450 }, { "epoch": 439.31044776119404, "grad_norm": 28.538646697998047, "learning_rate": 9.566017316017316e-06, "loss": 29.801, "step": 18451 }, { "epoch": 439.33432835820895, "grad_norm": 21.307741165161133, "learning_rate": 9.565476190476191e-06, "loss": 29.3162, "step": 18452 }, { "epoch": 439.35820895522386, "grad_norm": 17.67075538635254, "learning_rate": 9.564935064935067e-06, "loss": 28.564, "step": 18453 }, { "epoch": 439.3820895522388, "grad_norm": 23.861326217651367, "learning_rate": 9.56439393939394e-06, "loss": 29.5506, "step": 18454 }, { "epoch": 439.40597014925373, "grad_norm": 20.954423904418945, "learning_rate": 9.563852813852814e-06, "loss": 30.6395, "step": 18455 }, { "epoch": 439.42985074626864, "grad_norm": 20.9737491607666, "learning_rate": 9.563311688311689e-06, "loss": 30.9972, "step": 18456 }, { "epoch": 439.4537313432836, "grad_norm": 20.269493103027344, "learning_rate": 9.562770562770564e-06, "loss": 29.935, "step": 18457 }, { "epoch": 439.4776119402985, "grad_norm": 29.25650405883789, "learning_rate": 9.562229437229438e-06, "loss": 31.0753, "step": 18458 }, { "epoch": 439.5014925373134, "grad_norm": 22.108583450317383, "learning_rate": 9.561688311688313e-06, "loss": 30.9956, "step": 18459 }, { "epoch": 439.52537313432833, "grad_norm": 17.152618408203125, "learning_rate": 9.561147186147187e-06, "loss": 28.8859, "step": 18460 }, { "epoch": 439.5492537313433, "grad_norm": 29.345670700073242, "learning_rate": 9.56060606060606e-06, "loss": 29.3872, "step": 18461 }, { "epoch": 439.5731343283582, "grad_norm": 21.24360466003418, "learning_rate": 9.560064935064936e-06, "loss": 29.3939, "step": 18462 }, { "epoch": 439.5970149253731, "grad_norm": 20.873172760009766, "learning_rate": 9.559523809523811e-06, "loss": 30.2505, "step": 18463 }, { "epoch": 439.6208955223881, "grad_norm": 18.2236270904541, "learning_rate": 9.558982683982685e-06, "loss": 30.2748, "step": 18464 }, { "epoch": 439.644776119403, "grad_norm": 23.175804138183594, "learning_rate": 9.558441558441558e-06, "loss": 28.8336, "step": 18465 }, { "epoch": 439.6686567164179, "grad_norm": 21.064016342163086, "learning_rate": 9.557900432900434e-06, "loss": 29.8691, "step": 18466 }, { "epoch": 439.6925373134328, "grad_norm": 17.81121253967285, "learning_rate": 9.557359307359309e-06, "loss": 30.5635, "step": 18467 }, { "epoch": 439.7164179104478, "grad_norm": 18.693376541137695, "learning_rate": 9.556818181818182e-06, "loss": 28.9394, "step": 18468 }, { "epoch": 439.7402985074627, "grad_norm": 20.317508697509766, "learning_rate": 9.556277056277056e-06, "loss": 29.9811, "step": 18469 }, { "epoch": 439.7641791044776, "grad_norm": 24.36268424987793, "learning_rate": 9.555735930735931e-06, "loss": 29.8501, "step": 18470 }, { "epoch": 439.78805970149256, "grad_norm": 18.132827758789062, "learning_rate": 9.555194805194807e-06, "loss": 30.5219, "step": 18471 }, { "epoch": 439.81194029850747, "grad_norm": 20.71527099609375, "learning_rate": 9.55465367965368e-06, "loss": 29.5205, "step": 18472 }, { "epoch": 439.8358208955224, "grad_norm": 17.375009536743164, "learning_rate": 9.554112554112554e-06, "loss": 30.0705, "step": 18473 }, { "epoch": 439.85970149253734, "grad_norm": 18.961162567138672, "learning_rate": 9.55357142857143e-06, "loss": 30.8293, "step": 18474 }, { "epoch": 439.88358208955225, "grad_norm": 17.276344299316406, "learning_rate": 9.553030303030304e-06, "loss": 29.609, "step": 18475 }, { "epoch": 439.90746268656716, "grad_norm": 20.65808868408203, "learning_rate": 9.552489177489178e-06, "loss": 30.5208, "step": 18476 }, { "epoch": 439.93134328358207, "grad_norm": 28.521509170532227, "learning_rate": 9.551948051948052e-06, "loss": 29.6507, "step": 18477 }, { "epoch": 439.95522388059703, "grad_norm": 17.221881866455078, "learning_rate": 9.551406926406927e-06, "loss": 29.306, "step": 18478 }, { "epoch": 439.97910447761194, "grad_norm": 16.699331283569336, "learning_rate": 9.550865800865802e-06, "loss": 28.9915, "step": 18479 }, { "epoch": 440.0, "grad_norm": 22.226022720336914, "learning_rate": 9.550324675324676e-06, "loss": 27.0842, "step": 18480 }, { "epoch": 440.0, "step": 18480, "total_flos": 9.084245825331505e+17, "train_loss": 1.3789456827815993, "train_runtime": 25688.7761, "train_samples_per_second": 91.67, "train_steps_per_second": 0.719 }, { "epoch": 440.0238805970149, "grad_norm": 17.3594970703125, "learning_rate": 1e-05, "loss": 28.5431, "step": 18481 }, { "epoch": 440.0477611940299, "grad_norm": Infinity, "learning_rate": 9.9994708994709e-06, "loss": 34.6472, "step": 18482 }, { "epoch": 440.0716417910448, "grad_norm": 216.82839965820312, "learning_rate": 9.9994708994709e-06, "loss": 34.2689, "step": 18483 }, { "epoch": 440.0955223880597, "grad_norm": 99.35746765136719, "learning_rate": 9.9989417989418e-06, "loss": 32.1615, "step": 18484 }, { "epoch": 440.1194029850746, "grad_norm": 67.5246353149414, "learning_rate": 9.998412698412699e-06, "loss": 31.404, "step": 18485 }, { "epoch": 440.14328358208957, "grad_norm": 54.333831787109375, "learning_rate": 9.997883597883598e-06, "loss": 31.1933, "step": 18486 }, { "epoch": 440.1671641791045, "grad_norm": 67.78981018066406, "learning_rate": 9.997354497354498e-06, "loss": 30.1989, "step": 18487 }, { "epoch": 440.1910447761194, "grad_norm": 64.62834167480469, "learning_rate": 9.996825396825399e-06, "loss": 31.7329, "step": 18488 }, { "epoch": 440.21492537313435, "grad_norm": 38.224002838134766, "learning_rate": 9.996296296296298e-06, "loss": 30.013, "step": 18489 }, { "epoch": 440.23880597014926, "grad_norm": 51.87199401855469, "learning_rate": 9.995767195767196e-06, "loss": 30.6313, "step": 18490 }, { "epoch": 440.26268656716417, "grad_norm": 35.34834289550781, "learning_rate": 9.995238095238095e-06, "loss": 30.4587, "step": 18491 }, { "epoch": 440.28656716417913, "grad_norm": 32.24353790283203, "learning_rate": 9.994708994708996e-06, "loss": 29.857, "step": 18492 }, { "epoch": 440.31044776119404, "grad_norm": 45.48128890991211, "learning_rate": 9.994179894179895e-06, "loss": 30.6734, "step": 18493 }, { "epoch": 440.33432835820895, "grad_norm": 29.150545120239258, "learning_rate": 9.993650793650793e-06, "loss": 29.4115, "step": 18494 }, { "epoch": 440.35820895522386, "grad_norm": 31.93963050842285, "learning_rate": 9.993121693121694e-06, "loss": 29.6597, "step": 18495 }, { "epoch": 440.3820895522388, "grad_norm": 37.35916519165039, "learning_rate": 9.992592592592594e-06, "loss": 29.9122, "step": 18496 }, { "epoch": 440.40597014925373, "grad_norm": 21.346900939941406, "learning_rate": 9.992063492063493e-06, "loss": 29.7236, "step": 18497 }, { "epoch": 440.42985074626864, "grad_norm": 30.50381851196289, "learning_rate": 9.991534391534392e-06, "loss": 29.7606, "step": 18498 }, { "epoch": 440.4537313432836, "grad_norm": 31.944915771484375, "learning_rate": 9.991005291005293e-06, "loss": 31.5071, "step": 18499 }, { "epoch": 440.4776119402985, "grad_norm": 24.084373474121094, "learning_rate": 9.990476190476191e-06, "loss": 30.5081, "step": 18500 }, { "epoch": 440.5014925373134, "grad_norm": 31.495773315429688, "learning_rate": 9.98994708994709e-06, "loss": 31.0624, "step": 18501 }, { "epoch": 440.52537313432833, "grad_norm": 25.130971908569336, "learning_rate": 9.989417989417989e-06, "loss": 29.6397, "step": 18502 }, { "epoch": 440.5492537313433, "grad_norm": 27.502403259277344, "learning_rate": 9.98888888888889e-06, "loss": 29.7606, "step": 18503 }, { "epoch": 440.5731343283582, "grad_norm": 24.509063720703125, "learning_rate": 9.98835978835979e-06, "loss": 30.8578, "step": 18504 }, { "epoch": 440.5970149253731, "grad_norm": 25.166122436523438, "learning_rate": 9.987830687830689e-06, "loss": 30.5301, "step": 18505 }, { "epoch": 440.6208955223881, "grad_norm": 22.01051139831543, "learning_rate": 9.987301587301588e-06, "loss": 30.5776, "step": 18506 }, { "epoch": 440.644776119403, "grad_norm": 26.435636520385742, "learning_rate": 9.986772486772488e-06, "loss": 29.8726, "step": 18507 }, { "epoch": 440.6686567164179, "grad_norm": 21.437650680541992, "learning_rate": 9.986243386243387e-06, "loss": 31.0078, "step": 18508 }, { "epoch": 440.6925373134328, "grad_norm": 23.44769287109375, "learning_rate": 9.985714285714286e-06, "loss": 30.2984, "step": 18509 }, { "epoch": 440.7164179104478, "grad_norm": 21.210704803466797, "learning_rate": 9.985185185185185e-06, "loss": 30.3408, "step": 18510 }, { "epoch": 440.7402985074627, "grad_norm": 24.448598861694336, "learning_rate": 9.984656084656085e-06, "loss": 29.5081, "step": 18511 }, { "epoch": 440.7641791044776, "grad_norm": 19.705307006835938, "learning_rate": 9.984126984126986e-06, "loss": 29.2404, "step": 18512 }, { "epoch": 440.78805970149256, "grad_norm": 21.462610244750977, "learning_rate": 9.983597883597885e-06, "loss": 30.1652, "step": 18513 }, { "epoch": 440.81194029850747, "grad_norm": 18.456214904785156, "learning_rate": 9.983068783068783e-06, "loss": 28.7153, "step": 18514 }, { "epoch": 440.8358208955224, "grad_norm": 20.732948303222656, "learning_rate": 9.982539682539684e-06, "loss": 29.555, "step": 18515 }, { "epoch": 440.85970149253734, "grad_norm": 18.703096389770508, "learning_rate": 9.982010582010583e-06, "loss": 29.4648, "step": 18516 }, { "epoch": 440.88358208955225, "grad_norm": 19.93602752685547, "learning_rate": 9.981481481481482e-06, "loss": 30.4644, "step": 18517 }, { "epoch": 440.90746268656716, "grad_norm": 18.751026153564453, "learning_rate": 9.980952380952382e-06, "loss": 30.0305, "step": 18518 }, { "epoch": 440.93134328358207, "grad_norm": 19.60203742980957, "learning_rate": 9.980423280423281e-06, "loss": 30.8265, "step": 18519 }, { "epoch": 440.95522388059703, "grad_norm": 16.871681213378906, "learning_rate": 9.979894179894181e-06, "loss": 29.7696, "step": 18520 }, { "epoch": 440.97910447761194, "grad_norm": 21.47115707397461, "learning_rate": 9.97936507936508e-06, "loss": 30.9418, "step": 18521 }, { "epoch": 441.0, "grad_norm": 19.628236770629883, "learning_rate": 9.97883597883598e-06, "loss": 26.7793, "step": 18522 }, { "epoch": 441.0238805970149, "grad_norm": 23.01894760131836, "learning_rate": 9.97830687830688e-06, "loss": 30.63, "step": 18523 }, { "epoch": 441.0477611940299, "grad_norm": 24.92695426940918, "learning_rate": 9.977777777777778e-06, "loss": 31.6279, "step": 18524 }, { "epoch": 441.0716417910448, "grad_norm": 22.709012985229492, "learning_rate": 9.977248677248677e-06, "loss": 30.1622, "step": 18525 }, { "epoch": 441.0955223880597, "grad_norm": 21.488391876220703, "learning_rate": 9.976719576719578e-06, "loss": 30.9013, "step": 18526 }, { "epoch": 441.1194029850746, "grad_norm": 24.677249908447266, "learning_rate": 9.976190476190477e-06, "loss": 29.3563, "step": 18527 }, { "epoch": 441.14328358208957, "grad_norm": 20.757888793945312, "learning_rate": 9.975661375661377e-06, "loss": 29.4941, "step": 18528 }, { "epoch": 441.1671641791045, "grad_norm": 28.257102966308594, "learning_rate": 9.975132275132276e-06, "loss": 28.1639, "step": 18529 }, { "epoch": 441.1910447761194, "grad_norm": 21.217844009399414, "learning_rate": 9.974603174603176e-06, "loss": 30.9851, "step": 18530 }, { "epoch": 441.21492537313435, "grad_norm": 20.33362579345703, "learning_rate": 9.974074074074075e-06, "loss": 28.5901, "step": 18531 }, { "epoch": 441.23880597014926, "grad_norm": 24.75749969482422, "learning_rate": 9.973544973544974e-06, "loss": 30.44, "step": 18532 }, { "epoch": 441.26268656716417, "grad_norm": 22.43568992614746, "learning_rate": 9.973015873015875e-06, "loss": 29.9177, "step": 18533 }, { "epoch": 441.28656716417913, "grad_norm": 21.92473030090332, "learning_rate": 9.972486772486773e-06, "loss": 29.5564, "step": 18534 }, { "epoch": 441.31044776119404, "grad_norm": 18.299039840698242, "learning_rate": 9.971957671957672e-06, "loss": 29.081, "step": 18535 }, { "epoch": 441.33432835820895, "grad_norm": 30.581947326660156, "learning_rate": 9.971428571428571e-06, "loss": 30.2632, "step": 18536 }, { "epoch": 441.35820895522386, "grad_norm": 18.41385841369629, "learning_rate": 9.970899470899472e-06, "loss": 30.6754, "step": 18537 }, { "epoch": 441.3820895522388, "grad_norm": 19.474079132080078, "learning_rate": 9.970370370370372e-06, "loss": 29.5535, "step": 18538 }, { "epoch": 441.40597014925373, "grad_norm": 21.05908966064453, "learning_rate": 9.969841269841271e-06, "loss": 29.9627, "step": 18539 }, { "epoch": 441.42985074626864, "grad_norm": 22.585433959960938, "learning_rate": 9.96931216931217e-06, "loss": 28.8729, "step": 18540 }, { "epoch": 441.4537313432836, "grad_norm": 20.29838752746582, "learning_rate": 9.96878306878307e-06, "loss": 28.6151, "step": 18541 }, { "epoch": 441.4776119402985, "grad_norm": 18.94068145751953, "learning_rate": 9.968253968253969e-06, "loss": 29.5402, "step": 18542 }, { "epoch": 441.5014925373134, "grad_norm": 19.419042587280273, "learning_rate": 9.967724867724868e-06, "loss": 30.2059, "step": 18543 }, { "epoch": 441.52537313432833, "grad_norm": 22.420263290405273, "learning_rate": 9.967195767195767e-06, "loss": 30.3212, "step": 18544 }, { "epoch": 441.5492537313433, "grad_norm": 24.03433609008789, "learning_rate": 9.966666666666667e-06, "loss": 30.5324, "step": 18545 }, { "epoch": 441.5731343283582, "grad_norm": 22.240964889526367, "learning_rate": 9.966137566137568e-06, "loss": 29.8519, "step": 18546 }, { "epoch": 441.5970149253731, "grad_norm": 17.776729583740234, "learning_rate": 9.965608465608467e-06, "loss": 30.1097, "step": 18547 }, { "epoch": 441.6208955223881, "grad_norm": 19.53279685974121, "learning_rate": 9.965079365079365e-06, "loss": 30.1563, "step": 18548 }, { "epoch": 441.644776119403, "grad_norm": 20.960952758789062, "learning_rate": 9.964550264550266e-06, "loss": 30.4249, "step": 18549 }, { "epoch": 441.6686567164179, "grad_norm": 25.247777938842773, "learning_rate": 9.964021164021165e-06, "loss": 31.019, "step": 18550 }, { "epoch": 441.6925373134328, "grad_norm": 19.04428482055664, "learning_rate": 9.963492063492064e-06, "loss": 29.7853, "step": 18551 }, { "epoch": 441.7164179104478, "grad_norm": 19.53366470336914, "learning_rate": 9.962962962962964e-06, "loss": 29.2121, "step": 18552 }, { "epoch": 441.7402985074627, "grad_norm": 17.115434646606445, "learning_rate": 9.962433862433863e-06, "loss": 29.8615, "step": 18553 }, { "epoch": 441.7641791044776, "grad_norm": 18.052148818969727, "learning_rate": 9.961904761904763e-06, "loss": 30.1049, "step": 18554 }, { "epoch": 441.78805970149256, "grad_norm": 23.43327522277832, "learning_rate": 9.961375661375662e-06, "loss": 28.4668, "step": 18555 }, { "epoch": 441.81194029850747, "grad_norm": 21.95025634765625, "learning_rate": 9.960846560846563e-06, "loss": 29.9168, "step": 18556 }, { "epoch": 441.8358208955224, "grad_norm": 19.90508270263672, "learning_rate": 9.960317460317462e-06, "loss": 30.6355, "step": 18557 }, { "epoch": 441.85970149253734, "grad_norm": 17.02307891845703, "learning_rate": 9.95978835978836e-06, "loss": 29.8329, "step": 18558 }, { "epoch": 441.88358208955225, "grad_norm": 19.480838775634766, "learning_rate": 9.95925925925926e-06, "loss": 29.438, "step": 18559 }, { "epoch": 441.90746268656716, "grad_norm": 19.61400604248047, "learning_rate": 9.95873015873016e-06, "loss": 29.7707, "step": 18560 }, { "epoch": 441.93134328358207, "grad_norm": 18.133447647094727, "learning_rate": 9.958201058201059e-06, "loss": 30.9157, "step": 18561 }, { "epoch": 441.95522388059703, "grad_norm": 19.02525520324707, "learning_rate": 9.957671957671959e-06, "loss": 29.9864, "step": 18562 }, { "epoch": 441.97910447761194, "grad_norm": 18.124753952026367, "learning_rate": 9.957142857142858e-06, "loss": 30.0027, "step": 18563 }, { "epoch": 442.0, "grad_norm": 19.648605346679688, "learning_rate": 9.956613756613758e-06, "loss": 26.2238, "step": 18564 }, { "epoch": 442.0238805970149, "grad_norm": 17.967876434326172, "learning_rate": 9.956084656084657e-06, "loss": 28.3698, "step": 18565 }, { "epoch": 442.0477611940299, "grad_norm": 20.19828987121582, "learning_rate": 9.955555555555556e-06, "loss": 29.6088, "step": 18566 }, { "epoch": 442.0716417910448, "grad_norm": 21.698062896728516, "learning_rate": 9.955026455026457e-06, "loss": 29.477, "step": 18567 }, { "epoch": 442.0955223880597, "grad_norm": 20.658674240112305, "learning_rate": 9.954497354497355e-06, "loss": 29.1767, "step": 18568 }, { "epoch": 442.1194029850746, "grad_norm": 21.921630859375, "learning_rate": 9.953968253968254e-06, "loss": 30.28, "step": 18569 }, { "epoch": 442.14328358208957, "grad_norm": 21.92098617553711, "learning_rate": 9.953439153439155e-06, "loss": 29.0111, "step": 18570 }, { "epoch": 442.1671641791045, "grad_norm": 17.449251174926758, "learning_rate": 9.952910052910054e-06, "loss": 31.0356, "step": 18571 }, { "epoch": 442.1910447761194, "grad_norm": 18.957931518554688, "learning_rate": 9.952380952380954e-06, "loss": 29.2397, "step": 18572 }, { "epoch": 442.21492537313435, "grad_norm": 17.29442024230957, "learning_rate": 9.951851851851853e-06, "loss": 30.5482, "step": 18573 }, { "epoch": 442.23880597014926, "grad_norm": 21.626771926879883, "learning_rate": 9.951322751322752e-06, "loss": 29.6465, "step": 18574 }, { "epoch": 442.26268656716417, "grad_norm": 18.364559173583984, "learning_rate": 9.950793650793652e-06, "loss": 30.3357, "step": 18575 }, { "epoch": 442.28656716417913, "grad_norm": 21.34333610534668, "learning_rate": 9.950264550264551e-06, "loss": 29.0995, "step": 18576 }, { "epoch": 442.31044776119404, "grad_norm": 17.538114547729492, "learning_rate": 9.94973544973545e-06, "loss": 29.9753, "step": 18577 }, { "epoch": 442.33432835820895, "grad_norm": 22.703763961791992, "learning_rate": 9.94920634920635e-06, "loss": 29.5471, "step": 18578 }, { "epoch": 442.35820895522386, "grad_norm": 18.60055923461914, "learning_rate": 9.94867724867725e-06, "loss": 30.6518, "step": 18579 }, { "epoch": 442.3820895522388, "grad_norm": 22.66931915283203, "learning_rate": 9.94814814814815e-06, "loss": 29.7501, "step": 18580 }, { "epoch": 442.40597014925373, "grad_norm": 18.586894989013672, "learning_rate": 9.947619047619049e-06, "loss": 29.0112, "step": 18581 }, { "epoch": 442.42985074626864, "grad_norm": 23.203092575073242, "learning_rate": 9.947089947089947e-06, "loss": 30.937, "step": 18582 }, { "epoch": 442.4537313432836, "grad_norm": 18.897573471069336, "learning_rate": 9.946560846560848e-06, "loss": 30.4058, "step": 18583 }, { "epoch": 442.4776119402985, "grad_norm": 22.032442092895508, "learning_rate": 9.946031746031747e-06, "loss": 29.1225, "step": 18584 }, { "epoch": 442.5014925373134, "grad_norm": 21.034584045410156, "learning_rate": 9.945502645502646e-06, "loss": 29.0111, "step": 18585 }, { "epoch": 442.52537313432833, "grad_norm": 17.994476318359375, "learning_rate": 9.944973544973546e-06, "loss": 30.4909, "step": 18586 }, { "epoch": 442.5492537313433, "grad_norm": 22.44892120361328, "learning_rate": 9.944444444444445e-06, "loss": 29.9081, "step": 18587 }, { "epoch": 442.5731343283582, "grad_norm": 27.11910057067871, "learning_rate": 9.943915343915345e-06, "loss": 30.1356, "step": 18588 }, { "epoch": 442.5970149253731, "grad_norm": 20.926912307739258, "learning_rate": 9.943386243386244e-06, "loss": 29.785, "step": 18589 }, { "epoch": 442.6208955223881, "grad_norm": 15.875774383544922, "learning_rate": 9.942857142857145e-06, "loss": 30.8185, "step": 18590 }, { "epoch": 442.644776119403, "grad_norm": 21.94791603088379, "learning_rate": 9.942328042328044e-06, "loss": 30.2967, "step": 18591 }, { "epoch": 442.6686567164179, "grad_norm": 22.416215896606445, "learning_rate": 9.941798941798942e-06, "loss": 30.0581, "step": 18592 }, { "epoch": 442.6925373134328, "grad_norm": 19.829410552978516, "learning_rate": 9.941269841269841e-06, "loss": 30.66, "step": 18593 }, { "epoch": 442.7164179104478, "grad_norm": 18.064342498779297, "learning_rate": 9.940740740740742e-06, "loss": 30.4255, "step": 18594 }, { "epoch": 442.7402985074627, "grad_norm": 19.131240844726562, "learning_rate": 9.94021164021164e-06, "loss": 29.9299, "step": 18595 }, { "epoch": 442.7641791044776, "grad_norm": 25.69076156616211, "learning_rate": 9.939682539682541e-06, "loss": 30.0875, "step": 18596 }, { "epoch": 442.78805970149256, "grad_norm": 24.921091079711914, "learning_rate": 9.93915343915344e-06, "loss": 30.9268, "step": 18597 }, { "epoch": 442.81194029850747, "grad_norm": NaN, "learning_rate": 9.93862433862434e-06, "loss": 37.3905, "step": 18598 }, { "epoch": 442.8358208955224, "grad_norm": 18.504121780395508, "learning_rate": 9.93862433862434e-06, "loss": 30.1702, "step": 18599 }, { "epoch": 442.85970149253734, "grad_norm": 23.754684448242188, "learning_rate": 9.93809523809524e-06, "loss": 30.1518, "step": 18600 }, { "epoch": 442.88358208955225, "grad_norm": 28.301525115966797, "learning_rate": 9.937566137566138e-06, "loss": 30.9355, "step": 18601 }, { "epoch": 442.90746268656716, "grad_norm": 17.825712203979492, "learning_rate": 9.937037037037039e-06, "loss": 29.4089, "step": 18602 }, { "epoch": 442.93134328358207, "grad_norm": 18.297264099121094, "learning_rate": 9.936507936507937e-06, "loss": 29.377, "step": 18603 }, { "epoch": 442.95522388059703, "grad_norm": 25.3094425201416, "learning_rate": 9.935978835978836e-06, "loss": 29.5095, "step": 18604 }, { "epoch": 442.97910447761194, "grad_norm": 24.7398681640625, "learning_rate": 9.935449735449737e-06, "loss": 29.4477, "step": 18605 }, { "epoch": 443.0, "grad_norm": 17.500024795532227, "learning_rate": 9.934920634920636e-06, "loss": 25.0404, "step": 18606 }, { "epoch": 443.0238805970149, "grad_norm": 17.63364601135254, "learning_rate": 9.934391534391536e-06, "loss": 30.2119, "step": 18607 }, { "epoch": 443.0477611940299, "grad_norm": 26.692359924316406, "learning_rate": 9.933862433862435e-06, "loss": 29.4333, "step": 18608 }, { "epoch": 443.0716417910448, "grad_norm": 20.488828659057617, "learning_rate": 9.933333333333334e-06, "loss": 28.8854, "step": 18609 }, { "epoch": 443.0955223880597, "grad_norm": 21.39193344116211, "learning_rate": 9.932804232804234e-06, "loss": 29.0897, "step": 18610 }, { "epoch": 443.1194029850746, "grad_norm": 19.156391143798828, "learning_rate": 9.932275132275133e-06, "loss": 29.3835, "step": 18611 }, { "epoch": 443.14328358208957, "grad_norm": 27.199663162231445, "learning_rate": 9.931746031746032e-06, "loss": 31.355, "step": 18612 }, { "epoch": 443.1671641791045, "grad_norm": 19.176109313964844, "learning_rate": 9.931216931216932e-06, "loss": 30.5999, "step": 18613 }, { "epoch": 443.1910447761194, "grad_norm": 21.19511604309082, "learning_rate": 9.930687830687831e-06, "loss": 29.8309, "step": 18614 }, { "epoch": 443.21492537313435, "grad_norm": 17.26340103149414, "learning_rate": 9.930158730158732e-06, "loss": 29.8293, "step": 18615 }, { "epoch": 443.23880597014926, "grad_norm": 25.40530014038086, "learning_rate": 9.92962962962963e-06, "loss": 29.635, "step": 18616 }, { "epoch": 443.26268656716417, "grad_norm": 23.863309860229492, "learning_rate": 9.929100529100531e-06, "loss": 30.0053, "step": 18617 }, { "epoch": 443.28656716417913, "grad_norm": 19.732179641723633, "learning_rate": 9.92857142857143e-06, "loss": 28.9756, "step": 18618 }, { "epoch": 443.31044776119404, "grad_norm": 19.102367401123047, "learning_rate": 9.928042328042329e-06, "loss": 30.768, "step": 18619 }, { "epoch": 443.33432835820895, "grad_norm": 16.483366012573242, "learning_rate": 9.927513227513227e-06, "loss": 27.8106, "step": 18620 }, { "epoch": 443.35820895522386, "grad_norm": 21.833375930786133, "learning_rate": 9.926984126984128e-06, "loss": 28.161, "step": 18621 }, { "epoch": 443.3820895522388, "grad_norm": 23.619691848754883, "learning_rate": 9.926455026455027e-06, "loss": 29.5948, "step": 18622 }, { "epoch": 443.40597014925373, "grad_norm": 22.981292724609375, "learning_rate": 9.925925925925927e-06, "loss": 30.1602, "step": 18623 }, { "epoch": 443.42985074626864, "grad_norm": 15.958575248718262, "learning_rate": 9.925396825396826e-06, "loss": 29.7908, "step": 18624 }, { "epoch": 443.4537313432836, "grad_norm": 21.9560604095459, "learning_rate": 9.924867724867727e-06, "loss": 30.6451, "step": 18625 }, { "epoch": 443.4776119402985, "grad_norm": 22.995847702026367, "learning_rate": 9.924338624338625e-06, "loss": 30.2189, "step": 18626 }, { "epoch": 443.5014925373134, "grad_norm": 19.66353416442871, "learning_rate": 9.923809523809524e-06, "loss": 29.547, "step": 18627 }, { "epoch": 443.52537313432833, "grad_norm": 18.92270851135254, "learning_rate": 9.923280423280423e-06, "loss": 29.694, "step": 18628 }, { "epoch": 443.5492537313433, "grad_norm": 19.271345138549805, "learning_rate": 9.922751322751324e-06, "loss": 29.9493, "step": 18629 }, { "epoch": 443.5731343283582, "grad_norm": 20.335243225097656, "learning_rate": 9.922222222222222e-06, "loss": 30.3863, "step": 18630 }, { "epoch": 443.5970149253731, "grad_norm": 24.05260467529297, "learning_rate": 9.921693121693123e-06, "loss": 30.5802, "step": 18631 }, { "epoch": 443.6208955223881, "grad_norm": 22.513591766357422, "learning_rate": 9.921164021164022e-06, "loss": 31.0789, "step": 18632 }, { "epoch": 443.644776119403, "grad_norm": 15.696191787719727, "learning_rate": 9.920634920634922e-06, "loss": 29.4079, "step": 18633 }, { "epoch": 443.6686567164179, "grad_norm": NaN, "learning_rate": 9.920105820105821e-06, "loss": 52.3631, "step": 18634 }, { "epoch": 443.6925373134328, "grad_norm": 23.175010681152344, "learning_rate": 9.920105820105821e-06, "loss": 29.5275, "step": 18635 }, { "epoch": 443.7164179104478, "grad_norm": 23.706695556640625, "learning_rate": 9.91957671957672e-06, "loss": 29.7385, "step": 18636 }, { "epoch": 443.7402985074627, "grad_norm": 18.948604583740234, "learning_rate": 9.91904761904762e-06, "loss": 30.7269, "step": 18637 }, { "epoch": 443.7641791044776, "grad_norm": 19.654752731323242, "learning_rate": 9.91851851851852e-06, "loss": 28.3922, "step": 18638 }, { "epoch": 443.78805970149256, "grad_norm": 17.5794677734375, "learning_rate": 9.917989417989418e-06, "loss": 29.2845, "step": 18639 }, { "epoch": 443.81194029850747, "grad_norm": 22.551067352294922, "learning_rate": 9.917460317460319e-06, "loss": 29.4956, "step": 18640 }, { "epoch": 443.8358208955224, "grad_norm": 21.116165161132812, "learning_rate": 9.916931216931217e-06, "loss": 29.8524, "step": 18641 }, { "epoch": 443.85970149253734, "grad_norm": 24.164901733398438, "learning_rate": 9.916402116402118e-06, "loss": 30.8316, "step": 18642 }, { "epoch": 443.88358208955225, "grad_norm": 18.75589370727539, "learning_rate": 9.915873015873017e-06, "loss": 30.6909, "step": 18643 }, { "epoch": 443.90746268656716, "grad_norm": 21.51673698425293, "learning_rate": 9.915343915343916e-06, "loss": 30.8981, "step": 18644 }, { "epoch": 443.93134328358207, "grad_norm": 20.762636184692383, "learning_rate": 9.914814814814816e-06, "loss": 29.25, "step": 18645 }, { "epoch": 443.95522388059703, "grad_norm": 23.939481735229492, "learning_rate": 9.914285714285715e-06, "loss": 30.0374, "step": 18646 }, { "epoch": 443.97910447761194, "grad_norm": 20.75077247619629, "learning_rate": 9.913756613756614e-06, "loss": 29.2723, "step": 18647 }, { "epoch": 444.0, "grad_norm": 19.254112243652344, "learning_rate": 9.913227513227514e-06, "loss": 26.2907, "step": 18648 }, { "epoch": 444.0238805970149, "grad_norm": 20.285133361816406, "learning_rate": 9.912698412698413e-06, "loss": 28.9627, "step": 18649 }, { "epoch": 444.0477611940299, "grad_norm": 20.10259437561035, "learning_rate": 9.912169312169314e-06, "loss": 29.3833, "step": 18650 }, { "epoch": 444.0716417910448, "grad_norm": 18.515504837036133, "learning_rate": 9.911640211640212e-06, "loss": 29.2917, "step": 18651 }, { "epoch": 444.0955223880597, "grad_norm": 20.508150100708008, "learning_rate": 9.911111111111113e-06, "loss": 29.2335, "step": 18652 }, { "epoch": 444.1194029850746, "grad_norm": 17.969497680664062, "learning_rate": 9.910582010582012e-06, "loss": 30.0243, "step": 18653 }, { "epoch": 444.14328358208957, "grad_norm": 25.349966049194336, "learning_rate": 9.91005291005291e-06, "loss": 30.3186, "step": 18654 }, { "epoch": 444.1671641791045, "grad_norm": 18.017724990844727, "learning_rate": 9.90952380952381e-06, "loss": 30.2973, "step": 18655 }, { "epoch": 444.1910447761194, "grad_norm": 26.20635414123535, "learning_rate": 9.90899470899471e-06, "loss": 29.9004, "step": 18656 }, { "epoch": 444.21492537313435, "grad_norm": 20.62889289855957, "learning_rate": 9.908465608465609e-06, "loss": 30.3634, "step": 18657 }, { "epoch": 444.23880597014926, "grad_norm": 21.628355026245117, "learning_rate": 9.90793650793651e-06, "loss": 30.4709, "step": 18658 }, { "epoch": 444.26268656716417, "grad_norm": 20.195955276489258, "learning_rate": 9.907407407407408e-06, "loss": 30.5781, "step": 18659 }, { "epoch": 444.28656716417913, "grad_norm": 24.391246795654297, "learning_rate": 9.906878306878309e-06, "loss": 29.4739, "step": 18660 }, { "epoch": 444.31044776119404, "grad_norm": 22.97021484375, "learning_rate": 9.906349206349207e-06, "loss": 29.2833, "step": 18661 }, { "epoch": 444.33432835820895, "grad_norm": 19.887691497802734, "learning_rate": 9.905820105820106e-06, "loss": 29.7117, "step": 18662 }, { "epoch": 444.35820895522386, "grad_norm": 23.528059005737305, "learning_rate": 9.905291005291005e-06, "loss": 30.1557, "step": 18663 }, { "epoch": 444.3820895522388, "grad_norm": 20.737913131713867, "learning_rate": 9.904761904761906e-06, "loss": 29.1906, "step": 18664 }, { "epoch": 444.40597014925373, "grad_norm": 21.257333755493164, "learning_rate": 9.904232804232804e-06, "loss": 30.1986, "step": 18665 }, { "epoch": 444.42985074626864, "grad_norm": 18.684720993041992, "learning_rate": 9.903703703703705e-06, "loss": 30.4563, "step": 18666 }, { "epoch": 444.4537313432836, "grad_norm": 19.677936553955078, "learning_rate": 9.903174603174604e-06, "loss": 28.8837, "step": 18667 }, { "epoch": 444.4776119402985, "grad_norm": 22.362728118896484, "learning_rate": 9.902645502645504e-06, "loss": 30.1345, "step": 18668 }, { "epoch": 444.5014925373134, "grad_norm": 23.642974853515625, "learning_rate": 9.902116402116403e-06, "loss": 30.1094, "step": 18669 }, { "epoch": 444.52537313432833, "grad_norm": 24.003171920776367, "learning_rate": 9.901587301587302e-06, "loss": 30.5538, "step": 18670 }, { "epoch": 444.5492537313433, "grad_norm": 20.49563217163086, "learning_rate": 9.901058201058202e-06, "loss": 30.2229, "step": 18671 }, { "epoch": 444.5731343283582, "grad_norm": 24.953784942626953, "learning_rate": 9.900529100529101e-06, "loss": 28.4147, "step": 18672 }, { "epoch": 444.5970149253731, "grad_norm": 21.08348274230957, "learning_rate": 9.9e-06, "loss": 30.7389, "step": 18673 }, { "epoch": 444.6208955223881, "grad_norm": 24.60961151123047, "learning_rate": 9.8994708994709e-06, "loss": 30.0161, "step": 18674 }, { "epoch": 444.644776119403, "grad_norm": 17.964492797851562, "learning_rate": 9.8989417989418e-06, "loss": 29.0658, "step": 18675 }, { "epoch": 444.6686567164179, "grad_norm": 23.04926300048828, "learning_rate": 9.8984126984127e-06, "loss": 28.9474, "step": 18676 }, { "epoch": 444.6925373134328, "grad_norm": 21.68794059753418, "learning_rate": 9.897883597883599e-06, "loss": 28.6149, "step": 18677 }, { "epoch": 444.7164179104478, "grad_norm": 25.25395965576172, "learning_rate": 9.897354497354498e-06, "loss": 29.3648, "step": 18678 }, { "epoch": 444.7402985074627, "grad_norm": 22.624588012695312, "learning_rate": 9.896825396825398e-06, "loss": 30.8506, "step": 18679 }, { "epoch": 444.7641791044776, "grad_norm": 19.853567123413086, "learning_rate": 9.896296296296297e-06, "loss": 30.412, "step": 18680 }, { "epoch": 444.78805970149256, "grad_norm": 28.002241134643555, "learning_rate": 9.895767195767196e-06, "loss": 31.018, "step": 18681 }, { "epoch": 444.81194029850747, "grad_norm": 28.90605926513672, "learning_rate": 9.895238095238096e-06, "loss": 29.1872, "step": 18682 }, { "epoch": 444.8358208955224, "grad_norm": 17.46945571899414, "learning_rate": 9.894708994708995e-06, "loss": 28.7829, "step": 18683 }, { "epoch": 444.85970149253734, "grad_norm": 24.100780487060547, "learning_rate": 9.894179894179896e-06, "loss": 28.8928, "step": 18684 }, { "epoch": 444.88358208955225, "grad_norm": 23.657773971557617, "learning_rate": 9.893650793650794e-06, "loss": 29.678, "step": 18685 }, { "epoch": 444.90746268656716, "grad_norm": 17.536001205444336, "learning_rate": 9.893121693121695e-06, "loss": 30.3878, "step": 18686 }, { "epoch": 444.93134328358207, "grad_norm": 20.298398971557617, "learning_rate": 9.892592592592594e-06, "loss": 29.8327, "step": 18687 }, { "epoch": 444.95522388059703, "grad_norm": 17.706310272216797, "learning_rate": 9.892063492063493e-06, "loss": 30.6398, "step": 18688 }, { "epoch": 444.97910447761194, "grad_norm": 27.356420516967773, "learning_rate": 9.891534391534391e-06, "loss": 31.0165, "step": 18689 }, { "epoch": 445.0, "grad_norm": 21.346406936645508, "learning_rate": 9.891005291005292e-06, "loss": 25.2093, "step": 18690 }, { "epoch": 445.0238805970149, "grad_norm": 20.276702880859375, "learning_rate": 9.89047619047619e-06, "loss": 30.3656, "step": 18691 }, { "epoch": 445.0477611940299, "grad_norm": 19.048446655273438, "learning_rate": 9.889947089947091e-06, "loss": 29.9698, "step": 18692 }, { "epoch": 445.0716417910448, "grad_norm": 20.90336036682129, "learning_rate": 9.88941798941799e-06, "loss": 30.4029, "step": 18693 }, { "epoch": 445.0955223880597, "grad_norm": 18.675891876220703, "learning_rate": 9.88888888888889e-06, "loss": 28.9267, "step": 18694 }, { "epoch": 445.1194029850746, "grad_norm": 20.610153198242188, "learning_rate": 9.88835978835979e-06, "loss": 29.4429, "step": 18695 }, { "epoch": 445.14328358208957, "grad_norm": 16.437572479248047, "learning_rate": 9.887830687830688e-06, "loss": 29.3179, "step": 18696 }, { "epoch": 445.1671641791045, "grad_norm": 23.6490421295166, "learning_rate": 9.887301587301587e-06, "loss": 29.5433, "step": 18697 }, { "epoch": 445.1910447761194, "grad_norm": 21.725297927856445, "learning_rate": 9.886772486772488e-06, "loss": 28.6549, "step": 18698 }, { "epoch": 445.21492537313435, "grad_norm": 24.445068359375, "learning_rate": 9.886243386243386e-06, "loss": 29.7307, "step": 18699 }, { "epoch": 445.23880597014926, "grad_norm": 16.260190963745117, "learning_rate": 9.885714285714287e-06, "loss": 29.4139, "step": 18700 }, { "epoch": 445.26268656716417, "grad_norm": 20.442712783813477, "learning_rate": 9.885185185185186e-06, "loss": 29.3785, "step": 18701 }, { "epoch": 445.28656716417913, "grad_norm": 24.0363826751709, "learning_rate": 9.884656084656086e-06, "loss": 30.5873, "step": 18702 }, { "epoch": 445.31044776119404, "grad_norm": 24.070919036865234, "learning_rate": 9.884126984126985e-06, "loss": 30.0358, "step": 18703 }, { "epoch": 445.33432835820895, "grad_norm": 20.672880172729492, "learning_rate": 9.883597883597884e-06, "loss": 29.025, "step": 18704 }, { "epoch": 445.35820895522386, "grad_norm": 20.473325729370117, "learning_rate": 9.883068783068784e-06, "loss": 29.3211, "step": 18705 }, { "epoch": 445.3820895522388, "grad_norm": 18.26165199279785, "learning_rate": 9.882539682539683e-06, "loss": 29.8058, "step": 18706 }, { "epoch": 445.40597014925373, "grad_norm": 23.052764892578125, "learning_rate": 9.882010582010582e-06, "loss": 30.271, "step": 18707 }, { "epoch": 445.42985074626864, "grad_norm": 19.159442901611328, "learning_rate": 9.881481481481483e-06, "loss": 30.7617, "step": 18708 }, { "epoch": 445.4537313432836, "grad_norm": 25.2326717376709, "learning_rate": 9.880952380952381e-06, "loss": 29.8031, "step": 18709 }, { "epoch": 445.4776119402985, "grad_norm": 20.35891342163086, "learning_rate": 9.880423280423282e-06, "loss": 29.6101, "step": 18710 }, { "epoch": 445.5014925373134, "grad_norm": 25.427818298339844, "learning_rate": 9.87989417989418e-06, "loss": 30.5315, "step": 18711 }, { "epoch": 445.52537313432833, "grad_norm": 19.373870849609375, "learning_rate": 9.87936507936508e-06, "loss": 29.9657, "step": 18712 }, { "epoch": 445.5492537313433, "grad_norm": 23.325149536132812, "learning_rate": 9.87883597883598e-06, "loss": 28.8324, "step": 18713 }, { "epoch": 445.5731343283582, "grad_norm": 19.05719566345215, "learning_rate": 9.878306878306879e-06, "loss": 29.459, "step": 18714 }, { "epoch": 445.5970149253731, "grad_norm": 23.693634033203125, "learning_rate": 9.877777777777778e-06, "loss": 30.169, "step": 18715 }, { "epoch": 445.6208955223881, "grad_norm": 18.649036407470703, "learning_rate": 9.877248677248678e-06, "loss": 29.9517, "step": 18716 }, { "epoch": 445.644776119403, "grad_norm": 22.940074920654297, "learning_rate": 9.876719576719577e-06, "loss": 29.5278, "step": 18717 }, { "epoch": 445.6686567164179, "grad_norm": 22.418012619018555, "learning_rate": 9.876190476190478e-06, "loss": 30.244, "step": 18718 }, { "epoch": 445.6925373134328, "grad_norm": 20.056880950927734, "learning_rate": 9.875661375661376e-06, "loss": 29.651, "step": 18719 }, { "epoch": 445.7164179104478, "grad_norm": 20.92812156677246, "learning_rate": 9.875132275132277e-06, "loss": 29.7148, "step": 18720 }, { "epoch": 445.7402985074627, "grad_norm": 19.421911239624023, "learning_rate": 9.874603174603176e-06, "loss": 29.6727, "step": 18721 }, { "epoch": 445.7641791044776, "grad_norm": 21.001712799072266, "learning_rate": 9.874074074074075e-06, "loss": 29.3088, "step": 18722 }, { "epoch": 445.78805970149256, "grad_norm": 20.59142303466797, "learning_rate": 9.873544973544973e-06, "loss": 29.4323, "step": 18723 }, { "epoch": 445.81194029850747, "grad_norm": 20.60802459716797, "learning_rate": 9.873015873015874e-06, "loss": 30.7279, "step": 18724 }, { "epoch": 445.8358208955224, "grad_norm": 17.077266693115234, "learning_rate": 9.872486772486773e-06, "loss": 29.0152, "step": 18725 }, { "epoch": 445.85970149253734, "grad_norm": 16.55583953857422, "learning_rate": 9.871957671957673e-06, "loss": 29.3891, "step": 18726 }, { "epoch": 445.88358208955225, "grad_norm": 17.879047393798828, "learning_rate": 9.871428571428572e-06, "loss": 30.1239, "step": 18727 }, { "epoch": 445.90746268656716, "grad_norm": 19.5820369720459, "learning_rate": 9.870899470899473e-06, "loss": 30.702, "step": 18728 }, { "epoch": 445.93134328358207, "grad_norm": 19.33560562133789, "learning_rate": 9.870370370370371e-06, "loss": 30.1221, "step": 18729 }, { "epoch": 445.95522388059703, "grad_norm": 18.632369995117188, "learning_rate": 9.86984126984127e-06, "loss": 29.8922, "step": 18730 }, { "epoch": 445.97910447761194, "grad_norm": 19.27196502685547, "learning_rate": 9.869312169312169e-06, "loss": 30.1569, "step": 18731 }, { "epoch": 446.0, "grad_norm": 25.46162986755371, "learning_rate": 9.86878306878307e-06, "loss": 25.4744, "step": 18732 }, { "epoch": 446.0238805970149, "grad_norm": 20.501720428466797, "learning_rate": 9.868253968253968e-06, "loss": 29.7296, "step": 18733 }, { "epoch": 446.0477611940299, "grad_norm": 15.892001152038574, "learning_rate": 9.867724867724869e-06, "loss": 29.951, "step": 18734 }, { "epoch": 446.0716417910448, "grad_norm": 23.434839248657227, "learning_rate": 9.86719576719577e-06, "loss": 29.5602, "step": 18735 }, { "epoch": 446.0955223880597, "grad_norm": 28.479963302612305, "learning_rate": 9.866666666666668e-06, "loss": 30.5887, "step": 18736 }, { "epoch": 446.1194029850746, "grad_norm": 18.02228355407715, "learning_rate": 9.866137566137567e-06, "loss": 29.1836, "step": 18737 }, { "epoch": 446.14328358208957, "grad_norm": 31.304271697998047, "learning_rate": 9.865608465608466e-06, "loss": 30.0157, "step": 18738 }, { "epoch": 446.1671641791045, "grad_norm": 23.778724670410156, "learning_rate": 9.865079365079366e-06, "loss": 29.6594, "step": 18739 }, { "epoch": 446.1910447761194, "grad_norm": 24.051563262939453, "learning_rate": 9.864550264550265e-06, "loss": 29.3966, "step": 18740 }, { "epoch": 446.21492537313435, "grad_norm": 28.467056274414062, "learning_rate": 9.864021164021164e-06, "loss": 29.6585, "step": 18741 }, { "epoch": 446.23880597014926, "grad_norm": 25.203536987304688, "learning_rate": 9.863492063492065e-06, "loss": 29.2565, "step": 18742 }, { "epoch": 446.26268656716417, "grad_norm": 18.118160247802734, "learning_rate": 9.862962962962963e-06, "loss": 30.0215, "step": 18743 }, { "epoch": 446.28656716417913, "grad_norm": 26.8660831451416, "learning_rate": 9.862433862433864e-06, "loss": 28.9416, "step": 18744 }, { "epoch": 446.31044776119404, "grad_norm": 26.6632080078125, "learning_rate": 9.861904761904763e-06, "loss": 29.9127, "step": 18745 }, { "epoch": 446.33432835820895, "grad_norm": 17.099775314331055, "learning_rate": 9.861375661375661e-06, "loss": 29.9441, "step": 18746 }, { "epoch": 446.35820895522386, "grad_norm": 35.367431640625, "learning_rate": 9.860846560846562e-06, "loss": 30.4273, "step": 18747 }, { "epoch": 446.3820895522388, "grad_norm": 21.456600189208984, "learning_rate": 9.86031746031746e-06, "loss": 29.8876, "step": 18748 }, { "epoch": 446.40597014925373, "grad_norm": 35.74831771850586, "learning_rate": 9.85978835978836e-06, "loss": 30.4487, "step": 18749 }, { "epoch": 446.42985074626864, "grad_norm": 25.279451370239258, "learning_rate": 9.85925925925926e-06, "loss": 30.0203, "step": 18750 }, { "epoch": 446.4537313432836, "grad_norm": 25.811397552490234, "learning_rate": 9.858730158730159e-06, "loss": 28.7727, "step": 18751 }, { "epoch": 446.4776119402985, "grad_norm": 34.82917022705078, "learning_rate": 9.85820105820106e-06, "loss": 30.5936, "step": 18752 }, { "epoch": 446.5014925373134, "grad_norm": 21.543052673339844, "learning_rate": 9.857671957671958e-06, "loss": 28.5933, "step": 18753 }, { "epoch": 446.52537313432833, "grad_norm": 41.380638122558594, "learning_rate": 9.857142857142859e-06, "loss": 30.0671, "step": 18754 }, { "epoch": 446.5492537313433, "grad_norm": 28.443233489990234, "learning_rate": 9.856613756613758e-06, "loss": 29.5265, "step": 18755 }, { "epoch": 446.5731343283582, "grad_norm": 51.77538299560547, "learning_rate": 9.856084656084656e-06, "loss": 29.667, "step": 18756 }, { "epoch": 446.5970149253731, "grad_norm": 43.79566192626953, "learning_rate": 9.855555555555555e-06, "loss": 29.8074, "step": 18757 }, { "epoch": 446.6208955223881, "grad_norm": 43.7327880859375, "learning_rate": 9.855026455026456e-06, "loss": 30.0798, "step": 18758 }, { "epoch": 446.644776119403, "grad_norm": 37.443233489990234, "learning_rate": 9.854497354497355e-06, "loss": 28.6069, "step": 18759 }, { "epoch": 446.6686567164179, "grad_norm": 35.89811706542969, "learning_rate": 9.853968253968255e-06, "loss": 29.1999, "step": 18760 }, { "epoch": 446.6925373134328, "grad_norm": 33.100379943847656, "learning_rate": 9.853439153439154e-06, "loss": 29.5536, "step": 18761 }, { "epoch": 446.7164179104478, "grad_norm": 39.89789962768555, "learning_rate": 9.852910052910054e-06, "loss": 29.5007, "step": 18762 }, { "epoch": 446.7402985074627, "grad_norm": 30.517335891723633, "learning_rate": 9.852380952380953e-06, "loss": 30.4981, "step": 18763 }, { "epoch": 446.7641791044776, "grad_norm": 44.66365051269531, "learning_rate": 9.851851851851852e-06, "loss": 29.4309, "step": 18764 }, { "epoch": 446.78805970149256, "grad_norm": 39.44496536254883, "learning_rate": 9.851322751322751e-06, "loss": 30.7236, "step": 18765 }, { "epoch": 446.81194029850747, "grad_norm": 35.49729537963867, "learning_rate": 9.850793650793651e-06, "loss": 29.855, "step": 18766 }, { "epoch": 446.8358208955224, "grad_norm": 35.016822814941406, "learning_rate": 9.85026455026455e-06, "loss": 29.8831, "step": 18767 }, { "epoch": 446.85970149253734, "grad_norm": 34.95130157470703, "learning_rate": 9.84973544973545e-06, "loss": 29.9338, "step": 18768 }, { "epoch": 446.88358208955225, "grad_norm": 31.749237060546875, "learning_rate": 9.849206349206351e-06, "loss": 29.0045, "step": 18769 }, { "epoch": 446.90746268656716, "grad_norm": 40.916542053222656, "learning_rate": 9.84867724867725e-06, "loss": 29.5833, "step": 18770 }, { "epoch": 446.93134328358207, "grad_norm": 36.60881423950195, "learning_rate": 9.848148148148149e-06, "loss": 29.3984, "step": 18771 }, { "epoch": 446.95522388059703, "grad_norm": 33.884708404541016, "learning_rate": 9.847619047619048e-06, "loss": 29.9819, "step": 18772 }, { "epoch": 446.97910447761194, "grad_norm": 31.100940704345703, "learning_rate": 9.847089947089948e-06, "loss": 31.2755, "step": 18773 }, { "epoch": 447.0, "grad_norm": 33.251338958740234, "learning_rate": 9.846560846560847e-06, "loss": 25.9599, "step": 18774 }, { "epoch": 447.0238805970149, "grad_norm": 31.726837158203125, "learning_rate": 9.846031746031746e-06, "loss": 30.208, "step": 18775 }, { "epoch": 447.0477611940299, "grad_norm": 41.428863525390625, "learning_rate": 9.845502645502646e-06, "loss": 28.9709, "step": 18776 }, { "epoch": 447.0716417910448, "grad_norm": 36.63111877441406, "learning_rate": 9.844973544973547e-06, "loss": 30.2441, "step": 18777 }, { "epoch": 447.0955223880597, "grad_norm": 36.10809326171875, "learning_rate": 9.844444444444446e-06, "loss": 29.8194, "step": 18778 }, { "epoch": 447.1194029850746, "grad_norm": 32.919063568115234, "learning_rate": 9.843915343915345e-06, "loss": 29.2762, "step": 18779 }, { "epoch": 447.14328358208957, "grad_norm": 35.652862548828125, "learning_rate": 9.843386243386243e-06, "loss": 30.7961, "step": 18780 }, { "epoch": 447.1671641791045, "grad_norm": 32.187049865722656, "learning_rate": 9.842857142857144e-06, "loss": 29.3027, "step": 18781 }, { "epoch": 447.1910447761194, "grad_norm": 36.274559020996094, "learning_rate": 9.842328042328043e-06, "loss": 30.6209, "step": 18782 }, { "epoch": 447.21492537313435, "grad_norm": 33.96268844604492, "learning_rate": 9.841798941798942e-06, "loss": 30.3775, "step": 18783 }, { "epoch": 447.23880597014926, "grad_norm": 34.651920318603516, "learning_rate": 9.841269841269842e-06, "loss": 29.7717, "step": 18784 }, { "epoch": 447.26268656716417, "grad_norm": 33.044281005859375, "learning_rate": 9.840740740740743e-06, "loss": 29.7228, "step": 18785 }, { "epoch": 447.28656716417913, "grad_norm": 34.92821502685547, "learning_rate": 9.840211640211641e-06, "loss": 28.877, "step": 18786 }, { "epoch": 447.31044776119404, "grad_norm": 29.17439079284668, "learning_rate": 9.83968253968254e-06, "loss": 30.2265, "step": 18787 }, { "epoch": 447.33432835820895, "grad_norm": 37.36045455932617, "learning_rate": 9.83915343915344e-06, "loss": 29.1902, "step": 18788 }, { "epoch": 447.35820895522386, "grad_norm": 31.162965774536133, "learning_rate": 9.83862433862434e-06, "loss": 30.106, "step": 18789 }, { "epoch": 447.3820895522388, "grad_norm": 37.819244384765625, "learning_rate": 9.838095238095238e-06, "loss": 29.6305, "step": 18790 }, { "epoch": 447.40597014925373, "grad_norm": 37.481231689453125, "learning_rate": 9.837566137566137e-06, "loss": 29.3044, "step": 18791 }, { "epoch": 447.42985074626864, "grad_norm": 33.168601989746094, "learning_rate": 9.837037037037038e-06, "loss": 29.4276, "step": 18792 }, { "epoch": 447.4537313432836, "grad_norm": 30.808195114135742, "learning_rate": 9.836507936507937e-06, "loss": 28.7775, "step": 18793 }, { "epoch": 447.4776119402985, "grad_norm": 36.16692352294922, "learning_rate": 9.835978835978837e-06, "loss": 29.6296, "step": 18794 }, { "epoch": 447.5014925373134, "grad_norm": 27.94761848449707, "learning_rate": 9.835449735449736e-06, "loss": 29.6343, "step": 18795 }, { "epoch": 447.52537313432833, "grad_norm": 40.37862777709961, "learning_rate": 9.834920634920636e-06, "loss": 31.2057, "step": 18796 }, { "epoch": 447.5492537313433, "grad_norm": 36.71797180175781, "learning_rate": 9.834391534391535e-06, "loss": 29.9827, "step": 18797 }, { "epoch": 447.5731343283582, "grad_norm": 33.4858512878418, "learning_rate": 9.833862433862434e-06, "loss": 29.3121, "step": 18798 }, { "epoch": 447.5970149253731, "grad_norm": 32.97297286987305, "learning_rate": 9.833333333333333e-06, "loss": 30.4292, "step": 18799 }, { "epoch": 447.6208955223881, "grad_norm": 36.36544418334961, "learning_rate": 9.832804232804233e-06, "loss": 29.3929, "step": 18800 }, { "epoch": 447.644776119403, "grad_norm": 31.649633407592773, "learning_rate": 9.832275132275132e-06, "loss": 28.9444, "step": 18801 }, { "epoch": 447.6686567164179, "grad_norm": 44.3813591003418, "learning_rate": 9.831746031746033e-06, "loss": 29.9432, "step": 18802 }, { "epoch": 447.6925373134328, "grad_norm": 37.95224380493164, "learning_rate": 9.831216931216933e-06, "loss": 28.9526, "step": 18803 }, { "epoch": 447.7164179104478, "grad_norm": 34.08156967163086, "learning_rate": 9.830687830687832e-06, "loss": 29.0852, "step": 18804 }, { "epoch": 447.7402985074627, "grad_norm": 33.56852722167969, "learning_rate": 9.830158730158731e-06, "loss": 28.6329, "step": 18805 }, { "epoch": 447.7641791044776, "grad_norm": 36.387908935546875, "learning_rate": 9.82962962962963e-06, "loss": 28.5445, "step": 18806 }, { "epoch": 447.78805970149256, "grad_norm": 30.244382858276367, "learning_rate": 9.82910052910053e-06, "loss": 29.7699, "step": 18807 }, { "epoch": 447.81194029850747, "grad_norm": 38.11452102661133, "learning_rate": 9.828571428571429e-06, "loss": 29.7457, "step": 18808 }, { "epoch": 447.8358208955224, "grad_norm": 35.806453704833984, "learning_rate": 9.828042328042328e-06, "loss": 29.4785, "step": 18809 }, { "epoch": 447.85970149253734, "grad_norm": 35.697021484375, "learning_rate": 9.827513227513228e-06, "loss": 30.0922, "step": 18810 }, { "epoch": 447.88358208955225, "grad_norm": 34.11040496826172, "learning_rate": 9.826984126984129e-06, "loss": 29.6874, "step": 18811 }, { "epoch": 447.90746268656716, "grad_norm": 33.44720458984375, "learning_rate": 9.826455026455028e-06, "loss": 30.0404, "step": 18812 }, { "epoch": 447.93134328358207, "grad_norm": 28.22995376586914, "learning_rate": 9.825925925925927e-06, "loss": 29.9623, "step": 18813 }, { "epoch": 447.95522388059703, "grad_norm": 33.87968826293945, "learning_rate": 9.825396825396825e-06, "loss": 29.5566, "step": 18814 }, { "epoch": 447.97910447761194, "grad_norm": 32.12936782836914, "learning_rate": 9.824867724867726e-06, "loss": 29.496, "step": 18815 }, { "epoch": 448.0, "grad_norm": 33.93634796142578, "learning_rate": 9.824338624338625e-06, "loss": 27.2855, "step": 18816 }, { "epoch": 448.0238805970149, "grad_norm": 30.463336944580078, "learning_rate": 9.823809523809524e-06, "loss": 29.377, "step": 18817 }, { "epoch": 448.0477611940299, "grad_norm": 33.1998291015625, "learning_rate": 9.823280423280424e-06, "loss": 30.1633, "step": 18818 }, { "epoch": 448.0716417910448, "grad_norm": 30.724191665649414, "learning_rate": 9.822751322751325e-06, "loss": 29.8424, "step": 18819 }, { "epoch": 448.0955223880597, "grad_norm": 38.30782699584961, "learning_rate": 9.822222222222223e-06, "loss": 29.4547, "step": 18820 }, { "epoch": 448.1194029850746, "grad_norm": 33.71158981323242, "learning_rate": 9.821693121693122e-06, "loss": 29.9913, "step": 18821 }, { "epoch": 448.14328358208957, "grad_norm": 37.22774887084961, "learning_rate": 9.821164021164023e-06, "loss": 30.966, "step": 18822 }, { "epoch": 448.1671641791045, "grad_norm": 33.146244049072266, "learning_rate": 9.820634920634922e-06, "loss": 29.3025, "step": 18823 }, { "epoch": 448.1910447761194, "grad_norm": 34.13437271118164, "learning_rate": 9.82010582010582e-06, "loss": 30.1923, "step": 18824 }, { "epoch": 448.21492537313435, "grad_norm": 32.6254997253418, "learning_rate": 9.81957671957672e-06, "loss": 30.1733, "step": 18825 }, { "epoch": 448.23880597014926, "grad_norm": 35.598819732666016, "learning_rate": 9.81904761904762e-06, "loss": 29.5885, "step": 18826 }, { "epoch": 448.26268656716417, "grad_norm": 33.22323989868164, "learning_rate": 9.81851851851852e-06, "loss": 30.7683, "step": 18827 }, { "epoch": 448.28656716417913, "grad_norm": 36.63930892944336, "learning_rate": 9.817989417989419e-06, "loss": 28.1788, "step": 18828 }, { "epoch": 448.31044776119404, "grad_norm": 32.096744537353516, "learning_rate": 9.817460317460318e-06, "loss": 30.0768, "step": 18829 }, { "epoch": 448.33432835820895, "grad_norm": 33.445011138916016, "learning_rate": 9.816931216931218e-06, "loss": 29.5985, "step": 18830 }, { "epoch": 448.35820895522386, "grad_norm": 28.628639221191406, "learning_rate": 9.816402116402117e-06, "loss": 29.2123, "step": 18831 }, { "epoch": 448.3820895522388, "grad_norm": 37.036949157714844, "learning_rate": 9.815873015873016e-06, "loss": 29.9231, "step": 18832 }, { "epoch": 448.40597014925373, "grad_norm": 32.678466796875, "learning_rate": 9.815343915343915e-06, "loss": 29.4799, "step": 18833 }, { "epoch": 448.42985074626864, "grad_norm": 36.6664924621582, "learning_rate": 9.814814814814815e-06, "loss": 29.0067, "step": 18834 }, { "epoch": 448.4537313432836, "grad_norm": 32.55145263671875, "learning_rate": 9.814285714285716e-06, "loss": 29.5954, "step": 18835 }, { "epoch": 448.4776119402985, "grad_norm": 32.24037170410156, "learning_rate": 9.813756613756615e-06, "loss": 29.2615, "step": 18836 }, { "epoch": 448.5014925373134, "grad_norm": 32.47368621826172, "learning_rate": 9.813227513227515e-06, "loss": 28.4796, "step": 18837 }, { "epoch": 448.52537313432833, "grad_norm": 34.174102783203125, "learning_rate": 9.812698412698414e-06, "loss": 29.8036, "step": 18838 }, { "epoch": 448.5492537313433, "grad_norm": 29.635099411010742, "learning_rate": 9.812169312169313e-06, "loss": 29.2207, "step": 18839 }, { "epoch": 448.5731343283582, "grad_norm": 36.1382942199707, "learning_rate": 9.811640211640212e-06, "loss": 30.018, "step": 18840 }, { "epoch": 448.5970149253731, "grad_norm": 34.936832427978516, "learning_rate": 9.811111111111112e-06, "loss": 29.5068, "step": 18841 }, { "epoch": 448.6208955223881, "grad_norm": 37.43104934692383, "learning_rate": 9.810582010582011e-06, "loss": 29.5357, "step": 18842 }, { "epoch": 448.644776119403, "grad_norm": 38.44711685180664, "learning_rate": 9.81005291005291e-06, "loss": 29.2115, "step": 18843 }, { "epoch": 448.6686567164179, "grad_norm": 28.588653564453125, "learning_rate": 9.80952380952381e-06, "loss": 29.349, "step": 18844 }, { "epoch": 448.6925373134328, "grad_norm": 29.671405792236328, "learning_rate": 9.808994708994711e-06, "loss": 28.7659, "step": 18845 }, { "epoch": 448.7164179104478, "grad_norm": 26.597749710083008, "learning_rate": 9.80846560846561e-06, "loss": 29.1947, "step": 18846 }, { "epoch": 448.7402985074627, "grad_norm": 25.768972396850586, "learning_rate": 9.807936507936509e-06, "loss": 30.2264, "step": 18847 }, { "epoch": 448.7641791044776, "grad_norm": 32.354278564453125, "learning_rate": 9.807407407407407e-06, "loss": 29.8559, "step": 18848 }, { "epoch": 448.78805970149256, "grad_norm": 25.641151428222656, "learning_rate": 9.806878306878308e-06, "loss": 30.4412, "step": 18849 }, { "epoch": 448.81194029850747, "grad_norm": 32.82002639770508, "learning_rate": 9.806349206349207e-06, "loss": 29.512, "step": 18850 }, { "epoch": 448.8358208955224, "grad_norm": 27.287641525268555, "learning_rate": 9.805820105820106e-06, "loss": 29.7458, "step": 18851 }, { "epoch": 448.85970149253734, "grad_norm": 33.86443328857422, "learning_rate": 9.805291005291006e-06, "loss": 29.8252, "step": 18852 }, { "epoch": 448.88358208955225, "grad_norm": 31.047433853149414, "learning_rate": 9.804761904761907e-06, "loss": 29.4836, "step": 18853 }, { "epoch": 448.90746268656716, "grad_norm": 33.35688018798828, "learning_rate": 9.804232804232805e-06, "loss": 29.5944, "step": 18854 }, { "epoch": 448.93134328358207, "grad_norm": 27.1873779296875, "learning_rate": 9.803703703703704e-06, "loss": 30.3422, "step": 18855 }, { "epoch": 448.95522388059703, "grad_norm": 32.179019927978516, "learning_rate": 9.803174603174605e-06, "loss": 29.5344, "step": 18856 }, { "epoch": 448.97910447761194, "grad_norm": 25.708383560180664, "learning_rate": 9.802645502645504e-06, "loss": 31.3246, "step": 18857 }, { "epoch": 449.0, "grad_norm": 26.604280471801758, "learning_rate": 9.802116402116402e-06, "loss": 25.5377, "step": 18858 }, { "epoch": 449.0238805970149, "grad_norm": 31.397233963012695, "learning_rate": 9.801587301587301e-06, "loss": 30.5642, "step": 18859 }, { "epoch": 449.0477611940299, "grad_norm": 25.787208557128906, "learning_rate": 9.801058201058202e-06, "loss": 28.8864, "step": 18860 }, { "epoch": 449.0716417910448, "grad_norm": 25.823400497436523, "learning_rate": 9.800529100529102e-06, "loss": 29.7379, "step": 18861 }, { "epoch": 449.0955223880597, "grad_norm": 23.510948181152344, "learning_rate": 9.800000000000001e-06, "loss": 29.8785, "step": 18862 }, { "epoch": 449.1194029850746, "grad_norm": 22.849016189575195, "learning_rate": 9.7994708994709e-06, "loss": 30.8151, "step": 18863 }, { "epoch": 449.14328358208957, "grad_norm": 21.94300651550293, "learning_rate": 9.7989417989418e-06, "loss": 28.4248, "step": 18864 }, { "epoch": 449.1671641791045, "grad_norm": 24.85818099975586, "learning_rate": 9.7984126984127e-06, "loss": 30.4554, "step": 18865 }, { "epoch": 449.1910447761194, "grad_norm": 22.531618118286133, "learning_rate": 9.797883597883598e-06, "loss": 29.7083, "step": 18866 }, { "epoch": 449.21492537313435, "grad_norm": 19.220949172973633, "learning_rate": 9.797354497354497e-06, "loss": 28.4596, "step": 18867 }, { "epoch": 449.23880597014926, "grad_norm": 23.896013259887695, "learning_rate": 9.796825396825397e-06, "loss": 29.422, "step": 18868 }, { "epoch": 449.26268656716417, "grad_norm": 18.753400802612305, "learning_rate": 9.796296296296298e-06, "loss": 29.6429, "step": 18869 }, { "epoch": 449.28656716417913, "grad_norm": 22.286386489868164, "learning_rate": 9.795767195767197e-06, "loss": 29.7818, "step": 18870 }, { "epoch": 449.31044776119404, "grad_norm": 20.750408172607422, "learning_rate": 9.795238095238097e-06, "loss": 30.5447, "step": 18871 }, { "epoch": 449.33432835820895, "grad_norm": 19.726438522338867, "learning_rate": 9.794708994708996e-06, "loss": 30.5707, "step": 18872 }, { "epoch": 449.35820895522386, "grad_norm": 18.497900009155273, "learning_rate": 9.794179894179895e-06, "loss": 30.4066, "step": 18873 }, { "epoch": 449.3820895522388, "grad_norm": 21.51070785522461, "learning_rate": 9.793650793650794e-06, "loss": 30.5392, "step": 18874 }, { "epoch": 449.40597014925373, "grad_norm": 17.816740036010742, "learning_rate": 9.793121693121694e-06, "loss": 29.9868, "step": 18875 }, { "epoch": 449.42985074626864, "grad_norm": 18.465328216552734, "learning_rate": 9.792592592592593e-06, "loss": 29.2578, "step": 18876 }, { "epoch": 449.4537313432836, "grad_norm": 22.90824317932129, "learning_rate": 9.792063492063494e-06, "loss": 29.6319, "step": 18877 }, { "epoch": 449.4776119402985, "grad_norm": 19.82144546508789, "learning_rate": 9.791534391534392e-06, "loss": 29.9684, "step": 18878 }, { "epoch": 449.5014925373134, "grad_norm": 20.08651351928711, "learning_rate": 9.791005291005293e-06, "loss": 29.4972, "step": 18879 }, { "epoch": 449.52537313432833, "grad_norm": 20.742626190185547, "learning_rate": 9.790476190476192e-06, "loss": 30.0482, "step": 18880 }, { "epoch": 449.5492537313433, "grad_norm": 21.239789962768555, "learning_rate": 9.78994708994709e-06, "loss": 29.0164, "step": 18881 }, { "epoch": 449.5731343283582, "grad_norm": 18.97139549255371, "learning_rate": 9.78941798941799e-06, "loss": 28.9804, "step": 18882 }, { "epoch": 449.5970149253731, "grad_norm": 18.684490203857422, "learning_rate": 9.78888888888889e-06, "loss": 27.6919, "step": 18883 }, { "epoch": 449.6208955223881, "grad_norm": 21.324514389038086, "learning_rate": 9.788359788359789e-06, "loss": 29.3598, "step": 18884 }, { "epoch": 449.644776119403, "grad_norm": 20.929763793945312, "learning_rate": 9.78783068783069e-06, "loss": 30.2065, "step": 18885 }, { "epoch": 449.6686567164179, "grad_norm": 18.934946060180664, "learning_rate": 9.787301587301588e-06, "loss": 29.4551, "step": 18886 }, { "epoch": 449.6925373134328, "grad_norm": 23.363676071166992, "learning_rate": 9.786772486772489e-06, "loss": 28.6651, "step": 18887 }, { "epoch": 449.7164179104478, "grad_norm": 20.75950813293457, "learning_rate": 9.786243386243387e-06, "loss": 30.1084, "step": 18888 }, { "epoch": 449.7402985074627, "grad_norm": 18.916799545288086, "learning_rate": 9.785714285714286e-06, "loss": 30.6394, "step": 18889 }, { "epoch": 449.7641791044776, "grad_norm": 24.755535125732422, "learning_rate": 9.785185185185187e-06, "loss": 29.147, "step": 18890 }, { "epoch": 449.78805970149256, "grad_norm": 20.55992317199707, "learning_rate": 9.784656084656086e-06, "loss": 29.1261, "step": 18891 }, { "epoch": 449.81194029850747, "grad_norm": 21.978919982910156, "learning_rate": 9.784126984126984e-06, "loss": 30.0534, "step": 18892 }, { "epoch": 449.8358208955224, "grad_norm": 20.38338279724121, "learning_rate": 9.783597883597883e-06, "loss": 29.9411, "step": 18893 }, { "epoch": 449.85970149253734, "grad_norm": 24.43837547302246, "learning_rate": 9.783068783068784e-06, "loss": 29.4558, "step": 18894 }, { "epoch": 449.88358208955225, "grad_norm": 21.979455947875977, "learning_rate": 9.782539682539684e-06, "loss": 29.3048, "step": 18895 }, { "epoch": 449.90746268656716, "grad_norm": 21.548137664794922, "learning_rate": 9.782010582010583e-06, "loss": 29.6003, "step": 18896 }, { "epoch": 449.93134328358207, "grad_norm": 19.433612823486328, "learning_rate": 9.781481481481482e-06, "loss": 30.1635, "step": 18897 }, { "epoch": 449.95522388059703, "grad_norm": 22.34962272644043, "learning_rate": 9.780952380952382e-06, "loss": 29.6221, "step": 18898 }, { "epoch": 449.97910447761194, "grad_norm": 19.86094093322754, "learning_rate": 9.780423280423281e-06, "loss": 30.0172, "step": 18899 }, { "epoch": 450.0, "grad_norm": 16.426191329956055, "learning_rate": 9.77989417989418e-06, "loss": 25.9236, "step": 18900 }, { "epoch": 450.0, "step": 18900, "total_flos": 9.29092219417275e+17, "train_loss": 0.6629277442245887, "train_runtime": 12803.0873, "train_samples_per_second": 188.111, "train_steps_per_second": 1.476 } ], "logging_steps": 1.0, "max_steps": 18900, "num_input_tokens_seen": 0, "num_train_epochs": 450, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.29092219417275e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }