{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 310.0, "eval_steps": 25.0, "global_step": 13020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.023880597014925373, "grad_norm": Infinity, "learning_rate": 1e-05, "loss": 76.8913, "step": 1 }, { "epoch": 0.04776119402985075, "grad_norm": 395.8459777832031, "learning_rate": 1e-05, "loss": 77.3235, "step": 2 }, { "epoch": 0.07164179104477612, "grad_norm": 161.73968505859375, "learning_rate": 9.988095238095239e-06, "loss": 67.3668, "step": 3 }, { "epoch": 0.0955223880597015, "grad_norm": 145.1616973876953, "learning_rate": 9.976190476190477e-06, "loss": 62.8542, "step": 4 }, { "epoch": 0.11940298507462686, "grad_norm": 52.10374069213867, "learning_rate": 9.964285714285714e-06, "loss": 59.8627, "step": 5 }, { "epoch": 0.14328358208955225, "grad_norm": 31.430763244628906, "learning_rate": 9.952380952380954e-06, "loss": 59.2378, "step": 6 }, { "epoch": 0.16716417910447762, "grad_norm": 37.42692947387695, "learning_rate": 9.940476190476192e-06, "loss": 58.3114, "step": 7 }, { "epoch": 0.191044776119403, "grad_norm": 32.284332275390625, "learning_rate": 9.92857142857143e-06, "loss": 57.3408, "step": 8 }, { "epoch": 0.21492537313432836, "grad_norm": 28.520849227905273, "learning_rate": 9.916666666666668e-06, "loss": 56.9256, "step": 9 }, { "epoch": 0.23880597014925373, "grad_norm": 40.480167388916016, "learning_rate": 9.904761904761906e-06, "loss": 57.4842, "step": 10 }, { "epoch": 0.2626865671641791, "grad_norm": 20.3512020111084, "learning_rate": 9.892857142857143e-06, "loss": 56.7597, "step": 11 }, { "epoch": 0.2865671641791045, "grad_norm": 22.308382034301758, "learning_rate": 9.880952380952381e-06, "loss": 56.594, "step": 12 }, { "epoch": 0.31044776119402984, "grad_norm": 14.086284637451172, "learning_rate": 9.869047619047621e-06, "loss": 56.6327, "step": 13 }, { "epoch": 0.33432835820895523, "grad_norm": NaN, "learning_rate": 9.857142857142859e-06, "loss": 63.0791, "step": 14 }, { "epoch": 0.3582089552238806, "grad_norm": 17.055604934692383, "learning_rate": 9.857142857142859e-06, "loss": 56.1551, "step": 15 }, { "epoch": 0.382089552238806, "grad_norm": 18.52287483215332, "learning_rate": 9.845238095238097e-06, "loss": 54.9502, "step": 16 }, { "epoch": 0.4059701492537313, "grad_norm": 32.66905212402344, "learning_rate": 9.833333333333333e-06, "loss": 55.6494, "step": 17 }, { "epoch": 0.4298507462686567, "grad_norm": 24.075742721557617, "learning_rate": 9.821428571428573e-06, "loss": 55.4766, "step": 18 }, { "epoch": 0.4537313432835821, "grad_norm": 31.505783081054688, "learning_rate": 9.80952380952381e-06, "loss": 55.1481, "step": 19 }, { "epoch": 0.47761194029850745, "grad_norm": 30.53020477294922, "learning_rate": 9.797619047619048e-06, "loss": 55.158, "step": 20 }, { "epoch": 0.5014925373134328, "grad_norm": 14.44444751739502, "learning_rate": 9.785714285714286e-06, "loss": 54.8286, "step": 21 }, { "epoch": 0.5253731343283582, "grad_norm": 53.601078033447266, "learning_rate": 9.773809523809524e-06, "loss": 55.1466, "step": 22 }, { "epoch": 0.5492537313432836, "grad_norm": 37.156028747558594, "learning_rate": 9.761904761904762e-06, "loss": 54.629, "step": 23 }, { "epoch": 0.573134328358209, "grad_norm": 41.84994125366211, "learning_rate": 9.75e-06, "loss": 55.2766, "step": 24 }, { "epoch": 0.5970149253731343, "grad_norm": 50.61705017089844, "learning_rate": 9.73809523809524e-06, "loss": 54.3497, "step": 25 }, { "epoch": 0.6208955223880597, "grad_norm": 16.964982986450195, "learning_rate": 9.726190476190477e-06, "loss": 55.1673, "step": 26 }, { "epoch": 0.6447761194029851, "grad_norm": 22.71157455444336, "learning_rate": 9.714285714285715e-06, "loss": 54.8001, "step": 27 }, { "epoch": 0.6686567164179105, "grad_norm": 16.372802734375, "learning_rate": 9.702380952380953e-06, "loss": 54.4506, "step": 28 }, { "epoch": 0.6925373134328359, "grad_norm": 13.21664047241211, "learning_rate": 9.690476190476191e-06, "loss": 53.0488, "step": 29 }, { "epoch": 0.7164179104477612, "grad_norm": NaN, "learning_rate": 9.678571428571429e-06, "loss": 74.7707, "step": 30 }, { "epoch": 0.7402985074626866, "grad_norm": 23.045652389526367, "learning_rate": 9.678571428571429e-06, "loss": 54.1909, "step": 31 }, { "epoch": 0.764179104477612, "grad_norm": 15.034178733825684, "learning_rate": 9.666666666666667e-06, "loss": 53.3253, "step": 32 }, { "epoch": 0.7880597014925373, "grad_norm": 14.148232460021973, "learning_rate": 9.654761904761906e-06, "loss": 53.693, "step": 33 }, { "epoch": 0.8119402985074626, "grad_norm": NaN, "learning_rate": 9.642857142857144e-06, "loss": 61.3517, "step": 34 }, { "epoch": 0.835820895522388, "grad_norm": 14.757994651794434, "learning_rate": 9.642857142857144e-06, "loss": 53.3175, "step": 35 }, { "epoch": 0.8597014925373134, "grad_norm": 10.875706672668457, "learning_rate": 9.630952380952382e-06, "loss": 54.2592, "step": 36 }, { "epoch": 0.8835820895522388, "grad_norm": 9.926539421081543, "learning_rate": 9.61904761904762e-06, "loss": 53.8721, "step": 37 }, { "epoch": 0.9074626865671642, "grad_norm": 17.697235107421875, "learning_rate": 9.607142857142858e-06, "loss": 54.2901, "step": 38 }, { "epoch": 0.9313432835820895, "grad_norm": 29.19430160522461, "learning_rate": 9.595238095238096e-06, "loss": 53.1261, "step": 39 }, { "epoch": 0.9552238805970149, "grad_norm": 12.3985595703125, "learning_rate": 9.583333333333335e-06, "loss": 53.9815, "step": 40 }, { "epoch": 0.9791044776119403, "grad_norm": 17.109691619873047, "learning_rate": 9.571428571428573e-06, "loss": 52.5838, "step": 41 }, { "epoch": 1.0, "grad_norm": 14.529239654541016, "learning_rate": 9.559523809523811e-06, "loss": 46.1888, "step": 42 }, { "epoch": 1.0238805970149254, "grad_norm": 15.683514595031738, "learning_rate": 9.547619047619049e-06, "loss": 52.2043, "step": 43 }, { "epoch": 1.0477611940298508, "grad_norm": 26.219507217407227, "learning_rate": 9.535714285714287e-06, "loss": 53.09, "step": 44 }, { "epoch": 1.0716417910447762, "grad_norm": 19.859697341918945, "learning_rate": 9.523809523809525e-06, "loss": 52.5858, "step": 45 }, { "epoch": 1.0955223880597016, "grad_norm": 11.090332984924316, "learning_rate": 9.511904761904763e-06, "loss": 53.7674, "step": 46 }, { "epoch": 1.1194029850746268, "grad_norm": 15.586993217468262, "learning_rate": 9.5e-06, "loss": 53.4901, "step": 47 }, { "epoch": 1.1432835820895522, "grad_norm": 22.734928131103516, "learning_rate": 9.488095238095238e-06, "loss": 53.6667, "step": 48 }, { "epoch": 1.1671641791044776, "grad_norm": 16.382047653198242, "learning_rate": 9.476190476190476e-06, "loss": 53.2914, "step": 49 }, { "epoch": 1.191044776119403, "grad_norm": 15.916092872619629, "learning_rate": 9.464285714285714e-06, "loss": 51.729, "step": 50 }, { "epoch": 1.2149253731343284, "grad_norm": NaN, "learning_rate": 9.452380952380952e-06, "loss": 66.3151, "step": 51 }, { "epoch": 1.2388059701492538, "grad_norm": 20.121395111083984, "learning_rate": 9.452380952380952e-06, "loss": 53.866, "step": 52 }, { "epoch": 1.2626865671641792, "grad_norm": NaN, "learning_rate": 9.440476190476192e-06, "loss": 67.3538, "step": 53 }, { "epoch": 1.2865671641791045, "grad_norm": 13.869222640991211, "learning_rate": 9.440476190476192e-06, "loss": 53.5431, "step": 54 }, { "epoch": 1.31044776119403, "grad_norm": 25.478107452392578, "learning_rate": 9.42857142857143e-06, "loss": 52.7126, "step": 55 }, { "epoch": 1.3343283582089551, "grad_norm": 35.76942825317383, "learning_rate": 9.416666666666667e-06, "loss": 53.8082, "step": 56 }, { "epoch": 1.3582089552238805, "grad_norm": 17.95109748840332, "learning_rate": 9.404761904761905e-06, "loss": 53.0275, "step": 57 }, { "epoch": 1.382089552238806, "grad_norm": 26.80129623413086, "learning_rate": 9.392857142857143e-06, "loss": 53.3295, "step": 58 }, { "epoch": 1.4059701492537313, "grad_norm": 25.803054809570312, "learning_rate": 9.380952380952381e-06, "loss": 52.3771, "step": 59 }, { "epoch": 1.4298507462686567, "grad_norm": 35.39850616455078, "learning_rate": 9.36904761904762e-06, "loss": 52.9467, "step": 60 }, { "epoch": 1.4537313432835821, "grad_norm": 27.43315887451172, "learning_rate": 9.357142857142859e-06, "loss": 53.1359, "step": 61 }, { "epoch": 1.4776119402985075, "grad_norm": 33.431400299072266, "learning_rate": 9.345238095238096e-06, "loss": 52.2006, "step": 62 }, { "epoch": 1.5014925373134327, "grad_norm": 33.08237075805664, "learning_rate": 9.333333333333334e-06, "loss": 52.6816, "step": 63 }, { "epoch": 1.5253731343283583, "grad_norm": 25.711997985839844, "learning_rate": 9.321428571428572e-06, "loss": 52.6113, "step": 64 }, { "epoch": 1.5492537313432835, "grad_norm": 32.688297271728516, "learning_rate": 9.30952380952381e-06, "loss": 51.9086, "step": 65 }, { "epoch": 1.573134328358209, "grad_norm": 31.856857299804688, "learning_rate": 9.297619047619048e-06, "loss": 53.1913, "step": 66 }, { "epoch": 1.5970149253731343, "grad_norm": 26.231773376464844, "learning_rate": 9.285714285714288e-06, "loss": 51.464, "step": 67 }, { "epoch": 1.6208955223880597, "grad_norm": 29.39109230041504, "learning_rate": 9.273809523809525e-06, "loss": 52.0572, "step": 68 }, { "epoch": 1.644776119402985, "grad_norm": 28.86277198791504, "learning_rate": 9.261904761904763e-06, "loss": 52.7115, "step": 69 }, { "epoch": 1.6686567164179105, "grad_norm": 28.82640266418457, "learning_rate": 9.250000000000001e-06, "loss": 53.1756, "step": 70 }, { "epoch": 1.6925373134328359, "grad_norm": 31.32577133178711, "learning_rate": 9.238095238095239e-06, "loss": 52.369, "step": 71 }, { "epoch": 1.716417910447761, "grad_norm": 18.739269256591797, "learning_rate": 9.226190476190477e-06, "loss": 52.6631, "step": 72 }, { "epoch": 1.7402985074626867, "grad_norm": 22.889320373535156, "learning_rate": 9.214285714285715e-06, "loss": 52.4786, "step": 73 }, { "epoch": 1.7641791044776118, "grad_norm": 24.175626754760742, "learning_rate": 9.202380952380953e-06, "loss": 52.1792, "step": 74 }, { "epoch": 1.7880597014925375, "grad_norm": 25.257095336914062, "learning_rate": 9.19047619047619e-06, "loss": 51.39, "step": 75 }, { "epoch": 1.8119402985074626, "grad_norm": 35.745208740234375, "learning_rate": 9.178571428571429e-06, "loss": 53.331, "step": 76 }, { "epoch": 1.835820895522388, "grad_norm": 23.815813064575195, "learning_rate": 9.166666666666666e-06, "loss": 52.7632, "step": 77 }, { "epoch": 1.8597014925373134, "grad_norm": 32.405757904052734, "learning_rate": 9.154761904761906e-06, "loss": 52.3165, "step": 78 }, { "epoch": 1.8835820895522388, "grad_norm": 38.95046615600586, "learning_rate": 9.142857142857144e-06, "loss": 52.0931, "step": 79 }, { "epoch": 1.9074626865671642, "grad_norm": 22.412342071533203, "learning_rate": 9.130952380952382e-06, "loss": 51.8732, "step": 80 }, { "epoch": 1.9313432835820894, "grad_norm": 35.088253021240234, "learning_rate": 9.11904761904762e-06, "loss": 52.0182, "step": 81 }, { "epoch": 1.955223880597015, "grad_norm": 20.136964797973633, "learning_rate": 9.107142857142858e-06, "loss": 52.7956, "step": 82 }, { "epoch": 1.9791044776119402, "grad_norm": 20.860034942626953, "learning_rate": 9.095238095238095e-06, "loss": 50.339, "step": 83 }, { "epoch": 2.0, "grad_norm": 20.38931655883789, "learning_rate": 9.083333333333333e-06, "loss": 45.3651, "step": 84 }, { "epoch": 2.023880597014925, "grad_norm": 21.519498825073242, "learning_rate": 9.071428571428573e-06, "loss": 52.1228, "step": 85 }, { "epoch": 2.047761194029851, "grad_norm": 30.47000503540039, "learning_rate": 9.05952380952381e-06, "loss": 51.1968, "step": 86 }, { "epoch": 2.071641791044776, "grad_norm": 19.728044509887695, "learning_rate": 9.047619047619049e-06, "loss": 52.1514, "step": 87 }, { "epoch": 2.0955223880597016, "grad_norm": 42.647281646728516, "learning_rate": 9.035714285714287e-06, "loss": 52.5597, "step": 88 }, { "epoch": 2.1194029850746268, "grad_norm": 55.45186233520508, "learning_rate": 9.023809523809524e-06, "loss": 53.7621, "step": 89 }, { "epoch": 2.1432835820895524, "grad_norm": 14.879026412963867, "learning_rate": 9.011904761904762e-06, "loss": 52.0743, "step": 90 }, { "epoch": 2.1671641791044776, "grad_norm": 48.55704116821289, "learning_rate": 9e-06, "loss": 52.0543, "step": 91 }, { "epoch": 2.191044776119403, "grad_norm": 37.96782302856445, "learning_rate": 8.98809523809524e-06, "loss": 52.4684, "step": 92 }, { "epoch": 2.2149253731343284, "grad_norm": 27.714475631713867, "learning_rate": 8.976190476190478e-06, "loss": 52.725, "step": 93 }, { "epoch": 2.2388059701492535, "grad_norm": 32.12433624267578, "learning_rate": 8.964285714285716e-06, "loss": 52.7161, "step": 94 }, { "epoch": 2.262686567164179, "grad_norm": 23.8153018951416, "learning_rate": 8.952380952380953e-06, "loss": 51.3124, "step": 95 }, { "epoch": 2.2865671641791043, "grad_norm": 31.269794464111328, "learning_rate": 8.940476190476191e-06, "loss": 51.9646, "step": 96 }, { "epoch": 2.31044776119403, "grad_norm": 16.611865997314453, "learning_rate": 8.92857142857143e-06, "loss": 51.8503, "step": 97 }, { "epoch": 2.334328358208955, "grad_norm": 26.69631004333496, "learning_rate": 8.916666666666667e-06, "loss": 52.4857, "step": 98 }, { "epoch": 2.3582089552238807, "grad_norm": 21.10638999938965, "learning_rate": 8.904761904761905e-06, "loss": 52.0022, "step": 99 }, { "epoch": 2.382089552238806, "grad_norm": 16.273351669311523, "learning_rate": 8.892857142857143e-06, "loss": 50.4367, "step": 100 }, { "epoch": 2.405970149253731, "grad_norm": 16.407167434692383, "learning_rate": 8.88095238095238e-06, "loss": 51.2079, "step": 101 }, { "epoch": 2.4298507462686567, "grad_norm": 16.22024154663086, "learning_rate": 8.869047619047619e-06, "loss": 50.4939, "step": 102 }, { "epoch": 2.4537313432835823, "grad_norm": 27.11235809326172, "learning_rate": 8.857142857142858e-06, "loss": 50.0872, "step": 103 }, { "epoch": 2.4776119402985075, "grad_norm": 18.912181854248047, "learning_rate": 8.845238095238096e-06, "loss": 51.8135, "step": 104 }, { "epoch": 2.5014925373134327, "grad_norm": 29.597028732299805, "learning_rate": 8.833333333333334e-06, "loss": 49.4789, "step": 105 }, { "epoch": 2.5253731343283583, "grad_norm": 30.51687240600586, "learning_rate": 8.821428571428572e-06, "loss": 52.5555, "step": 106 }, { "epoch": 2.5492537313432835, "grad_norm": 31.4583797454834, "learning_rate": 8.80952380952381e-06, "loss": 51.0073, "step": 107 }, { "epoch": 2.573134328358209, "grad_norm": 30.35653305053711, "learning_rate": 8.797619047619048e-06, "loss": 50.9501, "step": 108 }, { "epoch": 2.5970149253731343, "grad_norm": 24.041545867919922, "learning_rate": 8.785714285714286e-06, "loss": 49.5162, "step": 109 }, { "epoch": 2.62089552238806, "grad_norm": 23.52166175842285, "learning_rate": 8.773809523809525e-06, "loss": 52.9747, "step": 110 }, { "epoch": 2.644776119402985, "grad_norm": 28.871065139770508, "learning_rate": 8.761904761904763e-06, "loss": 50.2273, "step": 111 }, { "epoch": 2.6686567164179102, "grad_norm": 26.484140396118164, "learning_rate": 8.750000000000001e-06, "loss": 51.2286, "step": 112 }, { "epoch": 2.692537313432836, "grad_norm": 37.570743560791016, "learning_rate": 8.738095238095239e-06, "loss": 49.7131, "step": 113 }, { "epoch": 2.716417910447761, "grad_norm": 23.827178955078125, "learning_rate": 8.726190476190477e-06, "loss": 51.913, "step": 114 }, { "epoch": 2.7402985074626867, "grad_norm": 33.89924621582031, "learning_rate": 8.714285714285715e-06, "loss": 52.2382, "step": 115 }, { "epoch": 2.764179104477612, "grad_norm": 29.397851943969727, "learning_rate": 8.702380952380952e-06, "loss": 52.1548, "step": 116 }, { "epoch": 2.7880597014925375, "grad_norm": 28.73517417907715, "learning_rate": 8.690476190476192e-06, "loss": 51.2892, "step": 117 }, { "epoch": 2.8119402985074626, "grad_norm": 32.068138122558594, "learning_rate": 8.67857142857143e-06, "loss": 51.542, "step": 118 }, { "epoch": 2.835820895522388, "grad_norm": 22.76898956298828, "learning_rate": 8.666666666666668e-06, "loss": 50.373, "step": 119 }, { "epoch": 2.8597014925373134, "grad_norm": 33.528263092041016, "learning_rate": 8.654761904761906e-06, "loss": 51.3075, "step": 120 }, { "epoch": 2.883582089552239, "grad_norm": 21.655696868896484, "learning_rate": 8.642857142857144e-06, "loss": 51.1397, "step": 121 }, { "epoch": 2.9074626865671642, "grad_norm": 25.94880485534668, "learning_rate": 8.630952380952381e-06, "loss": 51.4326, "step": 122 }, { "epoch": 2.9313432835820894, "grad_norm": 36.14421844482422, "learning_rate": 8.61904761904762e-06, "loss": 50.9524, "step": 123 }, { "epoch": 2.955223880597015, "grad_norm": 28.361459732055664, "learning_rate": 8.607142857142859e-06, "loss": 51.3171, "step": 124 }, { "epoch": 2.97910447761194, "grad_norm": 30.784954071044922, "learning_rate": 8.595238095238097e-06, "loss": 49.9797, "step": 125 }, { "epoch": 3.0, "grad_norm": 22.98565101623535, "learning_rate": 8.583333333333333e-06, "loss": 44.3471, "step": 126 }, { "epoch": 3.023880597014925, "grad_norm": 25.601985931396484, "learning_rate": 8.571428571428571e-06, "loss": 51.6574, "step": 127 }, { "epoch": 3.047761194029851, "grad_norm": 27.648792266845703, "learning_rate": 8.55952380952381e-06, "loss": 51.3385, "step": 128 }, { "epoch": 3.071641791044776, "grad_norm": 18.773529052734375, "learning_rate": 8.547619047619048e-06, "loss": 51.1451, "step": 129 }, { "epoch": 3.0955223880597016, "grad_norm": 29.439353942871094, "learning_rate": 8.535714285714286e-06, "loss": 51.6092, "step": 130 }, { "epoch": 3.1194029850746268, "grad_norm": 32.41486740112305, "learning_rate": 8.523809523809524e-06, "loss": 50.9068, "step": 131 }, { "epoch": 3.1432835820895524, "grad_norm": 23.441896438598633, "learning_rate": 8.511904761904762e-06, "loss": 51.7453, "step": 132 }, { "epoch": 3.1671641791044776, "grad_norm": 29.218734741210938, "learning_rate": 8.5e-06, "loss": 49.9124, "step": 133 }, { "epoch": 3.191044776119403, "grad_norm": 20.988981246948242, "learning_rate": 8.488095238095238e-06, "loss": 50.9788, "step": 134 }, { "epoch": 3.2149253731343284, "grad_norm": 22.57052993774414, "learning_rate": 8.476190476190477e-06, "loss": 51.4228, "step": 135 }, { "epoch": 3.2388059701492535, "grad_norm": 26.112573623657227, "learning_rate": 8.464285714285715e-06, "loss": 50.3332, "step": 136 }, { "epoch": 3.262686567164179, "grad_norm": 23.8747615814209, "learning_rate": 8.452380952380953e-06, "loss": 51.1763, "step": 137 }, { "epoch": 3.2865671641791043, "grad_norm": 24.12811851501465, "learning_rate": 8.440476190476191e-06, "loss": 49.8539, "step": 138 }, { "epoch": 3.31044776119403, "grad_norm": 27.462984085083008, "learning_rate": 8.428571428571429e-06, "loss": 50.7766, "step": 139 }, { "epoch": 3.334328358208955, "grad_norm": 31.261472702026367, "learning_rate": 8.416666666666667e-06, "loss": 49.7599, "step": 140 }, { "epoch": 3.3582089552238807, "grad_norm": 21.049545288085938, "learning_rate": 8.404761904761905e-06, "loss": 49.6827, "step": 141 }, { "epoch": 3.382089552238806, "grad_norm": 30.103389739990234, "learning_rate": 8.392857142857144e-06, "loss": 49.3866, "step": 142 }, { "epoch": 3.405970149253731, "grad_norm": 31.348888397216797, "learning_rate": 8.380952380952382e-06, "loss": 51.4607, "step": 143 }, { "epoch": 3.4298507462686567, "grad_norm": 28.910200119018555, "learning_rate": 8.36904761904762e-06, "loss": 51.2337, "step": 144 }, { "epoch": 3.4537313432835823, "grad_norm": 21.00281524658203, "learning_rate": 8.357142857142858e-06, "loss": 50.9557, "step": 145 }, { "epoch": 3.4776119402985075, "grad_norm": 45.842002868652344, "learning_rate": 8.345238095238096e-06, "loss": 49.7377, "step": 146 }, { "epoch": 3.5014925373134327, "grad_norm": 30.77996253967285, "learning_rate": 8.333333333333334e-06, "loss": 51.1234, "step": 147 }, { "epoch": 3.5253731343283583, "grad_norm": 31.492767333984375, "learning_rate": 8.321428571428573e-06, "loss": 50.5733, "step": 148 }, { "epoch": 3.5492537313432835, "grad_norm": 36.57206344604492, "learning_rate": 8.309523809523811e-06, "loss": 50.6762, "step": 149 }, { "epoch": 3.573134328358209, "grad_norm": 33.86347198486328, "learning_rate": 8.297619047619049e-06, "loss": 50.8281, "step": 150 }, { "epoch": 3.5970149253731343, "grad_norm": 30.812152862548828, "learning_rate": 8.285714285714287e-06, "loss": 50.6509, "step": 151 }, { "epoch": 3.62089552238806, "grad_norm": 24.536882400512695, "learning_rate": 8.273809523809523e-06, "loss": 50.1112, "step": 152 }, { "epoch": 3.644776119402985, "grad_norm": 29.8430233001709, "learning_rate": 8.261904761904763e-06, "loss": 50.846, "step": 153 }, { "epoch": 3.6686567164179102, "grad_norm": 26.18596076965332, "learning_rate": 8.25e-06, "loss": 50.3806, "step": 154 }, { "epoch": 3.692537313432836, "grad_norm": 38.75019836425781, "learning_rate": 8.238095238095239e-06, "loss": 49.8915, "step": 155 }, { "epoch": 3.716417910447761, "grad_norm": 34.30149841308594, "learning_rate": 8.226190476190476e-06, "loss": 50.7886, "step": 156 }, { "epoch": 3.7402985074626867, "grad_norm": 33.179298400878906, "learning_rate": 8.214285714285714e-06, "loss": 50.8175, "step": 157 }, { "epoch": 3.764179104477612, "grad_norm": 34.90909957885742, "learning_rate": 8.202380952380952e-06, "loss": 50.3521, "step": 158 }, { "epoch": 3.7880597014925375, "grad_norm": 33.2717399597168, "learning_rate": 8.190476190476192e-06, "loss": 51.2006, "step": 159 }, { "epoch": 3.8119402985074626, "grad_norm": 33.082672119140625, "learning_rate": 8.17857142857143e-06, "loss": 49.5627, "step": 160 }, { "epoch": 3.835820895522388, "grad_norm": 23.65228843688965, "learning_rate": 8.166666666666668e-06, "loss": 49.9631, "step": 161 }, { "epoch": 3.8597014925373134, "grad_norm": 37.3172492980957, "learning_rate": 8.154761904761905e-06, "loss": 50.7175, "step": 162 }, { "epoch": 3.883582089552239, "grad_norm": 29.369930267333984, "learning_rate": 8.142857142857143e-06, "loss": 51.1435, "step": 163 }, { "epoch": 3.9074626865671642, "grad_norm": 28.807470321655273, "learning_rate": 8.130952380952381e-06, "loss": 50.3349, "step": 164 }, { "epoch": 3.9313432835820894, "grad_norm": 33.90628433227539, "learning_rate": 8.119047619047619e-06, "loss": 50.7241, "step": 165 }, { "epoch": 3.955223880597015, "grad_norm": 21.72952651977539, "learning_rate": 8.107142857142859e-06, "loss": 49.9013, "step": 166 }, { "epoch": 3.97910447761194, "grad_norm": 26.831520080566406, "learning_rate": 8.095238095238097e-06, "loss": 51.0161, "step": 167 }, { "epoch": 4.0, "grad_norm": 24.49069595336914, "learning_rate": 8.083333333333334e-06, "loss": 44.6758, "step": 168 }, { "epoch": 4.023880597014926, "grad_norm": 36.32711410522461, "learning_rate": 8.071428571428572e-06, "loss": 49.8601, "step": 169 }, { "epoch": 4.04776119402985, "grad_norm": 29.862812042236328, "learning_rate": 8.05952380952381e-06, "loss": 51.712, "step": 170 }, { "epoch": 4.071641791044776, "grad_norm": 40.245887756347656, "learning_rate": 8.047619047619048e-06, "loss": 50.3353, "step": 171 }, { "epoch": 4.095522388059702, "grad_norm": 34.22684097290039, "learning_rate": 8.035714285714286e-06, "loss": 50.6474, "step": 172 }, { "epoch": 4.119402985074627, "grad_norm": 36.754669189453125, "learning_rate": 8.023809523809526e-06, "loss": 50.1623, "step": 173 }, { "epoch": 4.143283582089552, "grad_norm": 35.76541519165039, "learning_rate": 8.011904761904763e-06, "loss": 50.2426, "step": 174 }, { "epoch": 4.167164179104478, "grad_norm": 25.851362228393555, "learning_rate": 8.000000000000001e-06, "loss": 49.9525, "step": 175 }, { "epoch": 4.191044776119403, "grad_norm": 24.48278045654297, "learning_rate": 7.98809523809524e-06, "loss": 49.1466, "step": 176 }, { "epoch": 4.214925373134328, "grad_norm": 28.79146385192871, "learning_rate": 7.976190476190477e-06, "loss": 49.9365, "step": 177 }, { "epoch": 4.2388059701492535, "grad_norm": 29.29482650756836, "learning_rate": 7.964285714285715e-06, "loss": 50.7427, "step": 178 }, { "epoch": 4.262686567164179, "grad_norm": 23.50571060180664, "learning_rate": 7.952380952380953e-06, "loss": 49.7287, "step": 179 }, { "epoch": 4.286567164179105, "grad_norm": 27.805828094482422, "learning_rate": 7.94047619047619e-06, "loss": 50.4316, "step": 180 }, { "epoch": 4.3104477611940295, "grad_norm": 28.323888778686523, "learning_rate": 7.928571428571429e-06, "loss": 50.0263, "step": 181 }, { "epoch": 4.334328358208955, "grad_norm": 25.43438148498535, "learning_rate": 7.916666666666667e-06, "loss": 49.426, "step": 182 }, { "epoch": 4.358208955223881, "grad_norm": 22.169496536254883, "learning_rate": 7.904761904761904e-06, "loss": 51.1048, "step": 183 }, { "epoch": 4.382089552238806, "grad_norm": 33.660545349121094, "learning_rate": 7.892857142857144e-06, "loss": 49.7654, "step": 184 }, { "epoch": 4.405970149253731, "grad_norm": 24.276273727416992, "learning_rate": 7.880952380952382e-06, "loss": 50.4976, "step": 185 }, { "epoch": 4.429850746268657, "grad_norm": 41.48741149902344, "learning_rate": 7.86904761904762e-06, "loss": 52.0386, "step": 186 }, { "epoch": 4.453731343283582, "grad_norm": 25.86789894104004, "learning_rate": 7.857142857142858e-06, "loss": 49.129, "step": 187 }, { "epoch": 4.477611940298507, "grad_norm": 26.607038497924805, "learning_rate": 7.845238095238096e-06, "loss": 49.3561, "step": 188 }, { "epoch": 4.501492537313433, "grad_norm": 43.54303741455078, "learning_rate": 7.833333333333333e-06, "loss": 50.1143, "step": 189 }, { "epoch": 4.525373134328358, "grad_norm": 45.6146354675293, "learning_rate": 7.821428571428571e-06, "loss": 49.3217, "step": 190 }, { "epoch": 4.549253731343284, "grad_norm": 24.00080680847168, "learning_rate": 7.809523809523811e-06, "loss": 50.484, "step": 191 }, { "epoch": 4.573134328358209, "grad_norm": 29.736740112304688, "learning_rate": 7.797619047619049e-06, "loss": 49.748, "step": 192 }, { "epoch": 4.597014925373134, "grad_norm": 33.08702850341797, "learning_rate": 7.785714285714287e-06, "loss": 50.2142, "step": 193 }, { "epoch": 4.62089552238806, "grad_norm": 19.16411018371582, "learning_rate": 7.773809523809525e-06, "loss": 50.1073, "step": 194 }, { "epoch": 4.6447761194029855, "grad_norm": 32.145721435546875, "learning_rate": 7.761904761904762e-06, "loss": 48.5769, "step": 195 }, { "epoch": 4.66865671641791, "grad_norm": 38.768341064453125, "learning_rate": 7.75e-06, "loss": 49.681, "step": 196 }, { "epoch": 4.692537313432836, "grad_norm": 26.108245849609375, "learning_rate": 7.738095238095238e-06, "loss": 49.9193, "step": 197 }, { "epoch": 4.7164179104477615, "grad_norm": 28.86294174194336, "learning_rate": 7.726190476190478e-06, "loss": 50.4584, "step": 198 }, { "epoch": 4.740298507462686, "grad_norm": 31.089380264282227, "learning_rate": 7.714285714285716e-06, "loss": 50.7873, "step": 199 }, { "epoch": 4.764179104477612, "grad_norm": 22.934032440185547, "learning_rate": 7.702380952380954e-06, "loss": 50.611, "step": 200 }, { "epoch": 4.7880597014925375, "grad_norm": 27.986371994018555, "learning_rate": 7.690476190476191e-06, "loss": 49.275, "step": 201 }, { "epoch": 4.811940298507462, "grad_norm": 23.44196319580078, "learning_rate": 7.67857142857143e-06, "loss": 50.2035, "step": 202 }, { "epoch": 4.835820895522388, "grad_norm": 22.05059242248535, "learning_rate": 7.666666666666667e-06, "loss": 48.9595, "step": 203 }, { "epoch": 4.859701492537313, "grad_norm": 29.709396362304688, "learning_rate": 7.654761904761905e-06, "loss": 50.5343, "step": 204 }, { "epoch": 4.883582089552239, "grad_norm": 23.702781677246094, "learning_rate": 7.642857142857143e-06, "loss": 50.2627, "step": 205 }, { "epoch": 4.907462686567165, "grad_norm": 20.144807815551758, "learning_rate": 7.630952380952381e-06, "loss": 51.0125, "step": 206 }, { "epoch": 4.931343283582089, "grad_norm": 28.83676528930664, "learning_rate": 7.61904761904762e-06, "loss": 50.5985, "step": 207 }, { "epoch": 4.955223880597015, "grad_norm": 34.40160369873047, "learning_rate": 7.6071428571428575e-06, "loss": 49.6469, "step": 208 }, { "epoch": 4.979104477611941, "grad_norm": 26.982925415039062, "learning_rate": 7.595238095238095e-06, "loss": 50.1666, "step": 209 }, { "epoch": 5.0, "grad_norm": 19.569746017456055, "learning_rate": 7.583333333333333e-06, "loss": 43.6715, "step": 210 }, { "epoch": 5.023880597014926, "grad_norm": 23.753328323364258, "learning_rate": 7.571428571428572e-06, "loss": 49.9273, "step": 211 }, { "epoch": 5.04776119402985, "grad_norm": 22.463659286499023, "learning_rate": 7.55952380952381e-06, "loss": 48.8499, "step": 212 }, { "epoch": 5.071641791044776, "grad_norm": 24.507875442504883, "learning_rate": 7.547619047619048e-06, "loss": 49.3275, "step": 213 }, { "epoch": 5.095522388059702, "grad_norm": 21.727603912353516, "learning_rate": 7.5357142857142865e-06, "loss": 49.1879, "step": 214 }, { "epoch": 5.119402985074627, "grad_norm": 26.122251510620117, "learning_rate": 7.523809523809524e-06, "loss": 50.1094, "step": 215 }, { "epoch": 5.143283582089552, "grad_norm": 24.142263412475586, "learning_rate": 7.511904761904762e-06, "loss": 50.2708, "step": 216 }, { "epoch": 5.167164179104478, "grad_norm": 22.762237548828125, "learning_rate": 7.500000000000001e-06, "loss": 50.441, "step": 217 }, { "epoch": 5.191044776119403, "grad_norm": 35.74570846557617, "learning_rate": 7.488095238095239e-06, "loss": 48.5121, "step": 218 }, { "epoch": 5.214925373134328, "grad_norm": 30.92180824279785, "learning_rate": 7.476190476190477e-06, "loss": 49.4257, "step": 219 }, { "epoch": 5.2388059701492535, "grad_norm": 26.90997314453125, "learning_rate": 7.464285714285715e-06, "loss": 50.9712, "step": 220 }, { "epoch": 5.262686567164179, "grad_norm": 35.544700622558594, "learning_rate": 7.4523809523809534e-06, "loss": 49.7908, "step": 221 }, { "epoch": 5.286567164179105, "grad_norm": 33.78145217895508, "learning_rate": 7.440476190476191e-06, "loss": 49.2105, "step": 222 }, { "epoch": 5.3104477611940295, "grad_norm": 32.16508102416992, "learning_rate": 7.428571428571429e-06, "loss": 49.8545, "step": 223 }, { "epoch": 5.334328358208955, "grad_norm": 30.4263973236084, "learning_rate": 7.416666666666668e-06, "loss": 50.0994, "step": 224 }, { "epoch": 5.358208955223881, "grad_norm": 25.801084518432617, "learning_rate": 7.404761904761906e-06, "loss": 49.6227, "step": 225 }, { "epoch": 5.382089552238806, "grad_norm": 27.16851234436035, "learning_rate": 7.392857142857144e-06, "loss": 50.0005, "step": 226 }, { "epoch": 5.405970149253731, "grad_norm": 30.102867126464844, "learning_rate": 7.380952380952382e-06, "loss": 50.7114, "step": 227 }, { "epoch": 5.429850746268657, "grad_norm": 26.032968521118164, "learning_rate": 7.36904761904762e-06, "loss": 48.99, "step": 228 }, { "epoch": 5.453731343283582, "grad_norm": 24.074424743652344, "learning_rate": 7.357142857142858e-06, "loss": 49.6965, "step": 229 }, { "epoch": 5.477611940298507, "grad_norm": 24.5870361328125, "learning_rate": 7.345238095238096e-06, "loss": 48.8593, "step": 230 }, { "epoch": 5.501492537313433, "grad_norm": 22.831932067871094, "learning_rate": 7.333333333333333e-06, "loss": 49.9975, "step": 231 }, { "epoch": 5.525373134328358, "grad_norm": 26.88197135925293, "learning_rate": 7.321428571428572e-06, "loss": 49.5131, "step": 232 }, { "epoch": 5.549253731343284, "grad_norm": 28.986154556274414, "learning_rate": 7.30952380952381e-06, "loss": 48.9042, "step": 233 }, { "epoch": 5.573134328358209, "grad_norm": 17.220605850219727, "learning_rate": 7.297619047619048e-06, "loss": 49.453, "step": 234 }, { "epoch": 5.597014925373134, "grad_norm": 25.110107421875, "learning_rate": 7.285714285714286e-06, "loss": 49.8196, "step": 235 }, { "epoch": 5.62089552238806, "grad_norm": 34.680870056152344, "learning_rate": 7.273809523809524e-06, "loss": 49.9709, "step": 236 }, { "epoch": 5.6447761194029855, "grad_norm": 24.10121726989746, "learning_rate": 7.261904761904762e-06, "loss": 49.4685, "step": 237 }, { "epoch": 5.66865671641791, "grad_norm": 28.65550422668457, "learning_rate": 7.25e-06, "loss": 50.3439, "step": 238 }, { "epoch": 5.692537313432836, "grad_norm": 27.49604606628418, "learning_rate": 7.238095238095239e-06, "loss": 50.0419, "step": 239 }, { "epoch": 5.7164179104477615, "grad_norm": 22.843509674072266, "learning_rate": 7.226190476190477e-06, "loss": 49.0357, "step": 240 }, { "epoch": 5.740298507462686, "grad_norm": 36.56801223754883, "learning_rate": 7.2142857142857145e-06, "loss": 49.4478, "step": 241 }, { "epoch": 5.764179104477612, "grad_norm": 38.233734130859375, "learning_rate": 7.202380952380953e-06, "loss": 50.4473, "step": 242 }, { "epoch": 5.7880597014925375, "grad_norm": 29.198333740234375, "learning_rate": 7.190476190476191e-06, "loss": 49.8598, "step": 243 }, { "epoch": 5.811940298507462, "grad_norm": 34.49404525756836, "learning_rate": 7.178571428571429e-06, "loss": 49.1441, "step": 244 }, { "epoch": 5.835820895522388, "grad_norm": 35.568359375, "learning_rate": 7.166666666666667e-06, "loss": 49.9402, "step": 245 }, { "epoch": 5.859701492537313, "grad_norm": 31.041446685791016, "learning_rate": 7.154761904761906e-06, "loss": 50.1265, "step": 246 }, { "epoch": 5.883582089552239, "grad_norm": 48.34186935424805, "learning_rate": 7.1428571428571436e-06, "loss": 50.7649, "step": 247 }, { "epoch": 5.907462686567165, "grad_norm": 39.171661376953125, "learning_rate": 7.1309523809523814e-06, "loss": 48.943, "step": 248 }, { "epoch": 5.931343283582089, "grad_norm": 28.724523544311523, "learning_rate": 7.11904761904762e-06, "loss": 50.8039, "step": 249 }, { "epoch": 5.955223880597015, "grad_norm": 36.57830810546875, "learning_rate": 7.107142857142858e-06, "loss": 50.2311, "step": 250 }, { "epoch": 5.979104477611941, "grad_norm": 39.91551971435547, "learning_rate": 7.095238095238096e-06, "loss": 49.1617, "step": 251 }, { "epoch": 6.0, "grad_norm": 17.104145050048828, "learning_rate": 7.083333333333335e-06, "loss": 42.8003, "step": 252 }, { "epoch": 6.023880597014926, "grad_norm": 33.03441619873047, "learning_rate": 7.0714285714285726e-06, "loss": 48.2896, "step": 253 }, { "epoch": 6.04776119402985, "grad_norm": 26.487470626831055, "learning_rate": 7.0595238095238105e-06, "loss": 49.205, "step": 254 }, { "epoch": 6.071641791044776, "grad_norm": 26.752981185913086, "learning_rate": 7.047619047619048e-06, "loss": 50.3943, "step": 255 }, { "epoch": 6.095522388059702, "grad_norm": 22.44376564025879, "learning_rate": 7.035714285714287e-06, "loss": 49.285, "step": 256 }, { "epoch": 6.119402985074627, "grad_norm": 31.066368103027344, "learning_rate": 7.023809523809524e-06, "loss": 49.3131, "step": 257 }, { "epoch": 6.143283582089552, "grad_norm": 28.67262840270996, "learning_rate": 7.011904761904762e-06, "loss": 50.6188, "step": 258 }, { "epoch": 6.167164179104478, "grad_norm": 24.013134002685547, "learning_rate": 7e-06, "loss": 50.4382, "step": 259 }, { "epoch": 6.191044776119403, "grad_norm": 26.5673828125, "learning_rate": 6.988095238095239e-06, "loss": 49.7058, "step": 260 }, { "epoch": 6.214925373134328, "grad_norm": 20.803695678710938, "learning_rate": 6.9761904761904765e-06, "loss": 48.9389, "step": 261 }, { "epoch": 6.2388059701492535, "grad_norm": 23.450183868408203, "learning_rate": 6.964285714285714e-06, "loss": 49.0091, "step": 262 }, { "epoch": 6.262686567164179, "grad_norm": 36.94446563720703, "learning_rate": 6.952380952380952e-06, "loss": 50.2589, "step": 263 }, { "epoch": 6.286567164179105, "grad_norm": 39.548095703125, "learning_rate": 6.940476190476191e-06, "loss": 49.3129, "step": 264 }, { "epoch": 6.3104477611940295, "grad_norm": 30.536083221435547, "learning_rate": 6.928571428571429e-06, "loss": 49.1838, "step": 265 }, { "epoch": 6.334328358208955, "grad_norm": 27.97296714782715, "learning_rate": 6.916666666666667e-06, "loss": 50.3184, "step": 266 }, { "epoch": 6.358208955223881, "grad_norm": 25.69655418395996, "learning_rate": 6.9047619047619055e-06, "loss": 49.2226, "step": 267 }, { "epoch": 6.382089552238806, "grad_norm": 22.114097595214844, "learning_rate": 6.892857142857143e-06, "loss": 49.4455, "step": 268 }, { "epoch": 6.405970149253731, "grad_norm": 30.47511100769043, "learning_rate": 6.880952380952381e-06, "loss": 49.7409, "step": 269 }, { "epoch": 6.429850746268657, "grad_norm": 26.32929039001465, "learning_rate": 6.86904761904762e-06, "loss": 50.3336, "step": 270 }, { "epoch": 6.453731343283582, "grad_norm": 28.09309196472168, "learning_rate": 6.857142857142858e-06, "loss": 49.6044, "step": 271 }, { "epoch": 6.477611940298507, "grad_norm": 25.840974807739258, "learning_rate": 6.845238095238096e-06, "loss": 49.9185, "step": 272 }, { "epoch": 6.501492537313433, "grad_norm": 31.89126205444336, "learning_rate": 6.833333333333334e-06, "loss": 48.2732, "step": 273 }, { "epoch": 6.525373134328358, "grad_norm": 24.013029098510742, "learning_rate": 6.8214285714285724e-06, "loss": 49.9752, "step": 274 }, { "epoch": 6.549253731343284, "grad_norm": 25.509836196899414, "learning_rate": 6.80952380952381e-06, "loss": 50.5493, "step": 275 }, { "epoch": 6.573134328358209, "grad_norm": 35.25442886352539, "learning_rate": 6.797619047619048e-06, "loss": 49.2553, "step": 276 }, { "epoch": 6.597014925373134, "grad_norm": 29.42585563659668, "learning_rate": 6.785714285714287e-06, "loss": 48.776, "step": 277 }, { "epoch": 6.62089552238806, "grad_norm": 25.90894889831543, "learning_rate": 6.773809523809525e-06, "loss": 49.1964, "step": 278 }, { "epoch": 6.6447761194029855, "grad_norm": 25.63600730895996, "learning_rate": 6.761904761904763e-06, "loss": 48.4528, "step": 279 }, { "epoch": 6.66865671641791, "grad_norm": 29.943740844726562, "learning_rate": 6.750000000000001e-06, "loss": 49.9026, "step": 280 }, { "epoch": 6.692537313432836, "grad_norm": 33.253910064697266, "learning_rate": 6.738095238095239e-06, "loss": 49.2364, "step": 281 }, { "epoch": 6.7164179104477615, "grad_norm": 23.465354919433594, "learning_rate": 6.726190476190477e-06, "loss": 49.2759, "step": 282 }, { "epoch": 6.740298507462686, "grad_norm": 31.023218154907227, "learning_rate": 6.714285714285714e-06, "loss": 49.3256, "step": 283 }, { "epoch": 6.764179104477612, "grad_norm": 32.376991271972656, "learning_rate": 6.702380952380952e-06, "loss": 47.7239, "step": 284 }, { "epoch": 6.7880597014925375, "grad_norm": 18.388896942138672, "learning_rate": 6.690476190476191e-06, "loss": 49.5751, "step": 285 }, { "epoch": 6.811940298507462, "grad_norm": 22.1639404296875, "learning_rate": 6.678571428571429e-06, "loss": 48.8153, "step": 286 }, { "epoch": 6.835820895522388, "grad_norm": 31.39455223083496, "learning_rate": 6.666666666666667e-06, "loss": 49.7872, "step": 287 }, { "epoch": 6.859701492537313, "grad_norm": 36.480533599853516, "learning_rate": 6.654761904761905e-06, "loss": 48.9679, "step": 288 }, { "epoch": 6.883582089552239, "grad_norm": 23.432872772216797, "learning_rate": 6.642857142857143e-06, "loss": 48.4627, "step": 289 }, { "epoch": 6.907462686567165, "grad_norm": 33.31097412109375, "learning_rate": 6.630952380952381e-06, "loss": 49.617, "step": 290 }, { "epoch": 6.931343283582089, "grad_norm": 34.07685852050781, "learning_rate": 6.619047619047619e-06, "loss": 49.3365, "step": 291 }, { "epoch": 6.955223880597015, "grad_norm": 25.68811798095703, "learning_rate": 6.607142857142858e-06, "loss": 49.7821, "step": 292 }, { "epoch": 6.979104477611941, "grad_norm": 24.179588317871094, "learning_rate": 6.595238095238096e-06, "loss": 48.5466, "step": 293 }, { "epoch": 7.0, "grad_norm": 26.158781051635742, "learning_rate": 6.5833333333333335e-06, "loss": 43.2838, "step": 294 }, { "epoch": 7.023880597014926, "grad_norm": 22.84689712524414, "learning_rate": 6.571428571428572e-06, "loss": 49.3253, "step": 295 }, { "epoch": 7.04776119402985, "grad_norm": NaN, "learning_rate": 6.55952380952381e-06, "loss": 75.2786, "step": 296 }, { "epoch": 7.071641791044776, "grad_norm": 31.13886070251465, "learning_rate": 6.55952380952381e-06, "loss": 49.984, "step": 297 }, { "epoch": 7.095522388059702, "grad_norm": 32.37982940673828, "learning_rate": 6.547619047619048e-06, "loss": 49.6632, "step": 298 }, { "epoch": 7.119402985074627, "grad_norm": 22.977916717529297, "learning_rate": 6.535714285714286e-06, "loss": 48.7802, "step": 299 }, { "epoch": 7.143283582089552, "grad_norm": NaN, "learning_rate": 6.523809523809525e-06, "loss": 60.3381, "step": 300 }, { "epoch": 7.167164179104478, "grad_norm": 32.18650817871094, "learning_rate": 6.523809523809525e-06, "loss": 49.2689, "step": 301 }, { "epoch": 7.191044776119403, "grad_norm": 30.0800724029541, "learning_rate": 6.5119047619047626e-06, "loss": 49.3891, "step": 302 }, { "epoch": 7.214925373134328, "grad_norm": 32.35110855102539, "learning_rate": 6.5000000000000004e-06, "loss": 48.4497, "step": 303 }, { "epoch": 7.2388059701492535, "grad_norm": 34.08786392211914, "learning_rate": 6.488095238095239e-06, "loss": 49.1321, "step": 304 }, { "epoch": 7.262686567164179, "grad_norm": 25.25969696044922, "learning_rate": 6.476190476190477e-06, "loss": 49.0524, "step": 305 }, { "epoch": 7.286567164179105, "grad_norm": 25.843929290771484, "learning_rate": 6.464285714285715e-06, "loss": 49.8077, "step": 306 }, { "epoch": 7.3104477611940295, "grad_norm": 34.57284927368164, "learning_rate": 6.452380952380954e-06, "loss": 49.5393, "step": 307 }, { "epoch": 7.334328358208955, "grad_norm": 33.44814682006836, "learning_rate": 6.4404761904761916e-06, "loss": 49.0375, "step": 308 }, { "epoch": 7.358208955223881, "grad_norm": 25.127429962158203, "learning_rate": 6.4285714285714295e-06, "loss": 48.8145, "step": 309 }, { "epoch": 7.382089552238806, "grad_norm": 31.81999969482422, "learning_rate": 6.416666666666667e-06, "loss": 49.6432, "step": 310 }, { "epoch": 7.405970149253731, "grad_norm": 22.428335189819336, "learning_rate": 6.404761904761904e-06, "loss": 47.6232, "step": 311 }, { "epoch": 7.429850746268657, "grad_norm": 45.87803268432617, "learning_rate": 6.392857142857143e-06, "loss": 48.3479, "step": 312 }, { "epoch": 7.453731343283582, "grad_norm": 37.441253662109375, "learning_rate": 6.380952380952381e-06, "loss": 48.593, "step": 313 }, { "epoch": 7.477611940298507, "grad_norm": 23.15785789489746, "learning_rate": 6.369047619047619e-06, "loss": 49.1204, "step": 314 }, { "epoch": 7.501492537313433, "grad_norm": 35.8905029296875, "learning_rate": 6.357142857142858e-06, "loss": 49.2918, "step": 315 }, { "epoch": 7.525373134328358, "grad_norm": 37.41954040527344, "learning_rate": 6.3452380952380955e-06, "loss": 47.7495, "step": 316 }, { "epoch": 7.549253731343284, "grad_norm": 31.173114776611328, "learning_rate": 6.333333333333333e-06, "loss": 49.539, "step": 317 }, { "epoch": 7.573134328358209, "grad_norm": 23.941965103149414, "learning_rate": 6.321428571428571e-06, "loss": 49.0958, "step": 318 }, { "epoch": 7.597014925373134, "grad_norm": 31.949769973754883, "learning_rate": 6.30952380952381e-06, "loss": 49.1945, "step": 319 }, { "epoch": 7.62089552238806, "grad_norm": 21.299409866333008, "learning_rate": 6.297619047619048e-06, "loss": 49.3823, "step": 320 }, { "epoch": 7.6447761194029855, "grad_norm": 34.93647766113281, "learning_rate": 6.285714285714286e-06, "loss": 48.8867, "step": 321 }, { "epoch": 7.66865671641791, "grad_norm": 30.189655303955078, "learning_rate": 6.2738095238095245e-06, "loss": 49.8644, "step": 322 }, { "epoch": 7.692537313432836, "grad_norm": 19.964523315429688, "learning_rate": 6.261904761904762e-06, "loss": 49.6489, "step": 323 }, { "epoch": 7.7164179104477615, "grad_norm": 22.253337860107422, "learning_rate": 6.25e-06, "loss": 48.0582, "step": 324 }, { "epoch": 7.740298507462686, "grad_norm": 26.631391525268555, "learning_rate": 6.238095238095239e-06, "loss": 48.5585, "step": 325 }, { "epoch": 7.764179104477612, "grad_norm": 26.0469913482666, "learning_rate": 6.226190476190477e-06, "loss": 49.4969, "step": 326 }, { "epoch": 7.7880597014925375, "grad_norm": 30.000507354736328, "learning_rate": 6.214285714285715e-06, "loss": 49.6044, "step": 327 }, { "epoch": 7.811940298507462, "grad_norm": 29.44800567626953, "learning_rate": 6.202380952380953e-06, "loss": 50.3622, "step": 328 }, { "epoch": 7.835820895522388, "grad_norm": 24.83717918395996, "learning_rate": 6.1904761904761914e-06, "loss": 50.0974, "step": 329 }, { "epoch": 7.859701492537313, "grad_norm": 30.0760555267334, "learning_rate": 6.178571428571429e-06, "loss": 48.9307, "step": 330 }, { "epoch": 7.883582089552239, "grad_norm": 21.087966918945312, "learning_rate": 6.166666666666667e-06, "loss": 49.3432, "step": 331 }, { "epoch": 7.907462686567165, "grad_norm": 23.193716049194336, "learning_rate": 6.154761904761906e-06, "loss": 48.6664, "step": 332 }, { "epoch": 7.931343283582089, "grad_norm": 22.764123916625977, "learning_rate": 6.142857142857144e-06, "loss": 49.3497, "step": 333 }, { "epoch": 7.955223880597015, "grad_norm": 22.411897659301758, "learning_rate": 6.130952380952382e-06, "loss": 49.4106, "step": 334 }, { "epoch": 7.979104477611941, "grad_norm": 29.535375595092773, "learning_rate": 6.11904761904762e-06, "loss": 49.0695, "step": 335 }, { "epoch": 8.0, "grad_norm": 21.094457626342773, "learning_rate": 6.107142857142858e-06, "loss": 42.1367, "step": 336 }, { "epoch": 8.023880597014925, "grad_norm": 33.74859619140625, "learning_rate": 6.095238095238096e-06, "loss": 47.0065, "step": 337 }, { "epoch": 8.047761194029851, "grad_norm": 32.539127349853516, "learning_rate": 6.083333333333333e-06, "loss": 47.9697, "step": 338 }, { "epoch": 8.071641791044776, "grad_norm": 19.168655395507812, "learning_rate": 6.071428571428571e-06, "loss": 49.4919, "step": 339 }, { "epoch": 8.0955223880597, "grad_norm": 30.041269302368164, "learning_rate": 6.05952380952381e-06, "loss": 48.7887, "step": 340 }, { "epoch": 8.119402985074627, "grad_norm": 21.070598602294922, "learning_rate": 6.047619047619048e-06, "loss": 48.5064, "step": 341 }, { "epoch": 8.143283582089552, "grad_norm": 29.560287475585938, "learning_rate": 6.035714285714286e-06, "loss": 47.7472, "step": 342 }, { "epoch": 8.167164179104478, "grad_norm": 24.256393432617188, "learning_rate": 6.023809523809524e-06, "loss": 48.8917, "step": 343 }, { "epoch": 8.191044776119403, "grad_norm": 29.970674514770508, "learning_rate": 6.011904761904762e-06, "loss": 48.3464, "step": 344 }, { "epoch": 8.214925373134328, "grad_norm": 25.274595260620117, "learning_rate": 6e-06, "loss": 49.1565, "step": 345 }, { "epoch": 8.238805970149254, "grad_norm": 24.000280380249023, "learning_rate": 5.988095238095238e-06, "loss": 49.3396, "step": 346 }, { "epoch": 8.26268656716418, "grad_norm": 25.110261917114258, "learning_rate": 5.976190476190477e-06, "loss": 49.786, "step": 347 }, { "epoch": 8.286567164179104, "grad_norm": 26.188514709472656, "learning_rate": 5.964285714285715e-06, "loss": 50.1652, "step": 348 }, { "epoch": 8.31044776119403, "grad_norm": 18.536714553833008, "learning_rate": 5.9523809523809525e-06, "loss": 49.7224, "step": 349 }, { "epoch": 8.334328358208955, "grad_norm": 33.79502868652344, "learning_rate": 5.940476190476191e-06, "loss": 48.2923, "step": 350 }, { "epoch": 8.35820895522388, "grad_norm": 33.03609085083008, "learning_rate": 5.928571428571429e-06, "loss": 49.128, "step": 351 }, { "epoch": 8.382089552238806, "grad_norm": 23.88555145263672, "learning_rate": 5.916666666666667e-06, "loss": 49.6072, "step": 352 }, { "epoch": 8.405970149253731, "grad_norm": 29.688135147094727, "learning_rate": 5.904761904761905e-06, "loss": 49.0984, "step": 353 }, { "epoch": 8.429850746268656, "grad_norm": 23.166162490844727, "learning_rate": 5.892857142857144e-06, "loss": 48.8104, "step": 354 }, { "epoch": 8.453731343283582, "grad_norm": 27.68876838684082, "learning_rate": 5.8809523809523816e-06, "loss": 48.7745, "step": 355 }, { "epoch": 8.477611940298507, "grad_norm": 26.520286560058594, "learning_rate": 5.8690476190476194e-06, "loss": 47.883, "step": 356 }, { "epoch": 8.501492537313434, "grad_norm": 28.830135345458984, "learning_rate": 5.857142857142858e-06, "loss": 49.1347, "step": 357 }, { "epoch": 8.525373134328358, "grad_norm": 27.387250900268555, "learning_rate": 5.845238095238096e-06, "loss": 48.2092, "step": 358 }, { "epoch": 8.549253731343283, "grad_norm": 23.53616714477539, "learning_rate": 5.833333333333334e-06, "loss": 48.437, "step": 359 }, { "epoch": 8.57313432835821, "grad_norm": 25.665664672851562, "learning_rate": 5.821428571428573e-06, "loss": 49.3006, "step": 360 }, { "epoch": 8.597014925373134, "grad_norm": 24.35331153869629, "learning_rate": 5.8095238095238106e-06, "loss": 49.5249, "step": 361 }, { "epoch": 8.620895522388059, "grad_norm": 28.612688064575195, "learning_rate": 5.7976190476190485e-06, "loss": 50.1344, "step": 362 }, { "epoch": 8.644776119402986, "grad_norm": 25.055545806884766, "learning_rate": 5.785714285714286e-06, "loss": 48.6014, "step": 363 }, { "epoch": 8.66865671641791, "grad_norm": 27.645490646362305, "learning_rate": 5.773809523809523e-06, "loss": 48.953, "step": 364 }, { "epoch": 8.692537313432837, "grad_norm": 26.791471481323242, "learning_rate": 5.761904761904762e-06, "loss": 49.5912, "step": 365 }, { "epoch": 8.716417910447761, "grad_norm": 27.57213592529297, "learning_rate": 5.75e-06, "loss": 48.9958, "step": 366 }, { "epoch": 8.740298507462686, "grad_norm": 20.936344146728516, "learning_rate": 5.738095238095238e-06, "loss": 48.3449, "step": 367 }, { "epoch": 8.764179104477613, "grad_norm": 31.695810317993164, "learning_rate": 5.726190476190477e-06, "loss": 49.1015, "step": 368 }, { "epoch": 8.788059701492537, "grad_norm": 31.584064483642578, "learning_rate": 5.7142857142857145e-06, "loss": 48.8249, "step": 369 }, { "epoch": 8.811940298507462, "grad_norm": 30.70412826538086, "learning_rate": 5.702380952380952e-06, "loss": 49.2984, "step": 370 }, { "epoch": 8.835820895522389, "grad_norm": 36.31315231323242, "learning_rate": 5.690476190476191e-06, "loss": 48.6769, "step": 371 }, { "epoch": 8.859701492537313, "grad_norm": 28.98838996887207, "learning_rate": 5.678571428571429e-06, "loss": 50.2101, "step": 372 }, { "epoch": 8.883582089552238, "grad_norm": 29.07052230834961, "learning_rate": 5.666666666666667e-06, "loss": 49.9206, "step": 373 }, { "epoch": 8.907462686567165, "grad_norm": 31.653087615966797, "learning_rate": 5.654761904761905e-06, "loss": 48.3035, "step": 374 }, { "epoch": 8.93134328358209, "grad_norm": 27.019704818725586, "learning_rate": 5.6428571428571435e-06, "loss": 48.6833, "step": 375 }, { "epoch": 8.955223880597014, "grad_norm": 30.919578552246094, "learning_rate": 5.630952380952381e-06, "loss": 47.7973, "step": 376 }, { "epoch": 8.97910447761194, "grad_norm": 28.002975463867188, "learning_rate": 5.619047619047619e-06, "loss": 49.5539, "step": 377 }, { "epoch": 9.0, "grad_norm": 27.587263107299805, "learning_rate": 5.607142857142858e-06, "loss": 42.9343, "step": 378 }, { "epoch": 9.023880597014925, "grad_norm": 31.024024963378906, "learning_rate": 5.595238095238096e-06, "loss": 48.6774, "step": 379 }, { "epoch": 9.047761194029851, "grad_norm": 27.262426376342773, "learning_rate": 5.583333333333334e-06, "loss": 47.8833, "step": 380 }, { "epoch": 9.071641791044776, "grad_norm": 29.223133087158203, "learning_rate": 5.571428571428572e-06, "loss": 47.1563, "step": 381 }, { "epoch": 9.0955223880597, "grad_norm": 21.004749298095703, "learning_rate": 5.5595238095238104e-06, "loss": 49.0407, "step": 382 }, { "epoch": 9.119402985074627, "grad_norm": 25.157907485961914, "learning_rate": 5.547619047619048e-06, "loss": 48.5233, "step": 383 }, { "epoch": 9.143283582089552, "grad_norm": 17.611478805541992, "learning_rate": 5.535714285714286e-06, "loss": 47.4846, "step": 384 }, { "epoch": 9.167164179104478, "grad_norm": 21.00395965576172, "learning_rate": 5.523809523809525e-06, "loss": 49.7533, "step": 385 }, { "epoch": 9.191044776119403, "grad_norm": 22.07697296142578, "learning_rate": 5.511904761904763e-06, "loss": 48.5003, "step": 386 }, { "epoch": 9.214925373134328, "grad_norm": 21.743778228759766, "learning_rate": 5.500000000000001e-06, "loss": 48.149, "step": 387 }, { "epoch": 9.238805970149254, "grad_norm": 23.499980926513672, "learning_rate": 5.4880952380952394e-06, "loss": 48.2213, "step": 388 }, { "epoch": 9.26268656716418, "grad_norm": 22.22580337524414, "learning_rate": 5.476190476190477e-06, "loss": 48.4671, "step": 389 }, { "epoch": 9.286567164179104, "grad_norm": 26.5915470123291, "learning_rate": 5.464285714285714e-06, "loss": 49.2343, "step": 390 }, { "epoch": 9.31044776119403, "grad_norm": 22.510892868041992, "learning_rate": 5.452380952380952e-06, "loss": 48.9363, "step": 391 }, { "epoch": 9.334328358208955, "grad_norm": 27.17405128479004, "learning_rate": 5.44047619047619e-06, "loss": 49.1814, "step": 392 }, { "epoch": 9.35820895522388, "grad_norm": 29.143529891967773, "learning_rate": 5.428571428571429e-06, "loss": 48.4786, "step": 393 }, { "epoch": 9.382089552238806, "grad_norm": 20.24784278869629, "learning_rate": 5.416666666666667e-06, "loss": 49.2987, "step": 394 }, { "epoch": 9.405970149253731, "grad_norm": 31.44426155090332, "learning_rate": 5.404761904761905e-06, "loss": 49.9466, "step": 395 }, { "epoch": 9.429850746268656, "grad_norm": 23.775951385498047, "learning_rate": 5.392857142857143e-06, "loss": 49.1681, "step": 396 }, { "epoch": 9.453731343283582, "grad_norm": 22.168636322021484, "learning_rate": 5.380952380952381e-06, "loss": 48.8523, "step": 397 }, { "epoch": 9.477611940298507, "grad_norm": 20.944936752319336, "learning_rate": 5.369047619047619e-06, "loss": 48.7369, "step": 398 }, { "epoch": 9.501492537313434, "grad_norm": 23.880292892456055, "learning_rate": 5.357142857142857e-06, "loss": 48.4703, "step": 399 }, { "epoch": 9.525373134328358, "grad_norm": 25.316978454589844, "learning_rate": 5.345238095238096e-06, "loss": 48.3752, "step": 400 }, { "epoch": 9.549253731343283, "grad_norm": 24.398311614990234, "learning_rate": 5.333333333333334e-06, "loss": 47.532, "step": 401 }, { "epoch": 9.57313432835821, "grad_norm": 23.157140731811523, "learning_rate": 5.3214285714285715e-06, "loss": 49.1824, "step": 402 }, { "epoch": 9.597014925373134, "grad_norm": 21.641061782836914, "learning_rate": 5.30952380952381e-06, "loss": 49.6601, "step": 403 }, { "epoch": 9.620895522388059, "grad_norm": 23.863712310791016, "learning_rate": 5.297619047619048e-06, "loss": 49.2146, "step": 404 }, { "epoch": 9.644776119402986, "grad_norm": 21.876007080078125, "learning_rate": 5.285714285714286e-06, "loss": 48.0027, "step": 405 }, { "epoch": 9.66865671641791, "grad_norm": 25.783042907714844, "learning_rate": 5.273809523809525e-06, "loss": 48.2702, "step": 406 }, { "epoch": 9.692537313432837, "grad_norm": 18.782087326049805, "learning_rate": 5.261904761904763e-06, "loss": 48.9365, "step": 407 }, { "epoch": 9.716417910447761, "grad_norm": 20.206588745117188, "learning_rate": 5.2500000000000006e-06, "loss": 49.4144, "step": 408 }, { "epoch": 9.740298507462686, "grad_norm": 20.98710823059082, "learning_rate": 5.2380952380952384e-06, "loss": 49.7442, "step": 409 }, { "epoch": 9.764179104477613, "grad_norm": 19.24452018737793, "learning_rate": 5.226190476190477e-06, "loss": 49.0249, "step": 410 }, { "epoch": 9.788059701492537, "grad_norm": 23.18075180053711, "learning_rate": 5.214285714285715e-06, "loss": 48.8795, "step": 411 }, { "epoch": 9.811940298507462, "grad_norm": 17.233261108398438, "learning_rate": 5.202380952380953e-06, "loss": 49.2985, "step": 412 }, { "epoch": 9.835820895522389, "grad_norm": 24.74007797241211, "learning_rate": 5.190476190476192e-06, "loss": 48.8793, "step": 413 }, { "epoch": 9.859701492537313, "grad_norm": 20.26863670349121, "learning_rate": 5.1785714285714296e-06, "loss": 49.6989, "step": 414 }, { "epoch": 9.883582089552238, "grad_norm": 26.168167114257812, "learning_rate": 5.1666666666666675e-06, "loss": 48.7413, "step": 415 }, { "epoch": 9.907462686567165, "grad_norm": 29.008501052856445, "learning_rate": 5.1547619047619045e-06, "loss": 48.7414, "step": 416 }, { "epoch": 9.93134328358209, "grad_norm": 18.459829330444336, "learning_rate": 5.142857142857142e-06, "loss": 47.7865, "step": 417 }, { "epoch": 9.955223880597014, "grad_norm": 20.898181915283203, "learning_rate": 5.130952380952381e-06, "loss": 47.7274, "step": 418 }, { "epoch": 9.97910447761194, "grad_norm": 23.5065860748291, "learning_rate": 5.119047619047619e-06, "loss": 48.471, "step": 419 }, { "epoch": 10.0, "grad_norm": 23.147043228149414, "learning_rate": 5.107142857142857e-06, "loss": 42.3971, "step": 420 }, { "epoch": 10.023880597014925, "grad_norm": 28.423707962036133, "learning_rate": 5.095238095238096e-06, "loss": 49.4977, "step": 421 }, { "epoch": 10.047761194029851, "grad_norm": 22.017820358276367, "learning_rate": 5.0833333333333335e-06, "loss": 47.0638, "step": 422 }, { "epoch": 10.071641791044776, "grad_norm": 18.173845291137695, "learning_rate": 5.071428571428571e-06, "loss": 48.338, "step": 423 }, { "epoch": 10.0955223880597, "grad_norm": 17.628551483154297, "learning_rate": 5.05952380952381e-06, "loss": 48.2847, "step": 424 }, { "epoch": 10.119402985074627, "grad_norm": 19.974040985107422, "learning_rate": 5.047619047619048e-06, "loss": 49.2284, "step": 425 }, { "epoch": 10.143283582089552, "grad_norm": 22.45549774169922, "learning_rate": 5.035714285714286e-06, "loss": 49.6345, "step": 426 }, { "epoch": 10.167164179104478, "grad_norm": 21.609479904174805, "learning_rate": 5.023809523809524e-06, "loss": 48.2098, "step": 427 }, { "epoch": 10.191044776119403, "grad_norm": 24.7137451171875, "learning_rate": 5.0119047619047625e-06, "loss": 47.9527, "step": 428 }, { "epoch": 10.214925373134328, "grad_norm": 22.888975143432617, "learning_rate": 5e-06, "loss": 49.781, "step": 429 }, { "epoch": 10.238805970149254, "grad_norm": 25.53217124938965, "learning_rate": 4.988095238095238e-06, "loss": 48.9902, "step": 430 }, { "epoch": 10.26268656716418, "grad_norm": 27.80384063720703, "learning_rate": 4.976190476190477e-06, "loss": 48.2545, "step": 431 }, { "epoch": 10.286567164179104, "grad_norm": 21.421342849731445, "learning_rate": 4.964285714285715e-06, "loss": 49.1483, "step": 432 }, { "epoch": 10.31044776119403, "grad_norm": 26.178152084350586, "learning_rate": 4.952380952380953e-06, "loss": 49.1129, "step": 433 }, { "epoch": 10.334328358208955, "grad_norm": 27.993371963500977, "learning_rate": 4.940476190476191e-06, "loss": 48.1783, "step": 434 }, { "epoch": 10.35820895522388, "grad_norm": 26.75821876525879, "learning_rate": 4.928571428571429e-06, "loss": 48.1773, "step": 435 }, { "epoch": 10.382089552238806, "grad_norm": 25.641353607177734, "learning_rate": 4.9166666666666665e-06, "loss": 48.9295, "step": 436 }, { "epoch": 10.405970149253731, "grad_norm": 23.26271629333496, "learning_rate": 4.904761904761905e-06, "loss": 49.5486, "step": 437 }, { "epoch": 10.429850746268656, "grad_norm": 23.637466430664062, "learning_rate": 4.892857142857143e-06, "loss": 48.1263, "step": 438 }, { "epoch": 10.453731343283582, "grad_norm": 29.285432815551758, "learning_rate": 4.880952380952381e-06, "loss": 48.2424, "step": 439 }, { "epoch": 10.477611940298507, "grad_norm": 29.91914939880371, "learning_rate": 4.86904761904762e-06, "loss": 48.3695, "step": 440 }, { "epoch": 10.501492537313434, "grad_norm": 25.249099731445312, "learning_rate": 4.857142857142858e-06, "loss": 48.3644, "step": 441 }, { "epoch": 10.525373134328358, "grad_norm": 22.37591552734375, "learning_rate": 4.8452380952380955e-06, "loss": 49.397, "step": 442 }, { "epoch": 10.549253731343283, "grad_norm": 22.805437088012695, "learning_rate": 4.833333333333333e-06, "loss": 48.6522, "step": 443 }, { "epoch": 10.57313432835821, "grad_norm": 21.229095458984375, "learning_rate": 4.821428571428572e-06, "loss": 47.6681, "step": 444 }, { "epoch": 10.597014925373134, "grad_norm": 23.359468460083008, "learning_rate": 4.80952380952381e-06, "loss": 48.602, "step": 445 }, { "epoch": 10.620895522388059, "grad_norm": 20.953310012817383, "learning_rate": 4.797619047619048e-06, "loss": 49.3366, "step": 446 }, { "epoch": 10.644776119402986, "grad_norm": 21.970388412475586, "learning_rate": 4.785714285714287e-06, "loss": 46.964, "step": 447 }, { "epoch": 10.66865671641791, "grad_norm": 24.282426834106445, "learning_rate": 4.7738095238095245e-06, "loss": 48.2676, "step": 448 }, { "epoch": 10.692537313432837, "grad_norm": 15.47967529296875, "learning_rate": 4.761904761904762e-06, "loss": 48.1993, "step": 449 }, { "epoch": 10.716417910447761, "grad_norm": 23.230947494506836, "learning_rate": 4.75e-06, "loss": 48.5229, "step": 450 }, { "epoch": 10.740298507462686, "grad_norm": 20.514225006103516, "learning_rate": 4.738095238095238e-06, "loss": 48.062, "step": 451 }, { "epoch": 10.764179104477613, "grad_norm": 19.060667037963867, "learning_rate": 4.726190476190476e-06, "loss": 48.3893, "step": 452 }, { "epoch": 10.788059701492537, "grad_norm": 29.78558349609375, "learning_rate": 4.714285714285715e-06, "loss": 48.9921, "step": 453 }, { "epoch": 10.811940298507462, "grad_norm": 23.262001037597656, "learning_rate": 4.702380952380953e-06, "loss": 48.5597, "step": 454 }, { "epoch": 10.835820895522389, "grad_norm": 25.83403778076172, "learning_rate": 4.6904761904761905e-06, "loss": 49.2911, "step": 455 }, { "epoch": 10.859701492537313, "grad_norm": 21.846391677856445, "learning_rate": 4.678571428571429e-06, "loss": 47.3256, "step": 456 }, { "epoch": 10.883582089552238, "grad_norm": 17.09532356262207, "learning_rate": 4.666666666666667e-06, "loss": 48.3647, "step": 457 }, { "epoch": 10.907462686567165, "grad_norm": 31.050525665283203, "learning_rate": 4.654761904761905e-06, "loss": 48.3605, "step": 458 }, { "epoch": 10.93134328358209, "grad_norm": 22.532379150390625, "learning_rate": 4.642857142857144e-06, "loss": 49.0826, "step": 459 }, { "epoch": 10.955223880597014, "grad_norm": 23.585033416748047, "learning_rate": 4.630952380952382e-06, "loss": 48.5111, "step": 460 }, { "epoch": 10.97910447761194, "grad_norm": NaN, "learning_rate": 4.6190476190476196e-06, "loss": 66.9717, "step": 461 }, { "epoch": 11.0, "grad_norm": 24.73590087890625, "learning_rate": 4.6190476190476196e-06, "loss": 41.9122, "step": 462 }, { "epoch": 11.023880597014925, "grad_norm": 27.4709415435791, "learning_rate": 4.6071428571428574e-06, "loss": 48.4682, "step": 463 }, { "epoch": 11.047761194029851, "grad_norm": 26.158245086669922, "learning_rate": 4.595238095238095e-06, "loss": 48.1845, "step": 464 }, { "epoch": 11.071641791044776, "grad_norm": 25.14693260192871, "learning_rate": 4.583333333333333e-06, "loss": 48.4229, "step": 465 }, { "epoch": 11.0955223880597, "grad_norm": 22.229764938354492, "learning_rate": 4.571428571428572e-06, "loss": 47.8876, "step": 466 }, { "epoch": 11.119402985074627, "grad_norm": 24.202686309814453, "learning_rate": 4.55952380952381e-06, "loss": 48.4304, "step": 467 }, { "epoch": 11.143283582089552, "grad_norm": 21.449726104736328, "learning_rate": 4.547619047619048e-06, "loss": 47.6457, "step": 468 }, { "epoch": 11.167164179104478, "grad_norm": 23.769763946533203, "learning_rate": 4.5357142857142865e-06, "loss": 49.1031, "step": 469 }, { "epoch": 11.191044776119403, "grad_norm": 21.20684814453125, "learning_rate": 4.523809523809524e-06, "loss": 47.6488, "step": 470 }, { "epoch": 11.214925373134328, "grad_norm": 17.992631912231445, "learning_rate": 4.511904761904762e-06, "loss": 47.9435, "step": 471 }, { "epoch": 11.238805970149254, "grad_norm": 22.017776489257812, "learning_rate": 4.5e-06, "loss": 48.5224, "step": 472 }, { "epoch": 11.26268656716418, "grad_norm": 22.98673439025879, "learning_rate": 4.488095238095239e-06, "loss": 47.9258, "step": 473 }, { "epoch": 11.286567164179104, "grad_norm": 16.146743774414062, "learning_rate": 4.476190476190477e-06, "loss": 48.3957, "step": 474 }, { "epoch": 11.31044776119403, "grad_norm": 23.30071258544922, "learning_rate": 4.464285714285715e-06, "loss": 48.5472, "step": 475 }, { "epoch": 11.334328358208955, "grad_norm": 24.949913024902344, "learning_rate": 4.4523809523809525e-06, "loss": 48.2387, "step": 476 }, { "epoch": 11.35820895522388, "grad_norm": 23.10662841796875, "learning_rate": 4.44047619047619e-06, "loss": 49.0681, "step": 477 }, { "epoch": 11.382089552238806, "grad_norm": 19.024614334106445, "learning_rate": 4.428571428571429e-06, "loss": 49.3255, "step": 478 }, { "epoch": 11.405970149253731, "grad_norm": 22.34437370300293, "learning_rate": 4.416666666666667e-06, "loss": 47.0069, "step": 479 }, { "epoch": 11.429850746268656, "grad_norm": 23.563596725463867, "learning_rate": 4.404761904761905e-06, "loss": 46.8188, "step": 480 }, { "epoch": 11.453731343283582, "grad_norm": 20.5488338470459, "learning_rate": 4.392857142857143e-06, "loss": 47.8277, "step": 481 }, { "epoch": 11.477611940298507, "grad_norm": 18.416519165039062, "learning_rate": 4.3809523809523815e-06, "loss": 48.2203, "step": 482 }, { "epoch": 11.501492537313434, "grad_norm": 28.21132469177246, "learning_rate": 4.369047619047619e-06, "loss": 48.0691, "step": 483 }, { "epoch": 11.525373134328358, "grad_norm": 21.36182975769043, "learning_rate": 4.357142857142857e-06, "loss": 48.273, "step": 484 }, { "epoch": 11.549253731343283, "grad_norm": 25.726530075073242, "learning_rate": 4.345238095238096e-06, "loss": 48.7529, "step": 485 }, { "epoch": 11.57313432835821, "grad_norm": 21.686412811279297, "learning_rate": 4.333333333333334e-06, "loss": 48.3005, "step": 486 }, { "epoch": 11.597014925373134, "grad_norm": 20.56638526916504, "learning_rate": 4.321428571428572e-06, "loss": 50.1248, "step": 487 }, { "epoch": 11.620895522388059, "grad_norm": 24.193323135375977, "learning_rate": 4.30952380952381e-06, "loss": 48.6031, "step": 488 }, { "epoch": 11.644776119402986, "grad_norm": 17.18548583984375, "learning_rate": 4.297619047619048e-06, "loss": 49.2039, "step": 489 }, { "epoch": 11.66865671641791, "grad_norm": 19.07050895690918, "learning_rate": 4.2857142857142855e-06, "loss": 48.0961, "step": 490 }, { "epoch": 11.692537313432837, "grad_norm": 19.831188201904297, "learning_rate": 4.273809523809524e-06, "loss": 48.5481, "step": 491 }, { "epoch": 11.716417910447761, "grad_norm": 23.408592224121094, "learning_rate": 4.261904761904762e-06, "loss": 48.583, "step": 492 }, { "epoch": 11.740298507462686, "grad_norm": 22.152788162231445, "learning_rate": 4.25e-06, "loss": 48.6684, "step": 493 }, { "epoch": 11.764179104477613, "grad_norm": NaN, "learning_rate": 4.238095238095239e-06, "loss": 54.7097, "step": 494 }, { "epoch": 11.788059701492537, "grad_norm": 23.1225528717041, "learning_rate": 4.238095238095239e-06, "loss": 48.5439, "step": 495 }, { "epoch": 11.811940298507462, "grad_norm": 24.673904418945312, "learning_rate": 4.226190476190477e-06, "loss": 48.2645, "step": 496 }, { "epoch": 11.835820895522389, "grad_norm": 23.318784713745117, "learning_rate": 4.2142857142857145e-06, "loss": 47.9159, "step": 497 }, { "epoch": 11.859701492537313, "grad_norm": 24.62889289855957, "learning_rate": 4.202380952380952e-06, "loss": 48.1392, "step": 498 }, { "epoch": 11.883582089552238, "grad_norm": 17.315168380737305, "learning_rate": 4.190476190476191e-06, "loss": 49.399, "step": 499 }, { "epoch": 11.907462686567165, "grad_norm": 24.458532333374023, "learning_rate": 4.178571428571429e-06, "loss": 49.2189, "step": 500 }, { "epoch": 11.93134328358209, "grad_norm": 28.294036865234375, "learning_rate": 4.166666666666667e-06, "loss": 48.4759, "step": 501 }, { "epoch": 11.955223880597014, "grad_norm": 22.393577575683594, "learning_rate": 4.154761904761906e-06, "loss": 48.9718, "step": 502 }, { "epoch": 11.97910447761194, "grad_norm": 20.199522018432617, "learning_rate": 4.1428571428571435e-06, "loss": 47.5364, "step": 503 }, { "epoch": 12.0, "grad_norm": 22.080204010009766, "learning_rate": 4.130952380952381e-06, "loss": 42.5308, "step": 504 }, { "epoch": 12.023880597014925, "grad_norm": 28.897024154663086, "learning_rate": 4.119047619047619e-06, "loss": 48.9022, "step": 505 }, { "epoch": 12.047761194029851, "grad_norm": 28.31342887878418, "learning_rate": 4.107142857142857e-06, "loss": 47.6489, "step": 506 }, { "epoch": 12.071641791044776, "grad_norm": 22.62079620361328, "learning_rate": 4.095238095238096e-06, "loss": 48.1606, "step": 507 }, { "epoch": 12.0955223880597, "grad_norm": 33.49858474731445, "learning_rate": 4.083333333333334e-06, "loss": 47.8462, "step": 508 }, { "epoch": 12.119402985074627, "grad_norm": 22.20858383178711, "learning_rate": 4.071428571428572e-06, "loss": 47.2505, "step": 509 }, { "epoch": 12.143283582089552, "grad_norm": 25.425495147705078, "learning_rate": 4.0595238095238095e-06, "loss": 48.6289, "step": 510 }, { "epoch": 12.167164179104478, "grad_norm": 29.32784652709961, "learning_rate": 4.047619047619048e-06, "loss": 47.7772, "step": 511 }, { "epoch": 12.191044776119403, "grad_norm": 20.661781311035156, "learning_rate": 4.035714285714286e-06, "loss": 47.1414, "step": 512 }, { "epoch": 12.214925373134328, "grad_norm": 31.4210205078125, "learning_rate": 4.023809523809524e-06, "loss": 47.4312, "step": 513 }, { "epoch": 12.238805970149254, "grad_norm": 32.390071868896484, "learning_rate": 4.011904761904763e-06, "loss": 49.9899, "step": 514 }, { "epoch": 12.26268656716418, "grad_norm": 17.431835174560547, "learning_rate": 4.000000000000001e-06, "loss": 48.8975, "step": 515 }, { "epoch": 12.286567164179104, "grad_norm": 29.32766342163086, "learning_rate": 3.9880952380952386e-06, "loss": 48.8764, "step": 516 }, { "epoch": 12.31044776119403, "grad_norm": 29.523069381713867, "learning_rate": 3.9761904761904764e-06, "loss": 48.2602, "step": 517 }, { "epoch": 12.334328358208955, "grad_norm": 23.866840362548828, "learning_rate": 3.964285714285714e-06, "loss": 47.4016, "step": 518 }, { "epoch": 12.35820895522388, "grad_norm": 27.464962005615234, "learning_rate": 3.952380952380952e-06, "loss": 48.2559, "step": 519 }, { "epoch": 12.382089552238806, "grad_norm": 19.796552658081055, "learning_rate": 3.940476190476191e-06, "loss": 48.7665, "step": 520 }, { "epoch": 12.405970149253731, "grad_norm": 18.637983322143555, "learning_rate": 3.928571428571429e-06, "loss": 48.1456, "step": 521 }, { "epoch": 12.429850746268656, "grad_norm": 22.065799713134766, "learning_rate": 3.916666666666667e-06, "loss": 48.7803, "step": 522 }, { "epoch": 12.453731343283582, "grad_norm": 22.648218154907227, "learning_rate": 3.9047619047619055e-06, "loss": 47.3376, "step": 523 }, { "epoch": 12.477611940298507, "grad_norm": 17.55946922302246, "learning_rate": 3.892857142857143e-06, "loss": 47.6002, "step": 524 }, { "epoch": 12.501492537313434, "grad_norm": 19.173139572143555, "learning_rate": 3.880952380952381e-06, "loss": 48.8976, "step": 525 }, { "epoch": 12.525373134328358, "grad_norm": 24.052696228027344, "learning_rate": 3.869047619047619e-06, "loss": 48.1851, "step": 526 }, { "epoch": 12.549253731343283, "grad_norm": 19.28683090209961, "learning_rate": 3.857142857142858e-06, "loss": 48.0342, "step": 527 }, { "epoch": 12.57313432835821, "grad_norm": 21.528470993041992, "learning_rate": 3.845238095238096e-06, "loss": 49.3597, "step": 528 }, { "epoch": 12.597014925373134, "grad_norm": 22.880159378051758, "learning_rate": 3.833333333333334e-06, "loss": 47.9594, "step": 529 }, { "epoch": 12.620895522388059, "grad_norm": 19.00438117980957, "learning_rate": 3.8214285714285715e-06, "loss": 47.2837, "step": 530 }, { "epoch": 12.644776119402986, "grad_norm": 22.21845054626465, "learning_rate": 3.80952380952381e-06, "loss": 47.1453, "step": 531 }, { "epoch": 12.66865671641791, "grad_norm": 18.551712036132812, "learning_rate": 3.7976190476190477e-06, "loss": 47.9594, "step": 532 }, { "epoch": 12.692537313432837, "grad_norm": 17.805360794067383, "learning_rate": 3.785714285714286e-06, "loss": 49.1036, "step": 533 }, { "epoch": 12.716417910447761, "grad_norm": 14.508918762207031, "learning_rate": 3.773809523809524e-06, "loss": 48.1203, "step": 534 }, { "epoch": 12.740298507462686, "grad_norm": 19.395994186401367, "learning_rate": 3.761904761904762e-06, "loss": 47.7891, "step": 535 }, { "epoch": 12.764179104477613, "grad_norm": 27.492908477783203, "learning_rate": 3.7500000000000005e-06, "loss": 48.9027, "step": 536 }, { "epoch": 12.788059701492537, "grad_norm": 21.751968383789062, "learning_rate": 3.7380952380952384e-06, "loss": 48.0929, "step": 537 }, { "epoch": 12.811940298507462, "grad_norm": 24.78274917602539, "learning_rate": 3.7261904761904767e-06, "loss": 48.1678, "step": 538 }, { "epoch": 12.835820895522389, "grad_norm": 26.319196701049805, "learning_rate": 3.7142857142857146e-06, "loss": 49.1874, "step": 539 }, { "epoch": 12.859701492537313, "grad_norm": 20.670148849487305, "learning_rate": 3.702380952380953e-06, "loss": 48.8441, "step": 540 }, { "epoch": 12.883582089552238, "grad_norm": 23.578706741333008, "learning_rate": 3.690476190476191e-06, "loss": 47.1627, "step": 541 }, { "epoch": 12.907462686567165, "grad_norm": 23.807973861694336, "learning_rate": 3.678571428571429e-06, "loss": 47.493, "step": 542 }, { "epoch": 12.93134328358209, "grad_norm": 20.977373123168945, "learning_rate": 3.6666666666666666e-06, "loss": 49.3489, "step": 543 }, { "epoch": 12.955223880597014, "grad_norm": 21.219995498657227, "learning_rate": 3.654761904761905e-06, "loss": 49.8562, "step": 544 }, { "epoch": 12.97910447761194, "grad_norm": 17.777210235595703, "learning_rate": 3.642857142857143e-06, "loss": 48.4018, "step": 545 }, { "epoch": 13.0, "grad_norm": 17.52475929260254, "learning_rate": 3.630952380952381e-06, "loss": 42.3621, "step": 546 }, { "epoch": 13.023880597014925, "grad_norm": 23.431884765625, "learning_rate": 3.6190476190476194e-06, "loss": 49.0982, "step": 547 }, { "epoch": 13.047761194029851, "grad_norm": 25.512338638305664, "learning_rate": 3.6071428571428573e-06, "loss": 47.5758, "step": 548 }, { "epoch": 13.071641791044776, "grad_norm": 25.41205406188965, "learning_rate": 3.5952380952380956e-06, "loss": 49.0519, "step": 549 }, { "epoch": 13.0955223880597, "grad_norm": 20.511945724487305, "learning_rate": 3.5833333333333335e-06, "loss": 48.9739, "step": 550 }, { "epoch": 13.119402985074627, "grad_norm": 18.88302993774414, "learning_rate": 3.5714285714285718e-06, "loss": 47.0551, "step": 551 }, { "epoch": 13.143283582089552, "grad_norm": 17.176782608032227, "learning_rate": 3.55952380952381e-06, "loss": 48.0771, "step": 552 }, { "epoch": 13.167164179104478, "grad_norm": 19.72154426574707, "learning_rate": 3.547619047619048e-06, "loss": 49.5084, "step": 553 }, { "epoch": 13.191044776119403, "grad_norm": 24.780994415283203, "learning_rate": 3.5357142857142863e-06, "loss": 46.5557, "step": 554 }, { "epoch": 13.214925373134328, "grad_norm": 20.380996704101562, "learning_rate": 3.523809523809524e-06, "loss": 48.841, "step": 555 }, { "epoch": 13.238805970149254, "grad_norm": 26.90860939025879, "learning_rate": 3.511904761904762e-06, "loss": 47.6185, "step": 556 }, { "epoch": 13.26268656716418, "grad_norm": 21.40388298034668, "learning_rate": 3.5e-06, "loss": 47.787, "step": 557 }, { "epoch": 13.286567164179104, "grad_norm": 24.708845138549805, "learning_rate": 3.4880952380952383e-06, "loss": 47.1974, "step": 558 }, { "epoch": 13.31044776119403, "grad_norm": 25.317148208618164, "learning_rate": 3.476190476190476e-06, "loss": 49.2282, "step": 559 }, { "epoch": 13.334328358208955, "grad_norm": 22.903011322021484, "learning_rate": 3.4642857142857145e-06, "loss": 47.0762, "step": 560 }, { "epoch": 13.35820895522388, "grad_norm": 23.626604080200195, "learning_rate": 3.4523809523809528e-06, "loss": 47.3622, "step": 561 }, { "epoch": 13.382089552238806, "grad_norm": 16.69061279296875, "learning_rate": 3.4404761904761907e-06, "loss": 48.5621, "step": 562 }, { "epoch": 13.405970149253731, "grad_norm": 20.52508544921875, "learning_rate": 3.428571428571429e-06, "loss": 47.6565, "step": 563 }, { "epoch": 13.429850746268656, "grad_norm": 25.125743865966797, "learning_rate": 3.416666666666667e-06, "loss": 48.1353, "step": 564 }, { "epoch": 13.453731343283582, "grad_norm": 20.697166442871094, "learning_rate": 3.404761904761905e-06, "loss": 47.9368, "step": 565 }, { "epoch": 13.477611940298507, "grad_norm": 22.396892547607422, "learning_rate": 3.3928571428571435e-06, "loss": 48.2956, "step": 566 }, { "epoch": 13.501492537313434, "grad_norm": 24.770437240600586, "learning_rate": 3.3809523809523814e-06, "loss": 48.4467, "step": 567 }, { "epoch": 13.525373134328358, "grad_norm": 19.44706153869629, "learning_rate": 3.3690476190476197e-06, "loss": 48.3155, "step": 568 }, { "epoch": 13.549253731343283, "grad_norm": 27.680660247802734, "learning_rate": 3.357142857142857e-06, "loss": 47.9039, "step": 569 }, { "epoch": 13.57313432835821, "grad_norm": 21.89419174194336, "learning_rate": 3.3452380952380954e-06, "loss": 47.9416, "step": 570 }, { "epoch": 13.597014925373134, "grad_norm": 19.10918426513672, "learning_rate": 3.3333333333333333e-06, "loss": 48.45, "step": 571 }, { "epoch": 13.620895522388059, "grad_norm": 29.83106231689453, "learning_rate": 3.3214285714285716e-06, "loss": 48.9583, "step": 572 }, { "epoch": 13.644776119402986, "grad_norm": 28.05882453918457, "learning_rate": 3.3095238095238095e-06, "loss": 49.108, "step": 573 }, { "epoch": 13.66865671641791, "grad_norm": 17.379384994506836, "learning_rate": 3.297619047619048e-06, "loss": 48.4707, "step": 574 }, { "epoch": 13.692537313432837, "grad_norm": 19.15117645263672, "learning_rate": 3.285714285714286e-06, "loss": 48.476, "step": 575 }, { "epoch": 13.716417910447761, "grad_norm": 23.892152786254883, "learning_rate": 3.273809523809524e-06, "loss": 48.0321, "step": 576 }, { "epoch": 13.740298507462686, "grad_norm": 18.658008575439453, "learning_rate": 3.2619047619047623e-06, "loss": 47.2192, "step": 577 }, { "epoch": 13.764179104477613, "grad_norm": 16.940099716186523, "learning_rate": 3.2500000000000002e-06, "loss": 49.2263, "step": 578 }, { "epoch": 13.788059701492537, "grad_norm": 25.7972412109375, "learning_rate": 3.2380952380952385e-06, "loss": 47.5039, "step": 579 }, { "epoch": 13.811940298507462, "grad_norm": 28.928129196166992, "learning_rate": 3.226190476190477e-06, "loss": 47.9264, "step": 580 }, { "epoch": 13.835820895522389, "grad_norm": 23.67597007751465, "learning_rate": 3.2142857142857147e-06, "loss": 49.1464, "step": 581 }, { "epoch": 13.859701492537313, "grad_norm": 18.345443725585938, "learning_rate": 3.202380952380952e-06, "loss": 47.888, "step": 582 }, { "epoch": 13.883582089552238, "grad_norm": 19.80716896057129, "learning_rate": 3.1904761904761905e-06, "loss": 47.4324, "step": 583 }, { "epoch": 13.907462686567165, "grad_norm": 20.488346099853516, "learning_rate": 3.178571428571429e-06, "loss": 48.3033, "step": 584 }, { "epoch": 13.93134328358209, "grad_norm": 22.3657283782959, "learning_rate": 3.1666666666666667e-06, "loss": 48.1474, "step": 585 }, { "epoch": 13.955223880597014, "grad_norm": 17.457408905029297, "learning_rate": 3.154761904761905e-06, "loss": 47.2418, "step": 586 }, { "epoch": 13.97910447761194, "grad_norm": NaN, "learning_rate": 3.142857142857143e-06, "loss": 54.1812, "step": 587 }, { "epoch": 14.0, "grad_norm": 17.137672424316406, "learning_rate": 3.142857142857143e-06, "loss": 42.3703, "step": 588 }, { "epoch": 14.023880597014925, "grad_norm": 20.55642318725586, "learning_rate": 3.130952380952381e-06, "loss": 49.4628, "step": 589 }, { "epoch": 14.047761194029851, "grad_norm": 19.925596237182617, "learning_rate": 3.1190476190476195e-06, "loss": 47.5266, "step": 590 }, { "epoch": 14.071641791044776, "grad_norm": 12.49276065826416, "learning_rate": 3.1071428571428574e-06, "loss": 47.8654, "step": 591 }, { "epoch": 14.0955223880597, "grad_norm": 17.266550064086914, "learning_rate": 3.0952380952380957e-06, "loss": 48.4362, "step": 592 }, { "epoch": 14.119402985074627, "grad_norm": 18.234397888183594, "learning_rate": 3.0833333333333336e-06, "loss": 48.9532, "step": 593 }, { "epoch": 14.143283582089552, "grad_norm": 19.880165100097656, "learning_rate": 3.071428571428572e-06, "loss": 48.0088, "step": 594 }, { "epoch": 14.167164179104478, "grad_norm": 23.04216766357422, "learning_rate": 3.05952380952381e-06, "loss": 48.0934, "step": 595 }, { "epoch": 14.191044776119403, "grad_norm": 19.199676513671875, "learning_rate": 3.047619047619048e-06, "loss": 48.3845, "step": 596 }, { "epoch": 14.214925373134328, "grad_norm": 20.758337020874023, "learning_rate": 3.0357142857142856e-06, "loss": 47.4652, "step": 597 }, { "epoch": 14.238805970149254, "grad_norm": 17.532787322998047, "learning_rate": 3.023809523809524e-06, "loss": 48.0212, "step": 598 }, { "epoch": 14.26268656716418, "grad_norm": 16.547094345092773, "learning_rate": 3.011904761904762e-06, "loss": 48.6113, "step": 599 }, { "epoch": 14.286567164179104, "grad_norm": 16.324464797973633, "learning_rate": 3e-06, "loss": 47.9735, "step": 600 }, { "epoch": 14.31044776119403, "grad_norm": 16.54167938232422, "learning_rate": 2.9880952380952384e-06, "loss": 47.4436, "step": 601 }, { "epoch": 14.334328358208955, "grad_norm": 23.455759048461914, "learning_rate": 2.9761904761904763e-06, "loss": 47.6631, "step": 602 }, { "epoch": 14.35820895522388, "grad_norm": 19.159008026123047, "learning_rate": 2.9642857142857146e-06, "loss": 48.0291, "step": 603 }, { "epoch": 14.382089552238806, "grad_norm": 18.66881561279297, "learning_rate": 2.9523809523809525e-06, "loss": 46.4582, "step": 604 }, { "epoch": 14.405970149253731, "grad_norm": 19.129064559936523, "learning_rate": 2.9404761904761908e-06, "loss": 49.4455, "step": 605 }, { "epoch": 14.429850746268656, "grad_norm": NaN, "learning_rate": 2.928571428571429e-06, "loss": 78.6564, "step": 606 }, { "epoch": 14.453731343283582, "grad_norm": 18.47364044189453, "learning_rate": 2.928571428571429e-06, "loss": 48.1748, "step": 607 }, { "epoch": 14.477611940298507, "grad_norm": 17.920883178710938, "learning_rate": 2.916666666666667e-06, "loss": 47.6447, "step": 608 }, { "epoch": 14.501492537313434, "grad_norm": 18.263038635253906, "learning_rate": 2.9047619047619053e-06, "loss": 48.7324, "step": 609 }, { "epoch": 14.525373134328358, "grad_norm": 24.323266983032227, "learning_rate": 2.892857142857143e-06, "loss": 48.6135, "step": 610 }, { "epoch": 14.549253731343283, "grad_norm": 21.56492042541504, "learning_rate": 2.880952380952381e-06, "loss": 47.0007, "step": 611 }, { "epoch": 14.57313432835821, "grad_norm": 17.741748809814453, "learning_rate": 2.869047619047619e-06, "loss": 46.3136, "step": 612 }, { "epoch": 14.597014925373134, "grad_norm": 17.218914031982422, "learning_rate": 2.8571428571428573e-06, "loss": 47.6417, "step": 613 }, { "epoch": 14.620895522388059, "grad_norm": 22.856996536254883, "learning_rate": 2.8452380952380956e-06, "loss": 47.6898, "step": 614 }, { "epoch": 14.644776119402986, "grad_norm": NaN, "learning_rate": 2.8333333333333335e-06, "loss": 53.529, "step": 615 }, { "epoch": 14.66865671641791, "grad_norm": 23.29751968383789, "learning_rate": 2.8333333333333335e-06, "loss": 48.164, "step": 616 }, { "epoch": 14.692537313432837, "grad_norm": 15.633321762084961, "learning_rate": 2.8214285714285718e-06, "loss": 46.9866, "step": 617 }, { "epoch": 14.716417910447761, "grad_norm": 21.713376998901367, "learning_rate": 2.8095238095238096e-06, "loss": 48.2856, "step": 618 }, { "epoch": 14.740298507462686, "grad_norm": 17.07369613647461, "learning_rate": 2.797619047619048e-06, "loss": 46.4404, "step": 619 }, { "epoch": 14.764179104477613, "grad_norm": 14.855449676513672, "learning_rate": 2.785714285714286e-06, "loss": 48.2668, "step": 620 }, { "epoch": 14.788059701492537, "grad_norm": 16.479616165161133, "learning_rate": 2.773809523809524e-06, "loss": 49.2661, "step": 621 }, { "epoch": 14.811940298507462, "grad_norm": 14.471490859985352, "learning_rate": 2.7619047619047625e-06, "loss": 47.0484, "step": 622 }, { "epoch": 14.835820895522389, "grad_norm": 19.018714904785156, "learning_rate": 2.7500000000000004e-06, "loss": 49.2253, "step": 623 }, { "epoch": 14.859701492537313, "grad_norm": 16.21799087524414, "learning_rate": 2.7380952380952387e-06, "loss": 49.0738, "step": 624 }, { "epoch": 14.883582089552238, "grad_norm": 20.86383628845215, "learning_rate": 2.726190476190476e-06, "loss": 48.4231, "step": 625 }, { "epoch": 14.907462686567165, "grad_norm": 20.60930633544922, "learning_rate": 2.7142857142857144e-06, "loss": 47.7464, "step": 626 }, { "epoch": 14.93134328358209, "grad_norm": 20.909135818481445, "learning_rate": 2.7023809523809523e-06, "loss": 48.519, "step": 627 }, { "epoch": 14.955223880597014, "grad_norm": 18.555694580078125, "learning_rate": 2.6904761904761906e-06, "loss": 48.1625, "step": 628 }, { "epoch": 14.97910447761194, "grad_norm": 18.154813766479492, "learning_rate": 2.6785714285714285e-06, "loss": 48.9444, "step": 629 }, { "epoch": 15.0, "grad_norm": 19.308523178100586, "learning_rate": 2.666666666666667e-06, "loss": 42.1936, "step": 630 }, { "epoch": 15.023880597014925, "grad_norm": 22.725357055664062, "learning_rate": 2.654761904761905e-06, "loss": 49.5597, "step": 631 }, { "epoch": 15.047761194029851, "grad_norm": 18.862451553344727, "learning_rate": 2.642857142857143e-06, "loss": 46.914, "step": 632 }, { "epoch": 15.071641791044776, "grad_norm": 19.017065048217773, "learning_rate": 2.6309523809523813e-06, "loss": 47.7233, "step": 633 }, { "epoch": 15.0955223880597, "grad_norm": 19.03627586364746, "learning_rate": 2.6190476190476192e-06, "loss": 48.0406, "step": 634 }, { "epoch": 15.119402985074627, "grad_norm": 18.53116798400879, "learning_rate": 2.6071428571428575e-06, "loss": 47.3259, "step": 635 }, { "epoch": 15.143283582089552, "grad_norm": 19.265275955200195, "learning_rate": 2.595238095238096e-06, "loss": 47.2465, "step": 636 }, { "epoch": 15.167164179104478, "grad_norm": 19.497289657592773, "learning_rate": 2.5833333333333337e-06, "loss": 48.5984, "step": 637 }, { "epoch": 15.191044776119403, "grad_norm": 20.183780670166016, "learning_rate": 2.571428571428571e-06, "loss": 46.6221, "step": 638 }, { "epoch": 15.214925373134328, "grad_norm": 22.911672592163086, "learning_rate": 2.5595238095238095e-06, "loss": 48.0178, "step": 639 }, { "epoch": 15.238805970149254, "grad_norm": 20.678709030151367, "learning_rate": 2.547619047619048e-06, "loss": 47.0322, "step": 640 }, { "epoch": 15.26268656716418, "grad_norm": 18.579042434692383, "learning_rate": 2.5357142857142857e-06, "loss": 48.1428, "step": 641 }, { "epoch": 15.286567164179104, "grad_norm": 23.61576271057129, "learning_rate": 2.523809523809524e-06, "loss": 48.444, "step": 642 }, { "epoch": 15.31044776119403, "grad_norm": 19.602746963500977, "learning_rate": 2.511904761904762e-06, "loss": 48.2582, "step": 643 }, { "epoch": 15.334328358208955, "grad_norm": 12.509607315063477, "learning_rate": 2.5e-06, "loss": 48.8531, "step": 644 }, { "epoch": 15.35820895522388, "grad_norm": 18.749767303466797, "learning_rate": 2.4880952380952385e-06, "loss": 47.6453, "step": 645 }, { "epoch": 15.382089552238806, "grad_norm": 20.612041473388672, "learning_rate": 2.4761904761904764e-06, "loss": 48.7038, "step": 646 }, { "epoch": 15.405970149253731, "grad_norm": 18.65719985961914, "learning_rate": 2.4642857142857147e-06, "loss": 47.7954, "step": 647 }, { "epoch": 15.429850746268656, "grad_norm": 22.636686325073242, "learning_rate": 2.4523809523809526e-06, "loss": 48.1164, "step": 648 }, { "epoch": 15.453731343283582, "grad_norm": 20.93446922302246, "learning_rate": 2.4404761904761905e-06, "loss": 48.5955, "step": 649 }, { "epoch": 15.477611940298507, "grad_norm": 20.77125358581543, "learning_rate": 2.428571428571429e-06, "loss": 48.4369, "step": 650 }, { "epoch": 15.501492537313434, "grad_norm": 17.003498077392578, "learning_rate": 2.4166666666666667e-06, "loss": 49.0355, "step": 651 }, { "epoch": 15.525373134328358, "grad_norm": 20.743436813354492, "learning_rate": 2.404761904761905e-06, "loss": 47.8368, "step": 652 }, { "epoch": 15.549253731343283, "grad_norm": NaN, "learning_rate": 2.3928571428571433e-06, "loss": 41.6371, "step": 653 }, { "epoch": 15.57313432835821, "grad_norm": 21.716781616210938, "learning_rate": 2.3928571428571433e-06, "loss": 48.5806, "step": 654 }, { "epoch": 15.597014925373134, "grad_norm": 18.8812198638916, "learning_rate": 2.380952380952381e-06, "loss": 49.0707, "step": 655 }, { "epoch": 15.620895522388059, "grad_norm": 22.305049896240234, "learning_rate": 2.369047619047619e-06, "loss": 47.7556, "step": 656 }, { "epoch": 15.644776119402986, "grad_norm": 20.51401710510254, "learning_rate": 2.3571428571428574e-06, "loss": 48.1588, "step": 657 }, { "epoch": 15.66865671641791, "grad_norm": 17.691770553588867, "learning_rate": 2.3452380952380953e-06, "loss": 47.5187, "step": 658 }, { "epoch": 15.692537313432837, "grad_norm": 22.343585968017578, "learning_rate": 2.3333333333333336e-06, "loss": 47.5725, "step": 659 }, { "epoch": 15.716417910447761, "grad_norm": 21.656587600708008, "learning_rate": 2.321428571428572e-06, "loss": 47.6903, "step": 660 }, { "epoch": 15.740298507462686, "grad_norm": 20.632055282592773, "learning_rate": 2.3095238095238098e-06, "loss": 47.4526, "step": 661 }, { "epoch": 15.764179104477613, "grad_norm": 22.324811935424805, "learning_rate": 2.2976190476190477e-06, "loss": 47.3316, "step": 662 }, { "epoch": 15.788059701492537, "grad_norm": 19.320737838745117, "learning_rate": 2.285714285714286e-06, "loss": 48.2315, "step": 663 }, { "epoch": 15.811940298507462, "grad_norm": 18.58050537109375, "learning_rate": 2.273809523809524e-06, "loss": 47.9147, "step": 664 }, { "epoch": 15.835820895522389, "grad_norm": 20.37384796142578, "learning_rate": 2.261904761904762e-06, "loss": 47.5874, "step": 665 }, { "epoch": 15.859701492537313, "grad_norm": 20.893856048583984, "learning_rate": 2.25e-06, "loss": 49.225, "step": 666 }, { "epoch": 15.883582089552238, "grad_norm": 18.4589786529541, "learning_rate": 2.2380952380952384e-06, "loss": 47.3042, "step": 667 }, { "epoch": 15.907462686567165, "grad_norm": 20.845996856689453, "learning_rate": 2.2261904761904763e-06, "loss": 47.3255, "step": 668 }, { "epoch": 15.93134328358209, "grad_norm": 20.149137496948242, "learning_rate": 2.2142857142857146e-06, "loss": 48.6543, "step": 669 }, { "epoch": 15.955223880597014, "grad_norm": 14.768882751464844, "learning_rate": 2.2023809523809525e-06, "loss": 46.8274, "step": 670 }, { "epoch": 15.97910447761194, "grad_norm": 26.926074981689453, "learning_rate": 2.1904761904761908e-06, "loss": 48.2035, "step": 671 }, { "epoch": 16.0, "grad_norm": 22.840618133544922, "learning_rate": 2.1785714285714286e-06, "loss": 42.8242, "step": 672 }, { "epoch": 16.023880597014927, "grad_norm": 16.183008193969727, "learning_rate": 2.166666666666667e-06, "loss": 47.8309, "step": 673 }, { "epoch": 16.04776119402985, "grad_norm": 20.603744506835938, "learning_rate": 2.154761904761905e-06, "loss": 48.5197, "step": 674 }, { "epoch": 16.071641791044776, "grad_norm": 26.492107391357422, "learning_rate": 2.1428571428571427e-06, "loss": 47.2312, "step": 675 }, { "epoch": 16.095522388059702, "grad_norm": 19.786901473999023, "learning_rate": 2.130952380952381e-06, "loss": 49.6201, "step": 676 }, { "epoch": 16.119402985074625, "grad_norm": 18.150909423828125, "learning_rate": 2.1190476190476194e-06, "loss": 48.7407, "step": 677 }, { "epoch": 16.143283582089552, "grad_norm": 18.797983169555664, "learning_rate": 2.1071428571428572e-06, "loss": 47.0801, "step": 678 }, { "epoch": 16.16716417910448, "grad_norm": 16.088953018188477, "learning_rate": 2.0952380952380955e-06, "loss": 47.6509, "step": 679 }, { "epoch": 16.1910447761194, "grad_norm": 20.359085083007812, "learning_rate": 2.0833333333333334e-06, "loss": 48.9226, "step": 680 }, { "epoch": 16.214925373134328, "grad_norm": 21.99265480041504, "learning_rate": 2.0714285714285717e-06, "loss": 47.3775, "step": 681 }, { "epoch": 16.238805970149254, "grad_norm": 18.616743087768555, "learning_rate": 2.0595238095238096e-06, "loss": 45.8448, "step": 682 }, { "epoch": 16.262686567164177, "grad_norm": 19.6337947845459, "learning_rate": 2.047619047619048e-06, "loss": 48.2077, "step": 683 }, { "epoch": 16.286567164179104, "grad_norm": 23.881439208984375, "learning_rate": 2.035714285714286e-06, "loss": 48.6796, "step": 684 }, { "epoch": 16.31044776119403, "grad_norm": 19.665023803710938, "learning_rate": 2.023809523809524e-06, "loss": 48.7275, "step": 685 }, { "epoch": 16.334328358208957, "grad_norm": 18.438793182373047, "learning_rate": 2.011904761904762e-06, "loss": 49.585, "step": 686 }, { "epoch": 16.35820895522388, "grad_norm": 17.073816299438477, "learning_rate": 2.0000000000000003e-06, "loss": 47.4548, "step": 687 }, { "epoch": 16.382089552238806, "grad_norm": 20.504276275634766, "learning_rate": 1.9880952380952382e-06, "loss": 47.3555, "step": 688 }, { "epoch": 16.405970149253733, "grad_norm": 21.564546585083008, "learning_rate": 1.976190476190476e-06, "loss": 47.6304, "step": 689 }, { "epoch": 16.429850746268656, "grad_norm": 16.773197174072266, "learning_rate": 1.9642857142857144e-06, "loss": 49.078, "step": 690 }, { "epoch": 16.453731343283582, "grad_norm": 22.77934455871582, "learning_rate": 1.9523809523809527e-06, "loss": 47.8289, "step": 691 }, { "epoch": 16.47761194029851, "grad_norm": 17.375993728637695, "learning_rate": 1.9404761904761906e-06, "loss": 48.4812, "step": 692 }, { "epoch": 16.501492537313432, "grad_norm": 21.407329559326172, "learning_rate": 1.928571428571429e-06, "loss": 48.2934, "step": 693 }, { "epoch": 16.52537313432836, "grad_norm": 15.673316955566406, "learning_rate": 1.916666666666667e-06, "loss": 46.7304, "step": 694 }, { "epoch": 16.549253731343285, "grad_norm": 24.577089309692383, "learning_rate": 1.904761904761905e-06, "loss": 47.9352, "step": 695 }, { "epoch": 16.573134328358208, "grad_norm": 24.46076774597168, "learning_rate": 1.892857142857143e-06, "loss": 48.8173, "step": 696 }, { "epoch": 16.597014925373134, "grad_norm": 14.248388290405273, "learning_rate": 1.880952380952381e-06, "loss": 48.5858, "step": 697 }, { "epoch": 16.62089552238806, "grad_norm": 16.925329208374023, "learning_rate": 1.8690476190476192e-06, "loss": 47.8278, "step": 698 }, { "epoch": 16.644776119402984, "grad_norm": 25.52614402770996, "learning_rate": 1.8571428571428573e-06, "loss": 48.1248, "step": 699 }, { "epoch": 16.66865671641791, "grad_norm": 21.011341094970703, "learning_rate": 1.8452380952380954e-06, "loss": 47.8154, "step": 700 }, { "epoch": 16.692537313432837, "grad_norm": 14.694896697998047, "learning_rate": 1.8333333333333333e-06, "loss": 47.9668, "step": 701 }, { "epoch": 16.71641791044776, "grad_norm": 22.32903480529785, "learning_rate": 1.8214285714285716e-06, "loss": 48.6784, "step": 702 }, { "epoch": 16.740298507462686, "grad_norm": 17.19482421875, "learning_rate": 1.8095238095238097e-06, "loss": 46.9973, "step": 703 }, { "epoch": 16.764179104477613, "grad_norm": 14.590733528137207, "learning_rate": 1.7976190476190478e-06, "loss": 47.2393, "step": 704 }, { "epoch": 16.788059701492536, "grad_norm": 17.131982803344727, "learning_rate": 1.7857142857142859e-06, "loss": 47.9412, "step": 705 }, { "epoch": 16.811940298507462, "grad_norm": 18.513992309570312, "learning_rate": 1.773809523809524e-06, "loss": 48.8777, "step": 706 }, { "epoch": 16.83582089552239, "grad_norm": 17.625539779663086, "learning_rate": 1.761904761904762e-06, "loss": 48.3885, "step": 707 }, { "epoch": 16.85970149253731, "grad_norm": 16.540056228637695, "learning_rate": 1.75e-06, "loss": 47.8561, "step": 708 }, { "epoch": 16.883582089552238, "grad_norm": 20.070533752441406, "learning_rate": 1.738095238095238e-06, "loss": 46.6418, "step": 709 }, { "epoch": 16.907462686567165, "grad_norm": 18.742460250854492, "learning_rate": 1.7261904761904764e-06, "loss": 46.7471, "step": 710 }, { "epoch": 16.93134328358209, "grad_norm": 17.491954803466797, "learning_rate": 1.7142857142857145e-06, "loss": 47.5558, "step": 711 }, { "epoch": 16.955223880597014, "grad_norm": 17.457130432128906, "learning_rate": 1.7023809523809526e-06, "loss": 47.4441, "step": 712 }, { "epoch": 16.97910447761194, "grad_norm": 21.053844451904297, "learning_rate": 1.6904761904761907e-06, "loss": 48.1931, "step": 713 }, { "epoch": 17.0, "grad_norm": 16.943801879882812, "learning_rate": 1.6785714285714286e-06, "loss": 41.9934, "step": 714 }, { "epoch": 17.023880597014927, "grad_norm": 21.56785011291504, "learning_rate": 1.6666666666666667e-06, "loss": 47.1652, "step": 715 }, { "epoch": 17.04776119402985, "grad_norm": 21.193382263183594, "learning_rate": 1.6547619047619048e-06, "loss": 47.6751, "step": 716 }, { "epoch": 17.071641791044776, "grad_norm": 16.245115280151367, "learning_rate": 1.642857142857143e-06, "loss": 47.4133, "step": 717 }, { "epoch": 17.095522388059702, "grad_norm": 18.834646224975586, "learning_rate": 1.6309523809523812e-06, "loss": 48.145, "step": 718 }, { "epoch": 17.119402985074625, "grad_norm": 15.769698143005371, "learning_rate": 1.6190476190476193e-06, "loss": 48.1181, "step": 719 }, { "epoch": 17.143283582089552, "grad_norm": 13.460511207580566, "learning_rate": 1.6071428571428574e-06, "loss": 49.1229, "step": 720 }, { "epoch": 17.16716417910448, "grad_norm": 18.58087158203125, "learning_rate": 1.5952380952380953e-06, "loss": 47.5095, "step": 721 }, { "epoch": 17.1910447761194, "grad_norm": 18.607332229614258, "learning_rate": 1.5833333333333333e-06, "loss": 49.1334, "step": 722 }, { "epoch": 17.214925373134328, "grad_norm": 15.046488761901855, "learning_rate": 1.5714285714285714e-06, "loss": 47.6151, "step": 723 }, { "epoch": 17.238805970149254, "grad_norm": 17.442358016967773, "learning_rate": 1.5595238095238098e-06, "loss": 47.3771, "step": 724 }, { "epoch": 17.262686567164177, "grad_norm": 11.690101623535156, "learning_rate": 1.5476190476190479e-06, "loss": 48.1095, "step": 725 }, { "epoch": 17.286567164179104, "grad_norm": 17.945192337036133, "learning_rate": 1.535714285714286e-06, "loss": 47.8941, "step": 726 }, { "epoch": 17.31044776119403, "grad_norm": 13.878116607666016, "learning_rate": 1.523809523809524e-06, "loss": 47.6422, "step": 727 }, { "epoch": 17.334328358208957, "grad_norm": 15.942928314208984, "learning_rate": 1.511904761904762e-06, "loss": 46.9964, "step": 728 }, { "epoch": 17.35820895522388, "grad_norm": 13.57482624053955, "learning_rate": 1.5e-06, "loss": 47.1832, "step": 729 }, { "epoch": 17.382089552238806, "grad_norm": 13.781617164611816, "learning_rate": 1.4880952380952381e-06, "loss": 48.9621, "step": 730 }, { "epoch": 17.405970149253733, "grad_norm": 14.26857852935791, "learning_rate": 1.4761904761904762e-06, "loss": 48.6631, "step": 731 }, { "epoch": 17.429850746268656, "grad_norm": 16.23444938659668, "learning_rate": 1.4642857142857145e-06, "loss": 46.84, "step": 732 }, { "epoch": 17.453731343283582, "grad_norm": 17.442630767822266, "learning_rate": 1.4523809523809526e-06, "loss": 48.2996, "step": 733 }, { "epoch": 17.47761194029851, "grad_norm": 14.329082489013672, "learning_rate": 1.4404761904761905e-06, "loss": 47.396, "step": 734 }, { "epoch": 17.501492537313432, "grad_norm": 14.772257804870605, "learning_rate": 1.4285714285714286e-06, "loss": 48.5733, "step": 735 }, { "epoch": 17.52537313432836, "grad_norm": 14.331324577331543, "learning_rate": 1.4166666666666667e-06, "loss": 48.2969, "step": 736 }, { "epoch": 17.549253731343285, "grad_norm": 17.498600006103516, "learning_rate": 1.4047619047619048e-06, "loss": 48.0221, "step": 737 }, { "epoch": 17.573134328358208, "grad_norm": 16.155025482177734, "learning_rate": 1.392857142857143e-06, "loss": 47.9848, "step": 738 }, { "epoch": 17.597014925373134, "grad_norm": 15.552813529968262, "learning_rate": 1.3809523809523812e-06, "loss": 48.4413, "step": 739 }, { "epoch": 17.62089552238806, "grad_norm": 15.887310981750488, "learning_rate": 1.3690476190476193e-06, "loss": 47.6463, "step": 740 }, { "epoch": 17.644776119402984, "grad_norm": 17.783411026000977, "learning_rate": 1.3571428571428572e-06, "loss": 47.8009, "step": 741 }, { "epoch": 17.66865671641791, "grad_norm": 17.108932495117188, "learning_rate": 1.3452380952380953e-06, "loss": 47.9888, "step": 742 }, { "epoch": 17.692537313432837, "grad_norm": 19.79203224182129, "learning_rate": 1.3333333333333334e-06, "loss": 48.5732, "step": 743 }, { "epoch": 17.71641791044776, "grad_norm": 17.06324005126953, "learning_rate": 1.3214285714285715e-06, "loss": 48.4815, "step": 744 }, { "epoch": 17.740298507462686, "grad_norm": 17.399097442626953, "learning_rate": 1.3095238095238096e-06, "loss": 47.5591, "step": 745 }, { "epoch": 17.764179104477613, "grad_norm": 15.836935997009277, "learning_rate": 1.297619047619048e-06, "loss": 47.994, "step": 746 }, { "epoch": 17.788059701492536, "grad_norm": 18.20856475830078, "learning_rate": 1.2857142857142856e-06, "loss": 47.9979, "step": 747 }, { "epoch": 17.811940298507462, "grad_norm": 19.10239601135254, "learning_rate": 1.273809523809524e-06, "loss": 48.1196, "step": 748 }, { "epoch": 17.83582089552239, "grad_norm": 17.21087646484375, "learning_rate": 1.261904761904762e-06, "loss": 47.8816, "step": 749 }, { "epoch": 17.85970149253731, "grad_norm": 14.792268753051758, "learning_rate": 1.25e-06, "loss": 47.8182, "step": 750 }, { "epoch": 17.883582089552238, "grad_norm": 13.695488929748535, "learning_rate": 1.2380952380952382e-06, "loss": 47.4298, "step": 751 }, { "epoch": 17.907462686567165, "grad_norm": 15.197646141052246, "learning_rate": 1.2261904761904763e-06, "loss": 47.7132, "step": 752 }, { "epoch": 17.93134328358209, "grad_norm": 19.13431739807129, "learning_rate": 1.2142857142857144e-06, "loss": 48.431, "step": 753 }, { "epoch": 17.955223880597014, "grad_norm": 15.690411567687988, "learning_rate": 1.2023809523809525e-06, "loss": 47.4529, "step": 754 }, { "epoch": 17.97910447761194, "grad_norm": 14.75414752960205, "learning_rate": 1.1904761904761906e-06, "loss": 47.9668, "step": 755 }, { "epoch": 18.0, "grad_norm": 11.497115135192871, "learning_rate": 1.1785714285714287e-06, "loss": 41.8653, "step": 756 }, { "epoch": 18.023880597014927, "grad_norm": 16.20159339904785, "learning_rate": 1.1666666666666668e-06, "loss": 47.2871, "step": 757 }, { "epoch": 18.04776119402985, "grad_norm": 15.400497436523438, "learning_rate": 1.1547619047619049e-06, "loss": 46.3673, "step": 758 }, { "epoch": 18.071641791044776, "grad_norm": 12.16773509979248, "learning_rate": 1.142857142857143e-06, "loss": 47.7463, "step": 759 }, { "epoch": 18.095522388059702, "grad_norm": 19.978351593017578, "learning_rate": 1.130952380952381e-06, "loss": 47.4632, "step": 760 }, { "epoch": 18.119402985074625, "grad_norm": 14.090561866760254, "learning_rate": 1.1190476190476192e-06, "loss": 48.9356, "step": 761 }, { "epoch": 18.143283582089552, "grad_norm": 13.143173217773438, "learning_rate": 1.1071428571428573e-06, "loss": 48.1129, "step": 762 }, { "epoch": 18.16716417910448, "grad_norm": 15.609000205993652, "learning_rate": 1.0952380952380954e-06, "loss": 48.8554, "step": 763 }, { "epoch": 18.1910447761194, "grad_norm": 14.012611389160156, "learning_rate": 1.0833333333333335e-06, "loss": 47.6785, "step": 764 }, { "epoch": 18.214925373134328, "grad_norm": 13.417494773864746, "learning_rate": 1.0714285714285714e-06, "loss": 48.2733, "step": 765 }, { "epoch": 18.238805970149254, "grad_norm": 15.562864303588867, "learning_rate": 1.0595238095238097e-06, "loss": 48.0488, "step": 766 }, { "epoch": 18.262686567164177, "grad_norm": 17.083723068237305, "learning_rate": 1.0476190476190478e-06, "loss": 49.3136, "step": 767 }, { "epoch": 18.286567164179104, "grad_norm": 16.564395904541016, "learning_rate": 1.0357142857142859e-06, "loss": 48.1835, "step": 768 }, { "epoch": 18.31044776119403, "grad_norm": 14.694929122924805, "learning_rate": 1.023809523809524e-06, "loss": 48.8632, "step": 769 }, { "epoch": 18.334328358208957, "grad_norm": 13.4928560256958, "learning_rate": 1.011904761904762e-06, "loss": 47.3333, "step": 770 }, { "epoch": 18.35820895522388, "grad_norm": 12.980204582214355, "learning_rate": 1.0000000000000002e-06, "loss": 47.7961, "step": 771 }, { "epoch": 18.382089552238806, "grad_norm": 14.19666862487793, "learning_rate": 9.88095238095238e-07, "loss": 47.7806, "step": 772 }, { "epoch": 18.405970149253733, "grad_norm": 13.2017183303833, "learning_rate": 9.761904761904764e-07, "loss": 46.1119, "step": 773 }, { "epoch": 18.429850746268656, "grad_norm": 15.064650535583496, "learning_rate": 9.642857142857145e-07, "loss": 47.8105, "step": 774 }, { "epoch": 18.453731343283582, "grad_norm": 14.713834762573242, "learning_rate": 9.523809523809525e-07, "loss": 47.5723, "step": 775 }, { "epoch": 18.47761194029851, "grad_norm": 13.394201278686523, "learning_rate": 9.404761904761906e-07, "loss": 48.1632, "step": 776 }, { "epoch": 18.501492537313432, "grad_norm": 10.742532730102539, "learning_rate": 9.285714285714287e-07, "loss": 48.6118, "step": 777 }, { "epoch": 18.52537313432836, "grad_norm": 12.402650833129883, "learning_rate": 9.166666666666666e-07, "loss": 48.6597, "step": 778 }, { "epoch": 18.549253731343285, "grad_norm": 15.73616886138916, "learning_rate": 9.047619047619048e-07, "loss": 47.9931, "step": 779 }, { "epoch": 18.573134328358208, "grad_norm": 14.188780784606934, "learning_rate": 8.928571428571429e-07, "loss": 47.0081, "step": 780 }, { "epoch": 18.597014925373134, "grad_norm": 12.516701698303223, "learning_rate": 8.80952380952381e-07, "loss": 49.0287, "step": 781 }, { "epoch": 18.62089552238806, "grad_norm": 15.069429397583008, "learning_rate": 8.69047619047619e-07, "loss": 47.8721, "step": 782 }, { "epoch": 18.644776119402984, "grad_norm": 13.091047286987305, "learning_rate": 8.571428571428572e-07, "loss": 48.1678, "step": 783 }, { "epoch": 18.66865671641791, "grad_norm": 15.017065048217773, "learning_rate": 8.452380952380953e-07, "loss": 47.1277, "step": 784 }, { "epoch": 18.692537313432837, "grad_norm": 12.091531753540039, "learning_rate": 8.333333333333333e-07, "loss": 47.4962, "step": 785 }, { "epoch": 18.71641791044776, "grad_norm": 15.20182991027832, "learning_rate": 8.214285714285715e-07, "loss": 48.6224, "step": 786 }, { "epoch": 18.740298507462686, "grad_norm": 11.17827320098877, "learning_rate": 8.095238095238096e-07, "loss": 47.9759, "step": 787 }, { "epoch": 18.764179104477613, "grad_norm": 14.884525299072266, "learning_rate": 7.976190476190476e-07, "loss": 47.9749, "step": 788 }, { "epoch": 18.788059701492536, "grad_norm": 14.360984802246094, "learning_rate": 7.857142857142857e-07, "loss": 48.9952, "step": 789 }, { "epoch": 18.811940298507462, "grad_norm": 11.265621185302734, "learning_rate": 7.738095238095239e-07, "loss": 47.4274, "step": 790 }, { "epoch": 18.83582089552239, "grad_norm": 18.072290420532227, "learning_rate": 7.61904761904762e-07, "loss": 47.8815, "step": 791 }, { "epoch": 18.85970149253731, "grad_norm": 15.310029029846191, "learning_rate": 7.5e-07, "loss": 47.3907, "step": 792 }, { "epoch": 18.883582089552238, "grad_norm": 14.032752990722656, "learning_rate": 7.380952380952381e-07, "loss": 48.0883, "step": 793 }, { "epoch": 18.907462686567165, "grad_norm": 12.853668212890625, "learning_rate": 7.261904761904763e-07, "loss": 47.1452, "step": 794 }, { "epoch": 18.93134328358209, "grad_norm": 15.6067476272583, "learning_rate": 7.142857142857143e-07, "loss": 46.6303, "step": 795 }, { "epoch": 18.955223880597014, "grad_norm": 12.828201293945312, "learning_rate": 7.023809523809524e-07, "loss": 47.9885, "step": 796 }, { "epoch": 18.97910447761194, "grad_norm": 13.336589813232422, "learning_rate": 6.904761904761906e-07, "loss": 48.2315, "step": 797 }, { "epoch": 19.0, "grad_norm": 13.629434585571289, "learning_rate": 6.785714285714286e-07, "loss": 41.9374, "step": 798 }, { "epoch": 19.023880597014927, "grad_norm": 13.237930297851562, "learning_rate": 6.666666666666667e-07, "loss": 46.6802, "step": 799 }, { "epoch": 19.04776119402985, "grad_norm": 13.715863227844238, "learning_rate": 6.547619047619048e-07, "loss": 49.0494, "step": 800 }, { "epoch": 19.071641791044776, "grad_norm": 13.439970016479492, "learning_rate": 6.428571428571428e-07, "loss": 46.3647, "step": 801 }, { "epoch": 19.095522388059702, "grad_norm": 15.468942642211914, "learning_rate": 6.30952380952381e-07, "loss": 48.4725, "step": 802 }, { "epoch": 19.119402985074625, "grad_norm": 14.160257339477539, "learning_rate": 6.190476190476191e-07, "loss": 47.4033, "step": 803 }, { "epoch": 19.143283582089552, "grad_norm": 13.667155265808105, "learning_rate": 6.071428571428572e-07, "loss": 48.4729, "step": 804 }, { "epoch": 19.16716417910448, "grad_norm": 12.428313255310059, "learning_rate": 5.952380952380953e-07, "loss": 48.8939, "step": 805 }, { "epoch": 19.1910447761194, "grad_norm": 12.985882759094238, "learning_rate": 5.833333333333334e-07, "loss": 47.0663, "step": 806 }, { "epoch": 19.214925373134328, "grad_norm": 12.827404975891113, "learning_rate": 5.714285714285715e-07, "loss": 47.5614, "step": 807 }, { "epoch": 19.238805970149254, "grad_norm": 11.078653335571289, "learning_rate": 5.595238095238096e-07, "loss": 48.564, "step": 808 }, { "epoch": 19.262686567164177, "grad_norm": 13.346016883850098, "learning_rate": 5.476190476190477e-07, "loss": 48.0823, "step": 809 }, { "epoch": 19.286567164179104, "grad_norm": 14.523963928222656, "learning_rate": 5.357142857142857e-07, "loss": 48.4225, "step": 810 }, { "epoch": 19.31044776119403, "grad_norm": 12.598445892333984, "learning_rate": 5.238095238095239e-07, "loss": 47.2514, "step": 811 }, { "epoch": 19.334328358208957, "grad_norm": 12.203497886657715, "learning_rate": 5.11904761904762e-07, "loss": 47.217, "step": 812 }, { "epoch": 19.35820895522388, "grad_norm": 12.144754409790039, "learning_rate": 5.000000000000001e-07, "loss": 47.191, "step": 813 }, { "epoch": 19.382089552238806, "grad_norm": 12.585047721862793, "learning_rate": 4.880952380952382e-07, "loss": 48.4947, "step": 814 }, { "epoch": 19.405970149253733, "grad_norm": 11.295561790466309, "learning_rate": 4.7619047619047623e-07, "loss": 46.9444, "step": 815 }, { "epoch": 19.429850746268656, "grad_norm": 13.055256843566895, "learning_rate": 4.642857142857143e-07, "loss": 48.4469, "step": 816 }, { "epoch": 19.453731343283582, "grad_norm": 12.051807403564453, "learning_rate": 4.523809523809524e-07, "loss": 48.0547, "step": 817 }, { "epoch": 19.47761194029851, "grad_norm": 13.44185733795166, "learning_rate": 4.404761904761905e-07, "loss": 48.3155, "step": 818 }, { "epoch": 19.501492537313432, "grad_norm": 12.405723571777344, "learning_rate": 4.285714285714286e-07, "loss": 48.3982, "step": 819 }, { "epoch": 19.52537313432836, "grad_norm": 14.900402069091797, "learning_rate": 4.1666666666666667e-07, "loss": 48.2653, "step": 820 }, { "epoch": 19.549253731343285, "grad_norm": 10.70801067352295, "learning_rate": 4.047619047619048e-07, "loss": 48.0384, "step": 821 }, { "epoch": 19.573134328358208, "grad_norm": 12.318074226379395, "learning_rate": 3.9285714285714286e-07, "loss": 47.554, "step": 822 }, { "epoch": 19.597014925373134, "grad_norm": 12.898431777954102, "learning_rate": 3.80952380952381e-07, "loss": 48.3586, "step": 823 }, { "epoch": 19.62089552238806, "grad_norm": 15.45779800415039, "learning_rate": 3.6904761904761906e-07, "loss": 48.4193, "step": 824 }, { "epoch": 19.644776119402984, "grad_norm": 11.230570793151855, "learning_rate": 3.5714285714285716e-07, "loss": 48.5294, "step": 825 }, { "epoch": 19.66865671641791, "grad_norm": 13.647272109985352, "learning_rate": 3.452380952380953e-07, "loss": 47.2569, "step": 826 }, { "epoch": 19.692537313432837, "grad_norm": 11.521178245544434, "learning_rate": 3.3333333333333335e-07, "loss": 47.2899, "step": 827 }, { "epoch": 19.71641791044776, "grad_norm": 11.537907600402832, "learning_rate": 3.214285714285714e-07, "loss": 46.6462, "step": 828 }, { "epoch": 19.740298507462686, "grad_norm": 11.670267105102539, "learning_rate": 3.0952380952380955e-07, "loss": 47.9797, "step": 829 }, { "epoch": 19.764179104477613, "grad_norm": 11.660557746887207, "learning_rate": 2.9761904761904765e-07, "loss": 47.9744, "step": 830 }, { "epoch": 19.788059701492536, "grad_norm": 12.332269668579102, "learning_rate": 2.8571428571428575e-07, "loss": 48.6015, "step": 831 }, { "epoch": 19.811940298507462, "grad_norm": 12.228848457336426, "learning_rate": 2.7380952380952385e-07, "loss": 47.3215, "step": 832 }, { "epoch": 19.83582089552239, "grad_norm": 13.780754089355469, "learning_rate": 2.6190476190476194e-07, "loss": 48.853, "step": 833 }, { "epoch": 19.85970149253731, "grad_norm": 11.639240264892578, "learning_rate": 2.5000000000000004e-07, "loss": 48.8199, "step": 834 }, { "epoch": 19.883582089552238, "grad_norm": 10.796862602233887, "learning_rate": 2.3809523809523811e-07, "loss": 47.5373, "step": 835 }, { "epoch": 19.907462686567165, "grad_norm": 13.573180198669434, "learning_rate": 2.261904761904762e-07, "loss": 47.8368, "step": 836 }, { "epoch": 19.93134328358209, "grad_norm": 11.497776985168457, "learning_rate": 2.142857142857143e-07, "loss": 47.8226, "step": 837 }, { "epoch": 19.955223880597014, "grad_norm": 10.777889251708984, "learning_rate": 2.023809523809524e-07, "loss": 47.6424, "step": 838 }, { "epoch": 19.97910447761194, "grad_norm": 10.77852725982666, "learning_rate": 1.904761904761905e-07, "loss": 46.379, "step": 839 }, { "epoch": 20.0, "grad_norm": 13.582564353942871, "learning_rate": 1.7857142857142858e-07, "loss": 42.5239, "step": 840 }, { "epoch": 20.0, "step": 840, "total_flos": 4.130470305428237e+16, "train_loss": 49.47331008002872, "train_runtime": 26137.3223, "train_samples_per_second": 4.095, "train_steps_per_second": 0.032 }, { "epoch": 20.023880597014927, "grad_norm": 21.18770408630371, "learning_rate": 1e-05, "loss": 48.1474, "step": 841 }, { "epoch": 20.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99404761904762e-06, "loss": 60.2758, "step": 842 }, { "epoch": 20.071641791044776, "grad_norm": Infinity, "learning_rate": 9.99404761904762e-06, "loss": 61.7211, "step": 843 }, { "epoch": 20.095522388059702, "grad_norm": 504.4407958984375, "learning_rate": 9.99404761904762e-06, "loss": 60.8189, "step": 844 }, { "epoch": 20.119402985074625, "grad_norm": 221.96849060058594, "learning_rate": 9.988095238095239e-06, "loss": 54.4658, "step": 845 }, { "epoch": 20.143283582089552, "grad_norm": 110.4036865234375, "learning_rate": 9.982142857142858e-06, "loss": 52.4242, "step": 846 }, { "epoch": 20.16716417910448, "grad_norm": 82.75493621826172, "learning_rate": 9.976190476190477e-06, "loss": 50.3129, "step": 847 }, { "epoch": 20.1910447761194, "grad_norm": 62.56040573120117, "learning_rate": 9.970238095238096e-06, "loss": 49.7171, "step": 848 }, { "epoch": 20.214925373134328, "grad_norm": 70.04007720947266, "learning_rate": 9.964285714285714e-06, "loss": 48.185, "step": 849 }, { "epoch": 20.238805970149254, "grad_norm": 56.70342254638672, "learning_rate": 9.958333333333334e-06, "loss": 49.5787, "step": 850 }, { "epoch": 20.262686567164177, "grad_norm": 64.66405487060547, "learning_rate": 9.952380952380954e-06, "loss": 49.6106, "step": 851 }, { "epoch": 20.286567164179104, "grad_norm": 43.37612533569336, "learning_rate": 9.946428571428572e-06, "loss": 49.2966, "step": 852 }, { "epoch": 20.31044776119403, "grad_norm": 42.66206359863281, "learning_rate": 9.940476190476192e-06, "loss": 48.7073, "step": 853 }, { "epoch": 20.334328358208957, "grad_norm": 37.17741775512695, "learning_rate": 9.93452380952381e-06, "loss": 48.7592, "step": 854 }, { "epoch": 20.35820895522388, "grad_norm": 39.27332305908203, "learning_rate": 9.92857142857143e-06, "loss": 48.1181, "step": 855 }, { "epoch": 20.382089552238806, "grad_norm": 31.37261390686035, "learning_rate": 9.922619047619048e-06, "loss": 47.4873, "step": 856 }, { "epoch": 20.405970149253733, "grad_norm": 41.693809509277344, "learning_rate": 9.916666666666668e-06, "loss": 48.9428, "step": 857 }, { "epoch": 20.429850746268656, "grad_norm": 29.33939552307129, "learning_rate": 9.910714285714288e-06, "loss": 49.2928, "step": 858 }, { "epoch": 20.453731343283582, "grad_norm": 30.606157302856445, "learning_rate": 9.904761904761906e-06, "loss": 49.1506, "step": 859 }, { "epoch": 20.47761194029851, "grad_norm": 27.273784637451172, "learning_rate": 9.898809523809525e-06, "loss": 46.6136, "step": 860 }, { "epoch": 20.501492537313432, "grad_norm": 24.410682678222656, "learning_rate": 9.892857142857143e-06, "loss": 48.3989, "step": 861 }, { "epoch": 20.52537313432836, "grad_norm": 24.138607025146484, "learning_rate": 9.886904761904763e-06, "loss": 49.3858, "step": 862 }, { "epoch": 20.549253731343285, "grad_norm": 27.50669288635254, "learning_rate": 9.880952380952381e-06, "loss": 48.5058, "step": 863 }, { "epoch": 20.573134328358208, "grad_norm": 27.739347457885742, "learning_rate": 9.875000000000001e-06, "loss": 49.676, "step": 864 }, { "epoch": 20.597014925373134, "grad_norm": 22.63895034790039, "learning_rate": 9.869047619047621e-06, "loss": 47.6998, "step": 865 }, { "epoch": 20.62089552238806, "grad_norm": 26.80891990661621, "learning_rate": 9.863095238095239e-06, "loss": 47.9571, "step": 866 }, { "epoch": 20.644776119402984, "grad_norm": 26.259008407592773, "learning_rate": 9.857142857142859e-06, "loss": 48.8771, "step": 867 }, { "epoch": 20.66865671641791, "grad_norm": 23.716773986816406, "learning_rate": 9.851190476190477e-06, "loss": 47.1255, "step": 868 }, { "epoch": 20.692537313432837, "grad_norm": 26.96156120300293, "learning_rate": 9.845238095238097e-06, "loss": 47.2227, "step": 869 }, { "epoch": 20.71641791044776, "grad_norm": 25.1954345703125, "learning_rate": 9.839285714285715e-06, "loss": 47.6847, "step": 870 }, { "epoch": 20.740298507462686, "grad_norm": 21.56642723083496, "learning_rate": 9.833333333333333e-06, "loss": 47.7292, "step": 871 }, { "epoch": 20.764179104477613, "grad_norm": 25.091773986816406, "learning_rate": 9.827380952380953e-06, "loss": 46.6588, "step": 872 }, { "epoch": 20.788059701492536, "grad_norm": 26.45799446105957, "learning_rate": 9.821428571428573e-06, "loss": 47.3963, "step": 873 }, { "epoch": 20.811940298507462, "grad_norm": 25.865068435668945, "learning_rate": 9.81547619047619e-06, "loss": 48.8823, "step": 874 }, { "epoch": 20.83582089552239, "grad_norm": 27.056106567382812, "learning_rate": 9.80952380952381e-06, "loss": 47.2222, "step": 875 }, { "epoch": 20.85970149253731, "grad_norm": 27.02417755126953, "learning_rate": 9.803571428571428e-06, "loss": 47.4543, "step": 876 }, { "epoch": 20.883582089552238, "grad_norm": 23.681915283203125, "learning_rate": 9.797619047619048e-06, "loss": 47.7518, "step": 877 }, { "epoch": 20.907462686567165, "grad_norm": 20.77193260192871, "learning_rate": 9.791666666666666e-06, "loss": 46.3164, "step": 878 }, { "epoch": 20.93134328358209, "grad_norm": 24.61642837524414, "learning_rate": 9.785714285714286e-06, "loss": 48.6711, "step": 879 }, { "epoch": 20.955223880597014, "grad_norm": 20.59898567199707, "learning_rate": 9.779761904761906e-06, "loss": 49.114, "step": 880 }, { "epoch": 20.97910447761194, "grad_norm": 24.815736770629883, "learning_rate": 9.773809523809524e-06, "loss": 48.4315, "step": 881 }, { "epoch": 21.0, "grad_norm": 17.920352935791016, "learning_rate": 9.767857142857144e-06, "loss": 41.3634, "step": 882 }, { "epoch": 21.023880597014927, "grad_norm": 26.69571876525879, "learning_rate": 9.761904761904762e-06, "loss": 47.8968, "step": 883 }, { "epoch": 21.04776119402985, "grad_norm": 23.156524658203125, "learning_rate": 9.755952380952382e-06, "loss": 48.5914, "step": 884 }, { "epoch": 21.071641791044776, "grad_norm": 21.612483978271484, "learning_rate": 9.75e-06, "loss": 47.8711, "step": 885 }, { "epoch": 21.095522388059702, "grad_norm": 24.346399307250977, "learning_rate": 9.74404761904762e-06, "loss": 48.8689, "step": 886 }, { "epoch": 21.119402985074625, "grad_norm": 21.973896026611328, "learning_rate": 9.73809523809524e-06, "loss": 46.7465, "step": 887 }, { "epoch": 21.143283582089552, "grad_norm": 20.034557342529297, "learning_rate": 9.732142857142858e-06, "loss": 47.1505, "step": 888 }, { "epoch": 21.16716417910448, "grad_norm": 20.113008499145508, "learning_rate": 9.726190476190477e-06, "loss": 47.9234, "step": 889 }, { "epoch": 21.1910447761194, "grad_norm": 24.743249893188477, "learning_rate": 9.720238095238095e-06, "loss": 47.4432, "step": 890 }, { "epoch": 21.214925373134328, "grad_norm": 25.538530349731445, "learning_rate": 9.714285714285715e-06, "loss": 47.0015, "step": 891 }, { "epoch": 21.238805970149254, "grad_norm": 27.43077278137207, "learning_rate": 9.708333333333333e-06, "loss": 48.6757, "step": 892 }, { "epoch": 21.262686567164177, "grad_norm": 25.34470558166504, "learning_rate": 9.702380952380953e-06, "loss": 46.8118, "step": 893 }, { "epoch": 21.286567164179104, "grad_norm": 29.590490341186523, "learning_rate": 9.696428571428573e-06, "loss": 47.5079, "step": 894 }, { "epoch": 21.31044776119403, "grad_norm": 16.418222427368164, "learning_rate": 9.690476190476191e-06, "loss": 48.7679, "step": 895 }, { "epoch": 21.334328358208957, "grad_norm": 30.906719207763672, "learning_rate": 9.68452380952381e-06, "loss": 48.4926, "step": 896 }, { "epoch": 21.35820895522388, "grad_norm": 30.252347946166992, "learning_rate": 9.678571428571429e-06, "loss": 48.9318, "step": 897 }, { "epoch": 21.382089552238806, "grad_norm": 30.137592315673828, "learning_rate": 9.672619047619049e-06, "loss": 47.0388, "step": 898 }, { "epoch": 21.405970149253733, "grad_norm": 25.297151565551758, "learning_rate": 9.666666666666667e-06, "loss": 47.334, "step": 899 }, { "epoch": 21.429850746268656, "grad_norm": 31.72736358642578, "learning_rate": 9.660714285714287e-06, "loss": 47.8769, "step": 900 }, { "epoch": 21.453731343283582, "grad_norm": 24.4852294921875, "learning_rate": 9.654761904761906e-06, "loss": 47.4009, "step": 901 }, { "epoch": 21.47761194029851, "grad_norm": 31.223567962646484, "learning_rate": 9.648809523809524e-06, "loss": 48.4972, "step": 902 }, { "epoch": 21.501492537313432, "grad_norm": 24.1851806640625, "learning_rate": 9.642857142857144e-06, "loss": 46.1818, "step": 903 }, { "epoch": 21.52537313432836, "grad_norm": NaN, "learning_rate": 9.636904761904762e-06, "loss": 70.1176, "step": 904 }, { "epoch": 21.549253731343285, "grad_norm": 29.140161514282227, "learning_rate": 9.636904761904762e-06, "loss": 47.1614, "step": 905 }, { "epoch": 21.573134328358208, "grad_norm": 31.186546325683594, "learning_rate": 9.630952380952382e-06, "loss": 47.3643, "step": 906 }, { "epoch": 21.597014925373134, "grad_norm": 24.395353317260742, "learning_rate": 9.625e-06, "loss": 48.3591, "step": 907 }, { "epoch": 21.62089552238806, "grad_norm": 29.287492752075195, "learning_rate": 9.61904761904762e-06, "loss": 47.261, "step": 908 }, { "epoch": 21.644776119402984, "grad_norm": 26.76996612548828, "learning_rate": 9.61309523809524e-06, "loss": 48.7017, "step": 909 }, { "epoch": 21.66865671641791, "grad_norm": 29.820920944213867, "learning_rate": 9.607142857142858e-06, "loss": 48.5165, "step": 910 }, { "epoch": 21.692537313432837, "grad_norm": 30.011823654174805, "learning_rate": 9.601190476190478e-06, "loss": 46.5558, "step": 911 }, { "epoch": 21.71641791044776, "grad_norm": 32.796905517578125, "learning_rate": 9.595238095238096e-06, "loss": 47.276, "step": 912 }, { "epoch": 21.740298507462686, "grad_norm": 28.798233032226562, "learning_rate": 9.589285714285716e-06, "loss": 47.6033, "step": 913 }, { "epoch": 21.764179104477613, "grad_norm": 31.51072120666504, "learning_rate": 9.583333333333335e-06, "loss": 48.1236, "step": 914 }, { "epoch": 21.788059701492536, "grad_norm": 20.611305236816406, "learning_rate": 9.577380952380953e-06, "loss": 48.2839, "step": 915 }, { "epoch": 21.811940298507462, "grad_norm": 26.748571395874023, "learning_rate": 9.571428571428573e-06, "loss": 48.2225, "step": 916 }, { "epoch": 21.83582089552239, "grad_norm": 22.262859344482422, "learning_rate": 9.565476190476191e-06, "loss": 46.661, "step": 917 }, { "epoch": 21.85970149253731, "grad_norm": 34.15045166015625, "learning_rate": 9.559523809523811e-06, "loss": 47.3229, "step": 918 }, { "epoch": 21.883582089552238, "grad_norm": 24.26387596130371, "learning_rate": 9.55357142857143e-06, "loss": 47.4686, "step": 919 }, { "epoch": 21.907462686567165, "grad_norm": 29.463472366333008, "learning_rate": 9.547619047619049e-06, "loss": 47.6019, "step": 920 }, { "epoch": 21.93134328358209, "grad_norm": 31.184497833251953, "learning_rate": 9.541666666666669e-06, "loss": 47.3228, "step": 921 }, { "epoch": 21.955223880597014, "grad_norm": 26.506031036376953, "learning_rate": 9.535714285714287e-06, "loss": 47.9961, "step": 922 }, { "epoch": 21.97910447761194, "grad_norm": 30.547340393066406, "learning_rate": 9.529761904761905e-06, "loss": 47.8973, "step": 923 }, { "epoch": 22.0, "grad_norm": 22.91999053955078, "learning_rate": 9.523809523809525e-06, "loss": 41.3426, "step": 924 }, { "epoch": 22.023880597014927, "grad_norm": 28.242450714111328, "learning_rate": 9.517857142857143e-06, "loss": 47.2478, "step": 925 }, { "epoch": 22.04776119402985, "grad_norm": 33.07649612426758, "learning_rate": 9.511904761904763e-06, "loss": 47.6489, "step": 926 }, { "epoch": 22.071641791044776, "grad_norm": 28.14696502685547, "learning_rate": 9.50595238095238e-06, "loss": 46.521, "step": 927 }, { "epoch": 22.095522388059702, "grad_norm": 34.472206115722656, "learning_rate": 9.5e-06, "loss": 47.6476, "step": 928 }, { "epoch": 22.119402985074625, "grad_norm": 25.370718002319336, "learning_rate": 9.494047619047619e-06, "loss": 47.7215, "step": 929 }, { "epoch": 22.143283582089552, "grad_norm": 31.77129554748535, "learning_rate": 9.488095238095238e-06, "loss": 46.5566, "step": 930 }, { "epoch": 22.16716417910448, "grad_norm": 25.42667579650879, "learning_rate": 9.482142857142858e-06, "loss": 47.9832, "step": 931 }, { "epoch": 22.1910447761194, "grad_norm": 26.3134765625, "learning_rate": 9.476190476190476e-06, "loss": 47.9402, "step": 932 }, { "epoch": 22.214925373134328, "grad_norm": 31.683523178100586, "learning_rate": 9.470238095238096e-06, "loss": 47.4404, "step": 933 }, { "epoch": 22.238805970149254, "grad_norm": 31.90761375427246, "learning_rate": 9.464285714285714e-06, "loss": 47.7601, "step": 934 }, { "epoch": 22.262686567164177, "grad_norm": 24.635921478271484, "learning_rate": 9.458333333333334e-06, "loss": 46.2573, "step": 935 }, { "epoch": 22.286567164179104, "grad_norm": 25.32915496826172, "learning_rate": 9.452380952380952e-06, "loss": 48.4756, "step": 936 }, { "epoch": 22.31044776119403, "grad_norm": 28.117773056030273, "learning_rate": 9.446428571428572e-06, "loss": 48.6971, "step": 937 }, { "epoch": 22.334328358208957, "grad_norm": 22.504152297973633, "learning_rate": 9.440476190476192e-06, "loss": 47.4534, "step": 938 }, { "epoch": 22.35820895522388, "grad_norm": 31.765676498413086, "learning_rate": 9.43452380952381e-06, "loss": 48.0168, "step": 939 }, { "epoch": 22.382089552238806, "grad_norm": 27.647945404052734, "learning_rate": 9.42857142857143e-06, "loss": 48.0918, "step": 940 }, { "epoch": 22.405970149253733, "grad_norm": 33.35643005371094, "learning_rate": 9.422619047619048e-06, "loss": 48.295, "step": 941 }, { "epoch": 22.429850746268656, "grad_norm": 26.12603187561035, "learning_rate": 9.416666666666667e-06, "loss": 48.8921, "step": 942 }, { "epoch": 22.453731343283582, "grad_norm": 23.728809356689453, "learning_rate": 9.410714285714286e-06, "loss": 47.3206, "step": 943 }, { "epoch": 22.47761194029851, "grad_norm": 28.772401809692383, "learning_rate": 9.404761904761905e-06, "loss": 47.6536, "step": 944 }, { "epoch": 22.501492537313432, "grad_norm": 28.205202102661133, "learning_rate": 9.398809523809525e-06, "loss": 47.1952, "step": 945 }, { "epoch": 22.52537313432836, "grad_norm": 33.80730438232422, "learning_rate": 9.392857142857143e-06, "loss": 47.1336, "step": 946 }, { "epoch": 22.549253731343285, "grad_norm": 25.538846969604492, "learning_rate": 9.386904761904763e-06, "loss": 46.4229, "step": 947 }, { "epoch": 22.573134328358208, "grad_norm": 41.13503646850586, "learning_rate": 9.380952380952381e-06, "loss": 46.8325, "step": 948 }, { "epoch": 22.597014925373134, "grad_norm": 36.823001861572266, "learning_rate": 9.375000000000001e-06, "loss": 47.205, "step": 949 }, { "epoch": 22.62089552238806, "grad_norm": 29.992229461669922, "learning_rate": 9.36904761904762e-06, "loss": 46.683, "step": 950 }, { "epoch": 22.644776119402984, "grad_norm": 40.20172882080078, "learning_rate": 9.363095238095239e-06, "loss": 48.4859, "step": 951 }, { "epoch": 22.66865671641791, "grad_norm": 27.357097625732422, "learning_rate": 9.357142857142859e-06, "loss": 47.2987, "step": 952 }, { "epoch": 22.692537313432837, "grad_norm": 40.66689682006836, "learning_rate": 9.351190476190477e-06, "loss": 46.3579, "step": 953 }, { "epoch": 22.71641791044776, "grad_norm": 35.37788391113281, "learning_rate": 9.345238095238096e-06, "loss": 47.3369, "step": 954 }, { "epoch": 22.740298507462686, "grad_norm": 36.279151916503906, "learning_rate": 9.339285714285715e-06, "loss": 47.1137, "step": 955 }, { "epoch": 22.764179104477613, "grad_norm": 27.949628829956055, "learning_rate": 9.333333333333334e-06, "loss": 47.1438, "step": 956 }, { "epoch": 22.788059701492536, "grad_norm": 45.424556732177734, "learning_rate": 9.327380952380954e-06, "loss": 48.3171, "step": 957 }, { "epoch": 22.811940298507462, "grad_norm": 27.726537704467773, "learning_rate": 9.321428571428572e-06, "loss": 47.1718, "step": 958 }, { "epoch": 22.83582089552239, "grad_norm": 58.36731719970703, "learning_rate": 9.315476190476192e-06, "loss": 47.5895, "step": 959 }, { "epoch": 22.85970149253731, "grad_norm": 58.96028137207031, "learning_rate": 9.30952380952381e-06, "loss": 47.4109, "step": 960 }, { "epoch": 22.883582089552238, "grad_norm": 24.928117752075195, "learning_rate": 9.30357142857143e-06, "loss": 48.1841, "step": 961 }, { "epoch": 22.907462686567165, "grad_norm": 38.36846160888672, "learning_rate": 9.297619047619048e-06, "loss": 47.7438, "step": 962 }, { "epoch": 22.93134328358209, "grad_norm": 37.60481643676758, "learning_rate": 9.291666666666668e-06, "loss": 46.5067, "step": 963 }, { "epoch": 22.955223880597014, "grad_norm": NaN, "learning_rate": 9.285714285714288e-06, "loss": 78.3124, "step": 964 }, { "epoch": 22.97910447761194, "grad_norm": 28.587425231933594, "learning_rate": 9.285714285714288e-06, "loss": 47.1599, "step": 965 }, { "epoch": 23.0, "grad_norm": 41.493404388427734, "learning_rate": 9.279761904761906e-06, "loss": 41.2983, "step": 966 }, { "epoch": 23.023880597014927, "grad_norm": 41.00606918334961, "learning_rate": 9.273809523809525e-06, "loss": 46.8696, "step": 967 }, { "epoch": 23.04776119402985, "grad_norm": 31.043148040771484, "learning_rate": 9.267857142857144e-06, "loss": 46.4614, "step": 968 }, { "epoch": 23.071641791044776, "grad_norm": 36.815940856933594, "learning_rate": 9.261904761904763e-06, "loss": 47.5987, "step": 969 }, { "epoch": 23.095522388059702, "grad_norm": 35.73536682128906, "learning_rate": 9.255952380952381e-06, "loss": 47.8339, "step": 970 }, { "epoch": 23.119402985074625, "grad_norm": 26.95656967163086, "learning_rate": 9.250000000000001e-06, "loss": 48.0632, "step": 971 }, { "epoch": 23.143283582089552, "grad_norm": 40.408348083496094, "learning_rate": 9.244047619047621e-06, "loss": 47.5458, "step": 972 }, { "epoch": 23.16716417910448, "grad_norm": 41.97018051147461, "learning_rate": 9.238095238095239e-06, "loss": 48.8528, "step": 973 }, { "epoch": 23.1910447761194, "grad_norm": 23.809162139892578, "learning_rate": 9.232142857142859e-06, "loss": 47.663, "step": 974 }, { "epoch": 23.214925373134328, "grad_norm": 36.0232048034668, "learning_rate": 9.226190476190477e-06, "loss": 47.496, "step": 975 }, { "epoch": 23.238805970149254, "grad_norm": 32.06623077392578, "learning_rate": 9.220238095238097e-06, "loss": 47.4472, "step": 976 }, { "epoch": 23.262686567164177, "grad_norm": 30.663307189941406, "learning_rate": 9.214285714285715e-06, "loss": 47.1342, "step": 977 }, { "epoch": 23.286567164179104, "grad_norm": 39.121437072753906, "learning_rate": 9.208333333333333e-06, "loss": 47.9977, "step": 978 }, { "epoch": 23.31044776119403, "grad_norm": 31.75649642944336, "learning_rate": 9.202380952380953e-06, "loss": 49.2196, "step": 979 }, { "epoch": 23.334328358208957, "grad_norm": 50.10381317138672, "learning_rate": 9.196428571428571e-06, "loss": 47.6487, "step": 980 }, { "epoch": 23.35820895522388, "grad_norm": 36.412906646728516, "learning_rate": 9.19047619047619e-06, "loss": 47.0012, "step": 981 }, { "epoch": 23.382089552238806, "grad_norm": 40.47570037841797, "learning_rate": 9.18452380952381e-06, "loss": 45.4449, "step": 982 }, { "epoch": 23.405970149253733, "grad_norm": 43.92324447631836, "learning_rate": 9.178571428571429e-06, "loss": 47.8727, "step": 983 }, { "epoch": 23.429850746268656, "grad_norm": 28.896121978759766, "learning_rate": 9.172619047619048e-06, "loss": 47.8489, "step": 984 }, { "epoch": 23.453731343283582, "grad_norm": 37.02536392211914, "learning_rate": 9.166666666666666e-06, "loss": 48.4484, "step": 985 }, { "epoch": 23.47761194029851, "grad_norm": 26.289518356323242, "learning_rate": 9.160714285714286e-06, "loss": 47.0221, "step": 986 }, { "epoch": 23.501492537313432, "grad_norm": 33.60945129394531, "learning_rate": 9.154761904761906e-06, "loss": 47.7454, "step": 987 }, { "epoch": 23.52537313432836, "grad_norm": 31.284311294555664, "learning_rate": 9.148809523809524e-06, "loss": 47.0558, "step": 988 }, { "epoch": 23.549253731343285, "grad_norm": 30.488906860351562, "learning_rate": 9.142857142857144e-06, "loss": 46.4408, "step": 989 }, { "epoch": 23.573134328358208, "grad_norm": 34.29289245605469, "learning_rate": 9.136904761904762e-06, "loss": 46.2796, "step": 990 }, { "epoch": 23.597014925373134, "grad_norm": 22.803457260131836, "learning_rate": 9.130952380952382e-06, "loss": 47.2684, "step": 991 }, { "epoch": 23.62089552238806, "grad_norm": 33.18730926513672, "learning_rate": 9.125e-06, "loss": 47.5, "step": 992 }, { "epoch": 23.644776119402984, "grad_norm": 28.421592712402344, "learning_rate": 9.11904761904762e-06, "loss": 46.7508, "step": 993 }, { "epoch": 23.66865671641791, "grad_norm": 23.492319107055664, "learning_rate": 9.11309523809524e-06, "loss": 46.6042, "step": 994 }, { "epoch": 23.692537313432837, "grad_norm": 30.10877227783203, "learning_rate": 9.107142857142858e-06, "loss": 46.2632, "step": 995 }, { "epoch": 23.71641791044776, "grad_norm": 23.64444351196289, "learning_rate": 9.101190476190477e-06, "loss": 47.2817, "step": 996 }, { "epoch": 23.740298507462686, "grad_norm": 28.243606567382812, "learning_rate": 9.095238095238095e-06, "loss": 47.1196, "step": 997 }, { "epoch": 23.764179104477613, "grad_norm": 26.84208869934082, "learning_rate": 9.089285714285715e-06, "loss": 46.6631, "step": 998 }, { "epoch": 23.788059701492536, "grad_norm": 29.558794021606445, "learning_rate": 9.083333333333333e-06, "loss": 45.8711, "step": 999 }, { "epoch": 23.811940298507462, "grad_norm": 25.105928421020508, "learning_rate": 9.077380952380953e-06, "loss": 47.8253, "step": 1000 }, { "epoch": 23.83582089552239, "grad_norm": NaN, "learning_rate": 9.071428571428573e-06, "loss": 82.5048, "step": 1001 }, { "epoch": 23.85970149253731, "grad_norm": 25.548643112182617, "learning_rate": 9.071428571428573e-06, "loss": 47.5042, "step": 1002 }, { "epoch": 23.883582089552238, "grad_norm": 28.8011531829834, "learning_rate": 9.065476190476191e-06, "loss": 47.0084, "step": 1003 }, { "epoch": 23.907462686567165, "grad_norm": 31.907651901245117, "learning_rate": 9.05952380952381e-06, "loss": 48.287, "step": 1004 }, { "epoch": 23.93134328358209, "grad_norm": 32.044986724853516, "learning_rate": 9.053571428571429e-06, "loss": 47.276, "step": 1005 }, { "epoch": 23.955223880597014, "grad_norm": 31.224260330200195, "learning_rate": 9.047619047619049e-06, "loss": 47.4774, "step": 1006 }, { "epoch": 23.97910447761194, "grad_norm": 29.830835342407227, "learning_rate": 9.041666666666667e-06, "loss": 47.7031, "step": 1007 }, { "epoch": 24.0, "grad_norm": 25.12934112548828, "learning_rate": 9.035714285714287e-06, "loss": 41.8156, "step": 1008 }, { "epoch": 24.023880597014927, "grad_norm": 31.172348022460938, "learning_rate": 9.029761904761906e-06, "loss": 48.0591, "step": 1009 }, { "epoch": 24.04776119402985, "grad_norm": 26.59412956237793, "learning_rate": 9.023809523809524e-06, "loss": 47.6291, "step": 1010 }, { "epoch": 24.071641791044776, "grad_norm": 29.16905975341797, "learning_rate": 9.017857142857144e-06, "loss": 47.4587, "step": 1011 }, { "epoch": 24.095522388059702, "grad_norm": 33.05836868286133, "learning_rate": 9.011904761904762e-06, "loss": 47.8748, "step": 1012 }, { "epoch": 24.119402985074625, "grad_norm": 26.13016700744629, "learning_rate": 9.005952380952382e-06, "loss": 48.0003, "step": 1013 }, { "epoch": 24.143283582089552, "grad_norm": 29.883411407470703, "learning_rate": 9e-06, "loss": 47.0188, "step": 1014 }, { "epoch": 24.16716417910448, "grad_norm": 29.039255142211914, "learning_rate": 8.99404761904762e-06, "loss": 46.8844, "step": 1015 }, { "epoch": 24.1910447761194, "grad_norm": 26.532760620117188, "learning_rate": 8.98809523809524e-06, "loss": 47.1817, "step": 1016 }, { "epoch": 24.214925373134328, "grad_norm": 30.146087646484375, "learning_rate": 8.982142857142858e-06, "loss": 46.4863, "step": 1017 }, { "epoch": 24.238805970149254, "grad_norm": 27.027935028076172, "learning_rate": 8.976190476190478e-06, "loss": 45.7162, "step": 1018 }, { "epoch": 24.262686567164177, "grad_norm": 27.315515518188477, "learning_rate": 8.970238095238096e-06, "loss": 46.6337, "step": 1019 }, { "epoch": 24.286567164179104, "grad_norm": 25.63303565979004, "learning_rate": 8.964285714285716e-06, "loss": 46.5452, "step": 1020 }, { "epoch": 24.31044776119403, "grad_norm": 22.407268524169922, "learning_rate": 8.958333333333334e-06, "loss": 47.3262, "step": 1021 }, { "epoch": 24.334328358208957, "grad_norm": 19.4051513671875, "learning_rate": 8.952380952380953e-06, "loss": 46.8407, "step": 1022 }, { "epoch": 24.35820895522388, "grad_norm": NaN, "learning_rate": 8.946428571428573e-06, "loss": 77.1735, "step": 1023 }, { "epoch": 24.382089552238806, "grad_norm": 26.870166778564453, "learning_rate": 8.946428571428573e-06, "loss": 46.9395, "step": 1024 }, { "epoch": 24.405970149253733, "grad_norm": 21.54165267944336, "learning_rate": 8.940476190476191e-06, "loss": 47.2505, "step": 1025 }, { "epoch": 24.429850746268656, "grad_norm": 29.317501068115234, "learning_rate": 8.934523809523811e-06, "loss": 48.0554, "step": 1026 }, { "epoch": 24.453731343283582, "grad_norm": 27.45809555053711, "learning_rate": 8.92857142857143e-06, "loss": 47.0495, "step": 1027 }, { "epoch": 24.47761194029851, "grad_norm": NaN, "learning_rate": 8.922619047619049e-06, "loss": 66.51, "step": 1028 }, { "epoch": 24.501492537313432, "grad_norm": 23.169204711914062, "learning_rate": 8.922619047619049e-06, "loss": 47.5902, "step": 1029 }, { "epoch": 24.52537313432836, "grad_norm": 31.986774444580078, "learning_rate": 8.916666666666667e-06, "loss": 47.4281, "step": 1030 }, { "epoch": 24.549253731343285, "grad_norm": 27.15190315246582, "learning_rate": 8.910714285714287e-06, "loss": 46.3638, "step": 1031 }, { "epoch": 24.573134328358208, "grad_norm": 26.88028335571289, "learning_rate": 8.904761904761905e-06, "loss": 45.0491, "step": 1032 }, { "epoch": 24.597014925373134, "grad_norm": 27.693952560424805, "learning_rate": 8.898809523809525e-06, "loss": 47.6471, "step": 1033 }, { "epoch": 24.62089552238806, "grad_norm": 33.45442581176758, "learning_rate": 8.892857142857143e-06, "loss": 47.1459, "step": 1034 }, { "epoch": 24.644776119402984, "grad_norm": 29.933320999145508, "learning_rate": 8.886904761904763e-06, "loss": 46.9218, "step": 1035 }, { "epoch": 24.66865671641791, "grad_norm": 26.401569366455078, "learning_rate": 8.88095238095238e-06, "loss": 47.7027, "step": 1036 }, { "epoch": 24.692537313432837, "grad_norm": 26.92498016357422, "learning_rate": 8.875e-06, "loss": 47.2302, "step": 1037 }, { "epoch": 24.71641791044776, "grad_norm": 28.368043899536133, "learning_rate": 8.869047619047619e-06, "loss": 47.4479, "step": 1038 }, { "epoch": 24.740298507462686, "grad_norm": 27.319650650024414, "learning_rate": 8.863095238095238e-06, "loss": 47.4652, "step": 1039 }, { "epoch": 24.764179104477613, "grad_norm": 37.10929870605469, "learning_rate": 8.857142857142858e-06, "loss": 47.3543, "step": 1040 }, { "epoch": 24.788059701492536, "grad_norm": 32.430416107177734, "learning_rate": 8.851190476190476e-06, "loss": 46.1406, "step": 1041 }, { "epoch": 24.811940298507462, "grad_norm": 33.29399108886719, "learning_rate": 8.845238095238096e-06, "loss": 47.1917, "step": 1042 }, { "epoch": 24.83582089552239, "grad_norm": 32.72507095336914, "learning_rate": 8.839285714285714e-06, "loss": 47.677, "step": 1043 }, { "epoch": 24.85970149253731, "grad_norm": 25.997148513793945, "learning_rate": 8.833333333333334e-06, "loss": 47.6851, "step": 1044 }, { "epoch": 24.883582089552238, "grad_norm": 33.00047302246094, "learning_rate": 8.827380952380952e-06, "loss": 47.5326, "step": 1045 }, { "epoch": 24.907462686567165, "grad_norm": 33.95719528198242, "learning_rate": 8.821428571428572e-06, "loss": 47.2836, "step": 1046 }, { "epoch": 24.93134328358209, "grad_norm": 31.353008270263672, "learning_rate": 8.815476190476192e-06, "loss": 47.8318, "step": 1047 }, { "epoch": 24.955223880597014, "grad_norm": 27.4250545501709, "learning_rate": 8.80952380952381e-06, "loss": 47.0066, "step": 1048 }, { "epoch": 24.97910447761194, "grad_norm": 30.22010612487793, "learning_rate": 8.80357142857143e-06, "loss": 46.6991, "step": 1049 }, { "epoch": 25.0, "grad_norm": 19.693180084228516, "learning_rate": 8.797619047619048e-06, "loss": 41.6055, "step": 1050 }, { "epoch": 25.023880597014927, "grad_norm": 24.590662002563477, "learning_rate": 8.791666666666667e-06, "loss": 46.3536, "step": 1051 }, { "epoch": 25.04776119402985, "grad_norm": 28.37199592590332, "learning_rate": 8.785714285714286e-06, "loss": 47.8334, "step": 1052 }, { "epoch": 25.071641791044776, "grad_norm": 26.38755226135254, "learning_rate": 8.779761904761905e-06, "loss": 47.6486, "step": 1053 }, { "epoch": 25.095522388059702, "grad_norm": 27.338485717773438, "learning_rate": 8.773809523809525e-06, "loss": 47.3044, "step": 1054 }, { "epoch": 25.119402985074625, "grad_norm": 25.308486938476562, "learning_rate": 8.767857142857143e-06, "loss": 45.6873, "step": 1055 }, { "epoch": 25.143283582089552, "grad_norm": 30.886962890625, "learning_rate": 8.761904761904763e-06, "loss": 46.8938, "step": 1056 }, { "epoch": 25.16716417910448, "grad_norm": 25.25688934326172, "learning_rate": 8.755952380952381e-06, "loss": 47.4858, "step": 1057 }, { "epoch": 25.1910447761194, "grad_norm": 30.462963104248047, "learning_rate": 8.750000000000001e-06, "loss": 46.6334, "step": 1058 }, { "epoch": 25.214925373134328, "grad_norm": 22.87471580505371, "learning_rate": 8.744047619047619e-06, "loss": 46.0966, "step": 1059 }, { "epoch": 25.238805970149254, "grad_norm": 23.413904190063477, "learning_rate": 8.738095238095239e-06, "loss": 46.8938, "step": 1060 }, { "epoch": 25.262686567164177, "grad_norm": 26.926279067993164, "learning_rate": 8.732142857142859e-06, "loss": 46.3773, "step": 1061 }, { "epoch": 25.286567164179104, "grad_norm": 27.595348358154297, "learning_rate": 8.726190476190477e-06, "loss": 48.0235, "step": 1062 }, { "epoch": 25.31044776119403, "grad_norm": 26.124523162841797, "learning_rate": 8.720238095238096e-06, "loss": 46.6863, "step": 1063 }, { "epoch": 25.334328358208957, "grad_norm": 28.308120727539062, "learning_rate": 8.714285714285715e-06, "loss": 47.7158, "step": 1064 }, { "epoch": 25.35820895522388, "grad_norm": 23.434846878051758, "learning_rate": 8.708333333333334e-06, "loss": 47.2951, "step": 1065 }, { "epoch": 25.382089552238806, "grad_norm": 26.917911529541016, "learning_rate": 8.702380952380952e-06, "loss": 45.7266, "step": 1066 }, { "epoch": 25.405970149253733, "grad_norm": 19.7725772857666, "learning_rate": 8.696428571428572e-06, "loss": 46.5458, "step": 1067 }, { "epoch": 25.429850746268656, "grad_norm": 27.18629264831543, "learning_rate": 8.690476190476192e-06, "loss": 46.3133, "step": 1068 }, { "epoch": 25.453731343283582, "grad_norm": 26.112865447998047, "learning_rate": 8.68452380952381e-06, "loss": 46.6383, "step": 1069 }, { "epoch": 25.47761194029851, "grad_norm": 19.385990142822266, "learning_rate": 8.67857142857143e-06, "loss": 46.5541, "step": 1070 }, { "epoch": 25.501492537313432, "grad_norm": 26.713350296020508, "learning_rate": 8.672619047619048e-06, "loss": 48.045, "step": 1071 }, { "epoch": 25.52537313432836, "grad_norm": 29.80147933959961, "learning_rate": 8.666666666666668e-06, "loss": 47.4443, "step": 1072 }, { "epoch": 25.549253731343285, "grad_norm": 23.674266815185547, "learning_rate": 8.660714285714286e-06, "loss": 46.6662, "step": 1073 }, { "epoch": 25.573134328358208, "grad_norm": 46.435401916503906, "learning_rate": 8.654761904761906e-06, "loss": 46.9276, "step": 1074 }, { "epoch": 25.597014925373134, "grad_norm": 35.016502380371094, "learning_rate": 8.648809523809526e-06, "loss": 47.6811, "step": 1075 }, { "epoch": 25.62089552238806, "grad_norm": 42.57990646362305, "learning_rate": 8.642857142857144e-06, "loss": 46.5684, "step": 1076 }, { "epoch": 25.644776119402984, "grad_norm": 36.2376708984375, "learning_rate": 8.636904761904763e-06, "loss": 46.1807, "step": 1077 }, { "epoch": 25.66865671641791, "grad_norm": 41.44023895263672, "learning_rate": 8.630952380952381e-06, "loss": 46.0823, "step": 1078 }, { "epoch": 25.692537313432837, "grad_norm": 43.62863540649414, "learning_rate": 8.625000000000001e-06, "loss": 47.9958, "step": 1079 }, { "epoch": 25.71641791044776, "grad_norm": 34.232120513916016, "learning_rate": 8.61904761904762e-06, "loss": 47.9585, "step": 1080 }, { "epoch": 25.740298507462686, "grad_norm": 38.023197174072266, "learning_rate": 8.61309523809524e-06, "loss": 47.7344, "step": 1081 }, { "epoch": 25.764179104477613, "grad_norm": 37.89833068847656, "learning_rate": 8.607142857142859e-06, "loss": 47.8956, "step": 1082 }, { "epoch": 25.788059701492536, "grad_norm": 33.03269958496094, "learning_rate": 8.601190476190477e-06, "loss": 47.7408, "step": 1083 }, { "epoch": 25.811940298507462, "grad_norm": 44.31171798706055, "learning_rate": 8.595238095238097e-06, "loss": 47.6232, "step": 1084 }, { "epoch": 25.83582089552239, "grad_norm": 42.54961395263672, "learning_rate": 8.589285714285715e-06, "loss": 47.9757, "step": 1085 }, { "epoch": 25.85970149253731, "grad_norm": 27.695526123046875, "learning_rate": 8.583333333333333e-06, "loss": 47.0934, "step": 1086 }, { "epoch": 25.883582089552238, "grad_norm": 32.62801742553711, "learning_rate": 8.577380952380953e-06, "loss": 47.1186, "step": 1087 }, { "epoch": 25.907462686567165, "grad_norm": 26.777305603027344, "learning_rate": 8.571428571428571e-06, "loss": 47.2931, "step": 1088 }, { "epoch": 25.93134328358209, "grad_norm": 24.382678985595703, "learning_rate": 8.56547619047619e-06, "loss": 46.4698, "step": 1089 }, { "epoch": 25.955223880597014, "grad_norm": 34.310150146484375, "learning_rate": 8.55952380952381e-06, "loss": 46.0509, "step": 1090 }, { "epoch": 25.97910447761194, "grad_norm": 27.468976974487305, "learning_rate": 8.553571428571429e-06, "loss": 46.9123, "step": 1091 }, { "epoch": 26.0, "grad_norm": 27.90901756286621, "learning_rate": 8.547619047619048e-06, "loss": 41.8265, "step": 1092 }, { "epoch": 26.023880597014927, "grad_norm": 28.853416442871094, "learning_rate": 8.541666666666666e-06, "loss": 47.9612, "step": 1093 }, { "epoch": 26.04776119402985, "grad_norm": 31.96144676208496, "learning_rate": 8.535714285714286e-06, "loss": 46.8167, "step": 1094 }, { "epoch": 26.071641791044776, "grad_norm": 33.179141998291016, "learning_rate": 8.529761904761904e-06, "loss": 48.0464, "step": 1095 }, { "epoch": 26.095522388059702, "grad_norm": 32.18705368041992, "learning_rate": 8.523809523809524e-06, "loss": 45.6743, "step": 1096 }, { "epoch": 26.119402985074625, "grad_norm": 26.125934600830078, "learning_rate": 8.517857142857144e-06, "loss": 46.4944, "step": 1097 }, { "epoch": 26.143283582089552, "grad_norm": 31.666461944580078, "learning_rate": 8.511904761904762e-06, "loss": 47.6152, "step": 1098 }, { "epoch": 26.16716417910448, "grad_norm": 29.90437889099121, "learning_rate": 8.505952380952382e-06, "loss": 46.4497, "step": 1099 }, { "epoch": 26.1910447761194, "grad_norm": 32.59938430786133, "learning_rate": 8.5e-06, "loss": 47.1877, "step": 1100 }, { "epoch": 26.214925373134328, "grad_norm": 28.368562698364258, "learning_rate": 8.49404761904762e-06, "loss": 46.4898, "step": 1101 }, { "epoch": 26.238805970149254, "grad_norm": 31.274070739746094, "learning_rate": 8.488095238095238e-06, "loss": 47.1507, "step": 1102 }, { "epoch": 26.262686567164177, "grad_norm": 24.63444709777832, "learning_rate": 8.482142857142858e-06, "loss": 47.3659, "step": 1103 }, { "epoch": 26.286567164179104, "grad_norm": 35.413970947265625, "learning_rate": 8.476190476190477e-06, "loss": 46.6459, "step": 1104 }, { "epoch": 26.31044776119403, "grad_norm": 27.774656295776367, "learning_rate": 8.470238095238095e-06, "loss": 47.4369, "step": 1105 }, { "epoch": 26.334328358208957, "grad_norm": 32.258155822753906, "learning_rate": 8.464285714285715e-06, "loss": 47.487, "step": 1106 }, { "epoch": 26.35820895522388, "grad_norm": 18.22418785095215, "learning_rate": 8.458333333333333e-06, "loss": 46.948, "step": 1107 }, { "epoch": 26.382089552238806, "grad_norm": 24.50945472717285, "learning_rate": 8.452380952380953e-06, "loss": 45.7291, "step": 1108 }, { "epoch": 26.405970149253733, "grad_norm": 23.20486831665039, "learning_rate": 8.446428571428571e-06, "loss": 46.8704, "step": 1109 }, { "epoch": 26.429850746268656, "grad_norm": 20.810514450073242, "learning_rate": 8.440476190476191e-06, "loss": 47.3419, "step": 1110 }, { "epoch": 26.453731343283582, "grad_norm": 27.68440818786621, "learning_rate": 8.434523809523811e-06, "loss": 47.0641, "step": 1111 }, { "epoch": 26.47761194029851, "grad_norm": 26.989046096801758, "learning_rate": 8.428571428571429e-06, "loss": 46.9324, "step": 1112 }, { "epoch": 26.501492537313432, "grad_norm": 25.986888885498047, "learning_rate": 8.422619047619049e-06, "loss": 48.6179, "step": 1113 }, { "epoch": 26.52537313432836, "grad_norm": 28.111356735229492, "learning_rate": 8.416666666666667e-06, "loss": 46.4251, "step": 1114 }, { "epoch": 26.549253731343285, "grad_norm": 24.306228637695312, "learning_rate": 8.410714285714287e-06, "loss": 46.4379, "step": 1115 }, { "epoch": 26.573134328358208, "grad_norm": 23.894895553588867, "learning_rate": 8.404761904761905e-06, "loss": 46.665, "step": 1116 }, { "epoch": 26.597014925373134, "grad_norm": 25.917400360107422, "learning_rate": 8.398809523809525e-06, "loss": 46.6619, "step": 1117 }, { "epoch": 26.62089552238806, "grad_norm": 21.423585891723633, "learning_rate": 8.392857142857144e-06, "loss": 46.3447, "step": 1118 }, { "epoch": 26.644776119402984, "grad_norm": 29.13437271118164, "learning_rate": 8.386904761904762e-06, "loss": 46.4292, "step": 1119 }, { "epoch": 26.66865671641791, "grad_norm": 25.711469650268555, "learning_rate": 8.380952380952382e-06, "loss": 46.6156, "step": 1120 }, { "epoch": 26.692537313432837, "grad_norm": 26.55695915222168, "learning_rate": 8.375e-06, "loss": 46.7429, "step": 1121 }, { "epoch": 26.71641791044776, "grad_norm": 27.66262435913086, "learning_rate": 8.36904761904762e-06, "loss": 47.1275, "step": 1122 }, { "epoch": 26.740298507462686, "grad_norm": 33.85395050048828, "learning_rate": 8.36309523809524e-06, "loss": 46.7244, "step": 1123 }, { "epoch": 26.764179104477613, "grad_norm": 29.51833152770996, "learning_rate": 8.357142857142858e-06, "loss": 47.2072, "step": 1124 }, { "epoch": 26.788059701492536, "grad_norm": 26.21416664123535, "learning_rate": 8.351190476190478e-06, "loss": 47.8298, "step": 1125 }, { "epoch": 26.811940298507462, "grad_norm": 31.24039649963379, "learning_rate": 8.345238095238096e-06, "loss": 46.8069, "step": 1126 }, { "epoch": 26.83582089552239, "grad_norm": 32.19520568847656, "learning_rate": 8.339285714285716e-06, "loss": 47.1494, "step": 1127 }, { "epoch": 26.85970149253731, "grad_norm": 29.194063186645508, "learning_rate": 8.333333333333334e-06, "loss": 46.1827, "step": 1128 }, { "epoch": 26.883582089552238, "grad_norm": 28.723541259765625, "learning_rate": 8.327380952380954e-06, "loss": 46.8513, "step": 1129 }, { "epoch": 26.907462686567165, "grad_norm": 26.91135597229004, "learning_rate": 8.321428571428573e-06, "loss": 46.561, "step": 1130 }, { "epoch": 26.93134328358209, "grad_norm": 25.60898208618164, "learning_rate": 8.315476190476191e-06, "loss": 46.4706, "step": 1131 }, { "epoch": 26.955223880597014, "grad_norm": 23.72539520263672, "learning_rate": 8.309523809523811e-06, "loss": 46.7871, "step": 1132 }, { "epoch": 26.97910447761194, "grad_norm": 25.030731201171875, "learning_rate": 8.30357142857143e-06, "loss": 46.2433, "step": 1133 }, { "epoch": 27.0, "grad_norm": 25.439281463623047, "learning_rate": 8.297619047619049e-06, "loss": 40.9101, "step": 1134 }, { "epoch": 27.023880597014927, "grad_norm": 29.52981948852539, "learning_rate": 8.291666666666667e-06, "loss": 46.9385, "step": 1135 }, { "epoch": 27.04776119402985, "grad_norm": 22.007299423217773, "learning_rate": 8.285714285714287e-06, "loss": 47.2751, "step": 1136 }, { "epoch": 27.071641791044776, "grad_norm": 31.965675354003906, "learning_rate": 8.279761904761905e-06, "loss": 47.7763, "step": 1137 }, { "epoch": 27.095522388059702, "grad_norm": 23.38637351989746, "learning_rate": 8.273809523809523e-06, "loss": 46.0412, "step": 1138 }, { "epoch": 27.119402985074625, "grad_norm": 24.295711517333984, "learning_rate": 8.267857142857143e-06, "loss": 46.8552, "step": 1139 }, { "epoch": 27.143283582089552, "grad_norm": 20.915624618530273, "learning_rate": 8.261904761904763e-06, "loss": 46.3041, "step": 1140 }, { "epoch": 27.16716417910448, "grad_norm": 28.25569725036621, "learning_rate": 8.25595238095238e-06, "loss": 45.8736, "step": 1141 }, { "epoch": 27.1910447761194, "grad_norm": 24.8399658203125, "learning_rate": 8.25e-06, "loss": 46.0629, "step": 1142 }, { "epoch": 27.214925373134328, "grad_norm": 21.237272262573242, "learning_rate": 8.244047619047619e-06, "loss": 47.1252, "step": 1143 }, { "epoch": 27.238805970149254, "grad_norm": 24.35887336730957, "learning_rate": 8.238095238095239e-06, "loss": 46.9687, "step": 1144 }, { "epoch": 27.262686567164177, "grad_norm": 26.583545684814453, "learning_rate": 8.232142857142857e-06, "loss": 47.1486, "step": 1145 }, { "epoch": 27.286567164179104, "grad_norm": 23.712989807128906, "learning_rate": 8.226190476190476e-06, "loss": 46.4769, "step": 1146 }, { "epoch": 27.31044776119403, "grad_norm": 18.466094970703125, "learning_rate": 8.220238095238096e-06, "loss": 46.6911, "step": 1147 }, { "epoch": 27.334328358208957, "grad_norm": 31.812236785888672, "learning_rate": 8.214285714285714e-06, "loss": 47.6261, "step": 1148 }, { "epoch": 27.35820895522388, "grad_norm": 23.437780380249023, "learning_rate": 8.208333333333334e-06, "loss": 46.1776, "step": 1149 }, { "epoch": 27.382089552238806, "grad_norm": 27.252187728881836, "learning_rate": 8.202380952380952e-06, "loss": 44.8499, "step": 1150 }, { "epoch": 27.405970149253733, "grad_norm": 24.425500869750977, "learning_rate": 8.196428571428572e-06, "loss": 46.5397, "step": 1151 }, { "epoch": 27.429850746268656, "grad_norm": 28.237712860107422, "learning_rate": 8.190476190476192e-06, "loss": 46.8922, "step": 1152 }, { "epoch": 27.453731343283582, "grad_norm": 23.262300491333008, "learning_rate": 8.18452380952381e-06, "loss": 47.4204, "step": 1153 }, { "epoch": 27.47761194029851, "grad_norm": 20.69318389892578, "learning_rate": 8.17857142857143e-06, "loss": 47.1507, "step": 1154 }, { "epoch": 27.501492537313432, "grad_norm": 30.182701110839844, "learning_rate": 8.172619047619048e-06, "loss": 46.198, "step": 1155 }, { "epoch": 27.52537313432836, "grad_norm": 28.804855346679688, "learning_rate": 8.166666666666668e-06, "loss": 46.1366, "step": 1156 }, { "epoch": 27.549253731343285, "grad_norm": 26.992097854614258, "learning_rate": 8.160714285714286e-06, "loss": 47.3639, "step": 1157 }, { "epoch": 27.573134328358208, "grad_norm": 27.22978401184082, "learning_rate": 8.154761904761905e-06, "loss": 46.7295, "step": 1158 }, { "epoch": 27.597014925373134, "grad_norm": 24.036380767822266, "learning_rate": 8.148809523809525e-06, "loss": 46.7824, "step": 1159 }, { "epoch": 27.62089552238806, "grad_norm": 28.67648696899414, "learning_rate": 8.142857142857143e-06, "loss": 46.9712, "step": 1160 }, { "epoch": 27.644776119402984, "grad_norm": 27.389991760253906, "learning_rate": 8.136904761904763e-06, "loss": 47.6263, "step": 1161 }, { "epoch": 27.66865671641791, "grad_norm": 28.887022018432617, "learning_rate": 8.130952380952381e-06, "loss": 47.7553, "step": 1162 }, { "epoch": 27.692537313432837, "grad_norm": 17.354753494262695, "learning_rate": 8.125000000000001e-06, "loss": 46.1359, "step": 1163 }, { "epoch": 27.71641791044776, "grad_norm": 26.72220230102539, "learning_rate": 8.119047619047619e-06, "loss": 46.3653, "step": 1164 }, { "epoch": 27.740298507462686, "grad_norm": 22.09579849243164, "learning_rate": 8.113095238095239e-06, "loss": 46.856, "step": 1165 }, { "epoch": 27.764179104477613, "grad_norm": 30.197189331054688, "learning_rate": 8.107142857142859e-06, "loss": 46.4435, "step": 1166 }, { "epoch": 27.788059701492536, "grad_norm": 28.597610473632812, "learning_rate": 8.101190476190477e-06, "loss": 48.3097, "step": 1167 }, { "epoch": 27.811940298507462, "grad_norm": 22.391801834106445, "learning_rate": 8.095238095238097e-06, "loss": 47.2598, "step": 1168 }, { "epoch": 27.83582089552239, "grad_norm": 28.523584365844727, "learning_rate": 8.089285714285715e-06, "loss": 46.3123, "step": 1169 }, { "epoch": 27.85970149253731, "grad_norm": 21.646997451782227, "learning_rate": 8.083333333333334e-06, "loss": 46.2853, "step": 1170 }, { "epoch": 27.883582089552238, "grad_norm": 22.68369483947754, "learning_rate": 8.077380952380953e-06, "loss": 46.1355, "step": 1171 }, { "epoch": 27.907462686567165, "grad_norm": 25.581987380981445, "learning_rate": 8.071428571428572e-06, "loss": 45.5431, "step": 1172 }, { "epoch": 27.93134328358209, "grad_norm": 26.512523651123047, "learning_rate": 8.065476190476192e-06, "loss": 47.1898, "step": 1173 }, { "epoch": 27.955223880597014, "grad_norm": 26.89809226989746, "learning_rate": 8.05952380952381e-06, "loss": 46.8108, "step": 1174 }, { "epoch": 27.97910447761194, "grad_norm": 23.638704299926758, "learning_rate": 8.05357142857143e-06, "loss": 47.1454, "step": 1175 }, { "epoch": 28.0, "grad_norm": 28.61042022705078, "learning_rate": 8.047619047619048e-06, "loss": 41.7161, "step": 1176 }, { "epoch": 28.023880597014927, "grad_norm": 31.2153377532959, "learning_rate": 8.041666666666668e-06, "loss": 46.8925, "step": 1177 }, { "epoch": 28.04776119402985, "grad_norm": 26.284482955932617, "learning_rate": 8.035714285714286e-06, "loss": 45.7065, "step": 1178 }, { "epoch": 28.071641791044776, "grad_norm": 30.96581268310547, "learning_rate": 8.029761904761906e-06, "loss": 46.5612, "step": 1179 }, { "epoch": 28.095522388059702, "grad_norm": 24.686336517333984, "learning_rate": 8.023809523809526e-06, "loss": 46.4376, "step": 1180 }, { "epoch": 28.119402985074625, "grad_norm": 27.783416748046875, "learning_rate": 8.017857142857144e-06, "loss": 47.247, "step": 1181 }, { "epoch": 28.143283582089552, "grad_norm": 33.3108024597168, "learning_rate": 8.011904761904763e-06, "loss": 47.3171, "step": 1182 }, { "epoch": 28.16716417910448, "grad_norm": 30.010536193847656, "learning_rate": 8.005952380952382e-06, "loss": 45.9761, "step": 1183 }, { "epoch": 28.1910447761194, "grad_norm": 29.399965286254883, "learning_rate": 8.000000000000001e-06, "loss": 47.3345, "step": 1184 }, { "epoch": 28.214925373134328, "grad_norm": 25.835142135620117, "learning_rate": 7.99404761904762e-06, "loss": 46.3395, "step": 1185 }, { "epoch": 28.238805970149254, "grad_norm": 26.06570053100586, "learning_rate": 7.98809523809524e-06, "loss": 45.842, "step": 1186 }, { "epoch": 28.262686567164177, "grad_norm": 28.64603042602539, "learning_rate": 7.982142857142859e-06, "loss": 46.4802, "step": 1187 }, { "epoch": 28.286567164179104, "grad_norm": 27.157583236694336, "learning_rate": 7.976190476190477e-06, "loss": 45.7376, "step": 1188 }, { "epoch": 28.31044776119403, "grad_norm": 22.073328018188477, "learning_rate": 7.970238095238097e-06, "loss": 47.0787, "step": 1189 }, { "epoch": 28.334328358208957, "grad_norm": 21.545568466186523, "learning_rate": 7.964285714285715e-06, "loss": 45.6691, "step": 1190 }, { "epoch": 28.35820895522388, "grad_norm": 26.17327880859375, "learning_rate": 7.958333333333333e-06, "loss": 46.2058, "step": 1191 }, { "epoch": 28.382089552238806, "grad_norm": 24.443920135498047, "learning_rate": 7.952380952380953e-06, "loss": 45.3531, "step": 1192 }, { "epoch": 28.405970149253733, "grad_norm": 27.207778930664062, "learning_rate": 7.946428571428571e-06, "loss": 46.5519, "step": 1193 }, { "epoch": 28.429850746268656, "grad_norm": 23.15156364440918, "learning_rate": 7.94047619047619e-06, "loss": 46.9284, "step": 1194 }, { "epoch": 28.453731343283582, "grad_norm": 27.96567153930664, "learning_rate": 7.93452380952381e-06, "loss": 46.3696, "step": 1195 }, { "epoch": 28.47761194029851, "grad_norm": 25.828689575195312, "learning_rate": 7.928571428571429e-06, "loss": 46.2933, "step": 1196 }, { "epoch": 28.501492537313432, "grad_norm": 30.69676971435547, "learning_rate": 7.922619047619048e-06, "loss": 46.7471, "step": 1197 }, { "epoch": 28.52537313432836, "grad_norm": 24.977018356323242, "learning_rate": 7.916666666666667e-06, "loss": 47.045, "step": 1198 }, { "epoch": 28.549253731343285, "grad_norm": 26.286821365356445, "learning_rate": 7.910714285714286e-06, "loss": 47.0185, "step": 1199 }, { "epoch": 28.573134328358208, "grad_norm": 25.324783325195312, "learning_rate": 7.904761904761904e-06, "loss": 46.4036, "step": 1200 }, { "epoch": 28.597014925373134, "grad_norm": 34.25847625732422, "learning_rate": 7.898809523809524e-06, "loss": 46.8307, "step": 1201 }, { "epoch": 28.62089552238806, "grad_norm": 24.739521026611328, "learning_rate": 7.892857142857144e-06, "loss": 46.515, "step": 1202 }, { "epoch": 28.644776119402984, "grad_norm": 36.694252014160156, "learning_rate": 7.886904761904762e-06, "loss": 45.7078, "step": 1203 }, { "epoch": 28.66865671641791, "grad_norm": 34.95314025878906, "learning_rate": 7.880952380952382e-06, "loss": 47.6302, "step": 1204 }, { "epoch": 28.692537313432837, "grad_norm": 26.474821090698242, "learning_rate": 7.875e-06, "loss": 47.2158, "step": 1205 }, { "epoch": 28.71641791044776, "grad_norm": 30.19892692565918, "learning_rate": 7.86904761904762e-06, "loss": 46.1515, "step": 1206 }, { "epoch": 28.740298507462686, "grad_norm": 32.16860580444336, "learning_rate": 7.863095238095238e-06, "loss": 46.3963, "step": 1207 }, { "epoch": 28.764179104477613, "grad_norm": 27.323444366455078, "learning_rate": 7.857142857142858e-06, "loss": 47.6511, "step": 1208 }, { "epoch": 28.788059701492536, "grad_norm": 24.073701858520508, "learning_rate": 7.851190476190477e-06, "loss": 46.7283, "step": 1209 }, { "epoch": 28.811940298507462, "grad_norm": 25.73206329345703, "learning_rate": 7.845238095238096e-06, "loss": 47.3957, "step": 1210 }, { "epoch": 28.83582089552239, "grad_norm": 23.368709564208984, "learning_rate": 7.839285714285715e-06, "loss": 46.1844, "step": 1211 }, { "epoch": 28.85970149253731, "grad_norm": 24.563371658325195, "learning_rate": 7.833333333333333e-06, "loss": 47.0752, "step": 1212 }, { "epoch": 28.883582089552238, "grad_norm": 20.747081756591797, "learning_rate": 7.827380952380953e-06, "loss": 45.9425, "step": 1213 }, { "epoch": 28.907462686567165, "grad_norm": 22.27573013305664, "learning_rate": 7.821428571428571e-06, "loss": 46.746, "step": 1214 }, { "epoch": 28.93134328358209, "grad_norm": 23.162179946899414, "learning_rate": 7.815476190476191e-06, "loss": 46.808, "step": 1215 }, { "epoch": 28.955223880597014, "grad_norm": 23.585325241088867, "learning_rate": 7.809523809523811e-06, "loss": 47.2584, "step": 1216 }, { "epoch": 28.97910447761194, "grad_norm": 29.979564666748047, "learning_rate": 7.803571428571429e-06, "loss": 47.3433, "step": 1217 }, { "epoch": 29.0, "grad_norm": 25.872072219848633, "learning_rate": 7.797619047619049e-06, "loss": 41.1844, "step": 1218 }, { "epoch": 29.023880597014927, "grad_norm": 25.673351287841797, "learning_rate": 7.791666666666667e-06, "loss": 46.4546, "step": 1219 }, { "epoch": 29.04776119402985, "grad_norm": 29.831058502197266, "learning_rate": 7.785714285714287e-06, "loss": 46.8122, "step": 1220 }, { "epoch": 29.071641791044776, "grad_norm": 27.548316955566406, "learning_rate": 7.779761904761905e-06, "loss": 47.748, "step": 1221 }, { "epoch": 29.095522388059702, "grad_norm": 26.399370193481445, "learning_rate": 7.773809523809525e-06, "loss": 46.736, "step": 1222 }, { "epoch": 29.119402985074625, "grad_norm": 25.127031326293945, "learning_rate": 7.767857142857144e-06, "loss": 46.8307, "step": 1223 }, { "epoch": 29.143283582089552, "grad_norm": 26.624732971191406, "learning_rate": 7.761904761904762e-06, "loss": 46.2401, "step": 1224 }, { "epoch": 29.16716417910448, "grad_norm": 30.770824432373047, "learning_rate": 7.755952380952382e-06, "loss": 46.7194, "step": 1225 }, { "epoch": 29.1910447761194, "grad_norm": 23.830007553100586, "learning_rate": 7.75e-06, "loss": 46.4737, "step": 1226 }, { "epoch": 29.214925373134328, "grad_norm": 32.90129470825195, "learning_rate": 7.74404761904762e-06, "loss": 47.4361, "step": 1227 }, { "epoch": 29.238805970149254, "grad_norm": 23.381397247314453, "learning_rate": 7.738095238095238e-06, "loss": 45.3297, "step": 1228 }, { "epoch": 29.262686567164177, "grad_norm": 32.836387634277344, "learning_rate": 7.732142857142858e-06, "loss": 46.0574, "step": 1229 }, { "epoch": 29.286567164179104, "grad_norm": 25.803264617919922, "learning_rate": 7.726190476190478e-06, "loss": 45.757, "step": 1230 }, { "epoch": 29.31044776119403, "grad_norm": 29.38982391357422, "learning_rate": 7.720238095238096e-06, "loss": 46.7099, "step": 1231 }, { "epoch": 29.334328358208957, "grad_norm": 26.39947509765625, "learning_rate": 7.714285714285716e-06, "loss": 47.5944, "step": 1232 }, { "epoch": 29.35820895522388, "grad_norm": 25.958354949951172, "learning_rate": 7.708333333333334e-06, "loss": 46.1395, "step": 1233 }, { "epoch": 29.382089552238806, "grad_norm": 28.697542190551758, "learning_rate": 7.702380952380954e-06, "loss": 46.1713, "step": 1234 }, { "epoch": 29.405970149253733, "grad_norm": 19.471586227416992, "learning_rate": 7.696428571428572e-06, "loss": 47.7724, "step": 1235 }, { "epoch": 29.429850746268656, "grad_norm": 29.924991607666016, "learning_rate": 7.690476190476191e-06, "loss": 47.2203, "step": 1236 }, { "epoch": 29.453731343283582, "grad_norm": 20.516891479492188, "learning_rate": 7.684523809523811e-06, "loss": 46.2945, "step": 1237 }, { "epoch": 29.47761194029851, "grad_norm": 30.605262756347656, "learning_rate": 7.67857142857143e-06, "loss": 47.1786, "step": 1238 }, { "epoch": 29.501492537313432, "grad_norm": 16.288013458251953, "learning_rate": 7.672619047619049e-06, "loss": 45.3413, "step": 1239 }, { "epoch": 29.52537313432836, "grad_norm": 23.54091453552246, "learning_rate": 7.666666666666667e-06, "loss": 45.9196, "step": 1240 }, { "epoch": 29.549253731343285, "grad_norm": 20.33724021911621, "learning_rate": 7.660714285714287e-06, "loss": 47.0275, "step": 1241 }, { "epoch": 29.573134328358208, "grad_norm": 27.460975646972656, "learning_rate": 7.654761904761905e-06, "loss": 44.8995, "step": 1242 }, { "epoch": 29.597014925373134, "grad_norm": 25.58623695373535, "learning_rate": 7.648809523809523e-06, "loss": 46.0706, "step": 1243 }, { "epoch": 29.62089552238806, "grad_norm": 27.997203826904297, "learning_rate": 7.642857142857143e-06, "loss": 47.2368, "step": 1244 }, { "epoch": 29.644776119402984, "grad_norm": 31.361181259155273, "learning_rate": 7.636904761904763e-06, "loss": 46.8056, "step": 1245 }, { "epoch": 29.66865671641791, "grad_norm": 29.266433715820312, "learning_rate": 7.630952380952381e-06, "loss": 45.323, "step": 1246 }, { "epoch": 29.692537313432837, "grad_norm": 24.066415786743164, "learning_rate": 7.625e-06, "loss": 46.9221, "step": 1247 }, { "epoch": 29.71641791044776, "grad_norm": 25.790491104125977, "learning_rate": 7.61904761904762e-06, "loss": 45.7051, "step": 1248 }, { "epoch": 29.740298507462686, "grad_norm": 24.202716827392578, "learning_rate": 7.6130952380952386e-06, "loss": 47.1067, "step": 1249 }, { "epoch": 29.764179104477613, "grad_norm": 27.302003860473633, "learning_rate": 7.6071428571428575e-06, "loss": 46.6039, "step": 1250 }, { "epoch": 29.788059701492536, "grad_norm": 22.75196075439453, "learning_rate": 7.6011904761904765e-06, "loss": 46.3265, "step": 1251 }, { "epoch": 29.811940298507462, "grad_norm": 30.963153839111328, "learning_rate": 7.595238095238095e-06, "loss": 46.3283, "step": 1252 }, { "epoch": 29.83582089552239, "grad_norm": 21.538162231445312, "learning_rate": 7.589285714285714e-06, "loss": 46.5345, "step": 1253 }, { "epoch": 29.85970149253731, "grad_norm": 28.09955596923828, "learning_rate": 7.583333333333333e-06, "loss": 47.1, "step": 1254 }, { "epoch": 29.883582089552238, "grad_norm": 20.88216781616211, "learning_rate": 7.577380952380953e-06, "loss": 45.3354, "step": 1255 }, { "epoch": 29.907462686567165, "grad_norm": 24.15240478515625, "learning_rate": 7.571428571428572e-06, "loss": 46.299, "step": 1256 }, { "epoch": 29.93134328358209, "grad_norm": 22.839298248291016, "learning_rate": 7.565476190476191e-06, "loss": 46.3436, "step": 1257 }, { "epoch": 29.955223880597014, "grad_norm": 26.582752227783203, "learning_rate": 7.55952380952381e-06, "loss": 45.9107, "step": 1258 }, { "epoch": 29.97910447761194, "grad_norm": 24.98562240600586, "learning_rate": 7.553571428571429e-06, "loss": 46.7134, "step": 1259 }, { "epoch": 30.0, "grad_norm": 23.327436447143555, "learning_rate": 7.547619047619048e-06, "loss": 41.2325, "step": 1260 }, { "epoch": 30.023880597014927, "grad_norm": 20.400623321533203, "learning_rate": 7.541666666666667e-06, "loss": 46.9564, "step": 1261 }, { "epoch": 30.04776119402985, "grad_norm": NaN, "learning_rate": 7.5357142857142865e-06, "loss": 68.2215, "step": 1262 }, { "epoch": 30.071641791044776, "grad_norm": 22.870811462402344, "learning_rate": 7.5357142857142865e-06, "loss": 46.5712, "step": 1263 }, { "epoch": 30.095522388059702, "grad_norm": 24.057098388671875, "learning_rate": 7.5297619047619055e-06, "loss": 46.8943, "step": 1264 }, { "epoch": 30.119402985074625, "grad_norm": 25.820720672607422, "learning_rate": 7.523809523809524e-06, "loss": 46.0747, "step": 1265 }, { "epoch": 30.143283582089552, "grad_norm": 28.460693359375, "learning_rate": 7.517857142857143e-06, "loss": 47.0127, "step": 1266 }, { "epoch": 30.16716417910448, "grad_norm": 21.60432243347168, "learning_rate": 7.511904761904762e-06, "loss": 45.8081, "step": 1267 }, { "epoch": 30.1910447761194, "grad_norm": 29.013648986816406, "learning_rate": 7.505952380952381e-06, "loss": 46.6712, "step": 1268 }, { "epoch": 30.214925373134328, "grad_norm": 24.865493774414062, "learning_rate": 7.500000000000001e-06, "loss": 46.6816, "step": 1269 }, { "epoch": 30.238805970149254, "grad_norm": 23.676206588745117, "learning_rate": 7.49404761904762e-06, "loss": 46.2663, "step": 1270 }, { "epoch": 30.262686567164177, "grad_norm": 27.889135360717773, "learning_rate": 7.488095238095239e-06, "loss": 45.7052, "step": 1271 }, { "epoch": 30.286567164179104, "grad_norm": 29.024211883544922, "learning_rate": 7.482142857142858e-06, "loss": 45.5005, "step": 1272 }, { "epoch": 30.31044776119403, "grad_norm": 25.8428955078125, "learning_rate": 7.476190476190477e-06, "loss": 46.788, "step": 1273 }, { "epoch": 30.334328358208957, "grad_norm": 26.765539169311523, "learning_rate": 7.470238095238096e-06, "loss": 46.454, "step": 1274 }, { "epoch": 30.35820895522388, "grad_norm": 34.80079650878906, "learning_rate": 7.464285714285715e-06, "loss": 47.6929, "step": 1275 }, { "epoch": 30.382089552238806, "grad_norm": 25.589618682861328, "learning_rate": 7.4583333333333345e-06, "loss": 46.0104, "step": 1276 }, { "epoch": 30.405970149253733, "grad_norm": 27.0733699798584, "learning_rate": 7.4523809523809534e-06, "loss": 45.4742, "step": 1277 }, { "epoch": 30.429850746268656, "grad_norm": 26.662338256835938, "learning_rate": 7.446428571428572e-06, "loss": 46.8066, "step": 1278 }, { "epoch": 30.453731343283582, "grad_norm": 28.389951705932617, "learning_rate": 7.440476190476191e-06, "loss": 46.9716, "step": 1279 }, { "epoch": 30.47761194029851, "grad_norm": NaN, "learning_rate": 7.43452380952381e-06, "loss": 52.2915, "step": 1280 }, { "epoch": 30.501492537313432, "grad_norm": 26.77708625793457, "learning_rate": 7.43452380952381e-06, "loss": 44.919, "step": 1281 }, { "epoch": 30.52537313432836, "grad_norm": 25.423444747924805, "learning_rate": 7.428571428571429e-06, "loss": 46.5057, "step": 1282 }, { "epoch": 30.549253731343285, "grad_norm": 24.04167366027832, "learning_rate": 7.422619047619048e-06, "loss": 46.3685, "step": 1283 }, { "epoch": 30.573134328358208, "grad_norm": 23.51607894897461, "learning_rate": 7.416666666666668e-06, "loss": 45.9694, "step": 1284 }, { "epoch": 30.597014925373134, "grad_norm": 26.216157913208008, "learning_rate": 7.410714285714287e-06, "loss": 47.2582, "step": 1285 }, { "epoch": 30.62089552238806, "grad_norm": 24.339780807495117, "learning_rate": 7.404761904761906e-06, "loss": 44.8052, "step": 1286 }, { "epoch": 30.644776119402984, "grad_norm": 19.203577041625977, "learning_rate": 7.398809523809525e-06, "loss": 47.0301, "step": 1287 }, { "epoch": 30.66865671641791, "grad_norm": 22.252805709838867, "learning_rate": 7.392857142857144e-06, "loss": 45.5993, "step": 1288 }, { "epoch": 30.692537313432837, "grad_norm": 25.316205978393555, "learning_rate": 7.386904761904763e-06, "loss": 46.1157, "step": 1289 }, { "epoch": 30.71641791044776, "grad_norm": 18.311643600463867, "learning_rate": 7.380952380952382e-06, "loss": 46.5986, "step": 1290 }, { "epoch": 30.740298507462686, "grad_norm": 31.84505271911621, "learning_rate": 7.375000000000001e-06, "loss": 46.9177, "step": 1291 }, { "epoch": 30.764179104477613, "grad_norm": 26.221525192260742, "learning_rate": 7.36904761904762e-06, "loss": 47.0897, "step": 1292 }, { "epoch": 30.788059701492536, "grad_norm": 27.029104232788086, "learning_rate": 7.363095238095239e-06, "loss": 45.3724, "step": 1293 }, { "epoch": 30.811940298507462, "grad_norm": 33.51012420654297, "learning_rate": 7.357142857142858e-06, "loss": 46.7046, "step": 1294 }, { "epoch": 30.83582089552239, "grad_norm": 26.42972183227539, "learning_rate": 7.351190476190477e-06, "loss": 46.7606, "step": 1295 }, { "epoch": 30.85970149253731, "grad_norm": 30.91115951538086, "learning_rate": 7.345238095238096e-06, "loss": 47.5485, "step": 1296 }, { "epoch": 30.883582089552238, "grad_norm": 28.296560287475586, "learning_rate": 7.339285714285714e-06, "loss": 46.4997, "step": 1297 }, { "epoch": 30.907462686567165, "grad_norm": 32.054561614990234, "learning_rate": 7.333333333333333e-06, "loss": 46.4953, "step": 1298 }, { "epoch": 30.93134328358209, "grad_norm": 31.635595321655273, "learning_rate": 7.327380952380952e-06, "loss": 46.5325, "step": 1299 }, { "epoch": 30.955223880597014, "grad_norm": 25.557523727416992, "learning_rate": 7.321428571428572e-06, "loss": 45.56, "step": 1300 }, { "epoch": 30.97910447761194, "grad_norm": 30.01810073852539, "learning_rate": 7.315476190476191e-06, "loss": 46.6149, "step": 1301 }, { "epoch": 31.0, "grad_norm": 24.6826114654541, "learning_rate": 7.30952380952381e-06, "loss": 40.8651, "step": 1302 }, { "epoch": 31.023880597014927, "grad_norm": 24.378164291381836, "learning_rate": 7.303571428571429e-06, "loss": 46.0721, "step": 1303 }, { "epoch": 31.04776119402985, "grad_norm": 20.247482299804688, "learning_rate": 7.297619047619048e-06, "loss": 45.8819, "step": 1304 }, { "epoch": 31.071641791044776, "grad_norm": 25.636112213134766, "learning_rate": 7.291666666666667e-06, "loss": 47.1987, "step": 1305 }, { "epoch": 31.095522388059702, "grad_norm": 30.428096771240234, "learning_rate": 7.285714285714286e-06, "loss": 46.6961, "step": 1306 }, { "epoch": 31.119402985074625, "grad_norm": 21.404991149902344, "learning_rate": 7.279761904761905e-06, "loss": 46.6841, "step": 1307 }, { "epoch": 31.143283582089552, "grad_norm": 31.655052185058594, "learning_rate": 7.273809523809524e-06, "loss": 47.7781, "step": 1308 }, { "epoch": 31.16716417910448, "grad_norm": 24.327327728271484, "learning_rate": 7.267857142857143e-06, "loss": 46.002, "step": 1309 }, { "epoch": 31.1910447761194, "grad_norm": 26.230745315551758, "learning_rate": 7.261904761904762e-06, "loss": 47.3903, "step": 1310 }, { "epoch": 31.214925373134328, "grad_norm": 27.337961196899414, "learning_rate": 7.255952380952381e-06, "loss": 46.0999, "step": 1311 }, { "epoch": 31.238805970149254, "grad_norm": 35.14864730834961, "learning_rate": 7.25e-06, "loss": 46.5187, "step": 1312 }, { "epoch": 31.262686567164177, "grad_norm": 26.60109519958496, "learning_rate": 7.24404761904762e-06, "loss": 44.5864, "step": 1313 }, { "epoch": 31.286567164179104, "grad_norm": 33.15165710449219, "learning_rate": 7.238095238095239e-06, "loss": 46.4779, "step": 1314 }, { "epoch": 31.31044776119403, "grad_norm": 26.3510684967041, "learning_rate": 7.232142857142858e-06, "loss": 47.0845, "step": 1315 }, { "epoch": 31.334328358208957, "grad_norm": 47.12569046020508, "learning_rate": 7.226190476190477e-06, "loss": 47.2947, "step": 1316 }, { "epoch": 31.35820895522388, "grad_norm": 40.15263748168945, "learning_rate": 7.220238095238096e-06, "loss": 45.8788, "step": 1317 }, { "epoch": 31.382089552238806, "grad_norm": 36.59072494506836, "learning_rate": 7.2142857142857145e-06, "loss": 45.991, "step": 1318 }, { "epoch": 31.405970149253733, "grad_norm": 36.895408630371094, "learning_rate": 7.2083333333333335e-06, "loss": 46.197, "step": 1319 }, { "epoch": 31.429850746268656, "grad_norm": NaN, "learning_rate": 7.202380952380953e-06, "loss": 38.9024, "step": 1320 }, { "epoch": 31.453731343283582, "grad_norm": 27.446247100830078, "learning_rate": 7.202380952380953e-06, "loss": 45.5293, "step": 1321 }, { "epoch": 31.47761194029851, "grad_norm": 27.48939323425293, "learning_rate": 7.196428571428572e-06, "loss": 46.8754, "step": 1322 }, { "epoch": 31.501492537313432, "grad_norm": 22.736833572387695, "learning_rate": 7.190476190476191e-06, "loss": 44.4905, "step": 1323 }, { "epoch": 31.52537313432836, "grad_norm": 23.413612365722656, "learning_rate": 7.18452380952381e-06, "loss": 47.0714, "step": 1324 }, { "epoch": 31.549253731343285, "grad_norm": 29.154848098754883, "learning_rate": 7.178571428571429e-06, "loss": 46.393, "step": 1325 }, { "epoch": 31.573134328358208, "grad_norm": 28.130638122558594, "learning_rate": 7.172619047619048e-06, "loss": 46.1857, "step": 1326 }, { "epoch": 31.597014925373134, "grad_norm": 19.745920181274414, "learning_rate": 7.166666666666667e-06, "loss": 45.2873, "step": 1327 }, { "epoch": 31.62089552238806, "grad_norm": 27.630279541015625, "learning_rate": 7.160714285714287e-06, "loss": 46.5475, "step": 1328 }, { "epoch": 31.644776119402984, "grad_norm": 20.568862915039062, "learning_rate": 7.154761904761906e-06, "loss": 46.231, "step": 1329 }, { "epoch": 31.66865671641791, "grad_norm": 17.769695281982422, "learning_rate": 7.148809523809525e-06, "loss": 46.8431, "step": 1330 }, { "epoch": 31.692537313432837, "grad_norm": 29.941057205200195, "learning_rate": 7.1428571428571436e-06, "loss": 44.842, "step": 1331 }, { "epoch": 31.71641791044776, "grad_norm": 21.054975509643555, "learning_rate": 7.1369047619047625e-06, "loss": 45.1147, "step": 1332 }, { "epoch": 31.740298507462686, "grad_norm": 23.80388069152832, "learning_rate": 7.1309523809523814e-06, "loss": 46.1839, "step": 1333 }, { "epoch": 31.764179104477613, "grad_norm": 30.561933517456055, "learning_rate": 7.125e-06, "loss": 46.2703, "step": 1334 }, { "epoch": 31.788059701492536, "grad_norm": 23.752151489257812, "learning_rate": 7.11904761904762e-06, "loss": 46.7347, "step": 1335 }, { "epoch": 31.811940298507462, "grad_norm": 32.00548553466797, "learning_rate": 7.113095238095239e-06, "loss": 46.1236, "step": 1336 }, { "epoch": 31.83582089552239, "grad_norm": 26.685504913330078, "learning_rate": 7.107142857142858e-06, "loss": 47.8881, "step": 1337 }, { "epoch": 31.85970149253731, "grad_norm": 26.5799503326416, "learning_rate": 7.101190476190477e-06, "loss": 46.1187, "step": 1338 }, { "epoch": 31.883582089552238, "grad_norm": 28.78062629699707, "learning_rate": 7.095238095238096e-06, "loss": 46.8058, "step": 1339 }, { "epoch": 31.907462686567165, "grad_norm": 26.98428726196289, "learning_rate": 7.089285714285715e-06, "loss": 46.3602, "step": 1340 }, { "epoch": 31.93134328358209, "grad_norm": 32.5291633605957, "learning_rate": 7.083333333333335e-06, "loss": 46.464, "step": 1341 }, { "epoch": 31.955223880597014, "grad_norm": 25.088685989379883, "learning_rate": 7.077380952380954e-06, "loss": 47.0542, "step": 1342 }, { "epoch": 31.97910447761194, "grad_norm": 32.58052444458008, "learning_rate": 7.0714285714285726e-06, "loss": 46.3364, "step": 1343 }, { "epoch": 32.0, "grad_norm": 22.65249252319336, "learning_rate": 7.0654761904761915e-06, "loss": 39.8201, "step": 1344 }, { "epoch": 32.02388059701492, "grad_norm": 27.03556251525879, "learning_rate": 7.0595238095238105e-06, "loss": 47.7819, "step": 1345 }, { "epoch": 32.04776119402985, "grad_norm": 25.712047576904297, "learning_rate": 7.053571428571429e-06, "loss": 46.1116, "step": 1346 }, { "epoch": 32.071641791044776, "grad_norm": 21.99336051940918, "learning_rate": 7.047619047619048e-06, "loss": 46.3745, "step": 1347 }, { "epoch": 32.0955223880597, "grad_norm": 28.53151512145996, "learning_rate": 7.041666666666668e-06, "loss": 46.5998, "step": 1348 }, { "epoch": 32.11940298507463, "grad_norm": 20.151912689208984, "learning_rate": 7.035714285714287e-06, "loss": 45.4197, "step": 1349 }, { "epoch": 32.14328358208955, "grad_norm": 21.491193771362305, "learning_rate": 7.029761904761905e-06, "loss": 46.0246, "step": 1350 }, { "epoch": 32.167164179104475, "grad_norm": 20.057588577270508, "learning_rate": 7.023809523809524e-06, "loss": 46.2149, "step": 1351 }, { "epoch": 32.191044776119405, "grad_norm": 16.675336837768555, "learning_rate": 7.017857142857143e-06, "loss": 46.5231, "step": 1352 }, { "epoch": 32.21492537313433, "grad_norm": 22.007305145263672, "learning_rate": 7.011904761904762e-06, "loss": 44.8665, "step": 1353 }, { "epoch": 32.23880597014925, "grad_norm": 22.947837829589844, "learning_rate": 7.005952380952381e-06, "loss": 45.0394, "step": 1354 }, { "epoch": 32.26268656716418, "grad_norm": 25.444522857666016, "learning_rate": 7e-06, "loss": 46.0367, "step": 1355 }, { "epoch": 32.286567164179104, "grad_norm": 22.319833755493164, "learning_rate": 6.994047619047619e-06, "loss": 47.0455, "step": 1356 }, { "epoch": 32.31044776119403, "grad_norm": 20.41710090637207, "learning_rate": 6.988095238095239e-06, "loss": 45.5119, "step": 1357 }, { "epoch": 32.33432835820896, "grad_norm": 29.03120994567871, "learning_rate": 6.9821428571428576e-06, "loss": 45.1962, "step": 1358 }, { "epoch": 32.35820895522388, "grad_norm": 22.10372543334961, "learning_rate": 6.9761904761904765e-06, "loss": 47.379, "step": 1359 }, { "epoch": 32.3820895522388, "grad_norm": 29.49492073059082, "learning_rate": 6.9702380952380955e-06, "loss": 48.2375, "step": 1360 }, { "epoch": 32.40597014925373, "grad_norm": 26.655149459838867, "learning_rate": 6.964285714285714e-06, "loss": 45.8468, "step": 1361 }, { "epoch": 32.429850746268656, "grad_norm": 27.994979858398438, "learning_rate": 6.958333333333333e-06, "loss": 46.4883, "step": 1362 }, { "epoch": 32.45373134328358, "grad_norm": 25.787900924682617, "learning_rate": 6.952380952380952e-06, "loss": 47.0159, "step": 1363 }, { "epoch": 32.47761194029851, "grad_norm": 29.429485321044922, "learning_rate": 6.946428571428572e-06, "loss": 45.182, "step": 1364 }, { "epoch": 32.50149253731343, "grad_norm": 21.825122833251953, "learning_rate": 6.940476190476191e-06, "loss": 47.4224, "step": 1365 }, { "epoch": 32.525373134328355, "grad_norm": 26.284622192382812, "learning_rate": 6.93452380952381e-06, "loss": 45.7025, "step": 1366 }, { "epoch": 32.549253731343285, "grad_norm": 21.384979248046875, "learning_rate": 6.928571428571429e-06, "loss": 45.6267, "step": 1367 }, { "epoch": 32.57313432835821, "grad_norm": 21.64442253112793, "learning_rate": 6.922619047619048e-06, "loss": 46.8577, "step": 1368 }, { "epoch": 32.59701492537313, "grad_norm": 22.377302169799805, "learning_rate": 6.916666666666667e-06, "loss": 46.5022, "step": 1369 }, { "epoch": 32.62089552238806, "grad_norm": 18.1933536529541, "learning_rate": 6.910714285714286e-06, "loss": 46.7098, "step": 1370 }, { "epoch": 32.644776119402984, "grad_norm": NaN, "learning_rate": 6.9047619047619055e-06, "loss": 59.6159, "step": 1371 }, { "epoch": 32.668656716417914, "grad_norm": 20.35690689086914, "learning_rate": 6.9047619047619055e-06, "loss": 47.4638, "step": 1372 }, { "epoch": 32.69253731343284, "grad_norm": 29.140775680541992, "learning_rate": 6.8988095238095245e-06, "loss": 46.242, "step": 1373 }, { "epoch": 32.71641791044776, "grad_norm": 25.27906608581543, "learning_rate": 6.892857142857143e-06, "loss": 45.7122, "step": 1374 }, { "epoch": 32.74029850746269, "grad_norm": 19.000076293945312, "learning_rate": 6.886904761904762e-06, "loss": 46.4813, "step": 1375 }, { "epoch": 32.76417910447761, "grad_norm": 25.048797607421875, "learning_rate": 6.880952380952381e-06, "loss": 45.5569, "step": 1376 }, { "epoch": 32.788059701492536, "grad_norm": 24.078060150146484, "learning_rate": 6.875e-06, "loss": 45.9708, "step": 1377 }, { "epoch": 32.811940298507466, "grad_norm": 23.822643280029297, "learning_rate": 6.86904761904762e-06, "loss": 47.5914, "step": 1378 }, { "epoch": 32.83582089552239, "grad_norm": 29.267864227294922, "learning_rate": 6.863095238095239e-06, "loss": 45.2741, "step": 1379 }, { "epoch": 32.85970149253731, "grad_norm": 19.477649688720703, "learning_rate": 6.857142857142858e-06, "loss": 46.3849, "step": 1380 }, { "epoch": 32.88358208955224, "grad_norm": 33.31391525268555, "learning_rate": 6.851190476190477e-06, "loss": 44.9609, "step": 1381 }, { "epoch": 32.907462686567165, "grad_norm": 23.064956665039062, "learning_rate": 6.845238095238096e-06, "loss": 45.8295, "step": 1382 }, { "epoch": 32.93134328358209, "grad_norm": 30.366653442382812, "learning_rate": 6.839285714285715e-06, "loss": 44.3142, "step": 1383 }, { "epoch": 32.95522388059702, "grad_norm": 25.059572219848633, "learning_rate": 6.833333333333334e-06, "loss": 46.5768, "step": 1384 }, { "epoch": 32.97910447761194, "grad_norm": 23.186697006225586, "learning_rate": 6.8273809523809535e-06, "loss": 45.185, "step": 1385 }, { "epoch": 33.0, "grad_norm": 21.550168991088867, "learning_rate": 6.8214285714285724e-06, "loss": 39.1732, "step": 1386 }, { "epoch": 33.02388059701492, "grad_norm": 22.417282104492188, "learning_rate": 6.815476190476191e-06, "loss": 47.6667, "step": 1387 }, { "epoch": 33.04776119402985, "grad_norm": 26.805702209472656, "learning_rate": 6.80952380952381e-06, "loss": 46.4091, "step": 1388 }, { "epoch": 33.071641791044776, "grad_norm": 23.723695755004883, "learning_rate": 6.803571428571429e-06, "loss": 46.3798, "step": 1389 }, { "epoch": 33.0955223880597, "grad_norm": 30.029897689819336, "learning_rate": 6.797619047619048e-06, "loss": 45.9736, "step": 1390 }, { "epoch": 33.11940298507463, "grad_norm": 19.387653350830078, "learning_rate": 6.791666666666667e-06, "loss": 45.1998, "step": 1391 }, { "epoch": 33.14328358208955, "grad_norm": 33.68477249145508, "learning_rate": 6.785714285714287e-06, "loss": 45.4435, "step": 1392 }, { "epoch": 33.167164179104475, "grad_norm": 26.001699447631836, "learning_rate": 6.779761904761906e-06, "loss": 45.6725, "step": 1393 }, { "epoch": 33.191044776119405, "grad_norm": 34.19535827636719, "learning_rate": 6.773809523809525e-06, "loss": 46.6387, "step": 1394 }, { "epoch": 33.21492537313433, "grad_norm": 24.243515014648438, "learning_rate": 6.767857142857144e-06, "loss": 46.4235, "step": 1395 }, { "epoch": 33.23880597014925, "grad_norm": 33.013675689697266, "learning_rate": 6.761904761904763e-06, "loss": 46.7151, "step": 1396 }, { "epoch": 33.26268656716418, "grad_norm": 30.15135955810547, "learning_rate": 6.755952380952382e-06, "loss": 46.3002, "step": 1397 }, { "epoch": 33.286567164179104, "grad_norm": 31.58100128173828, "learning_rate": 6.750000000000001e-06, "loss": 46.6084, "step": 1398 }, { "epoch": 33.31044776119403, "grad_norm": 26.23592185974121, "learning_rate": 6.74404761904762e-06, "loss": 45.5745, "step": 1399 }, { "epoch": 33.33432835820896, "grad_norm": 32.273311614990234, "learning_rate": 6.738095238095239e-06, "loss": 45.1131, "step": 1400 }, { "epoch": 33.35820895522388, "grad_norm": 29.7532958984375, "learning_rate": 6.732142857142858e-06, "loss": 45.9739, "step": 1401 }, { "epoch": 33.3820895522388, "grad_norm": 32.648704528808594, "learning_rate": 6.726190476190477e-06, "loss": 46.6293, "step": 1402 }, { "epoch": 33.40597014925373, "grad_norm": 26.455778121948242, "learning_rate": 6.720238095238096e-06, "loss": 46.5187, "step": 1403 }, { "epoch": 33.429850746268656, "grad_norm": 30.5809326171875, "learning_rate": 6.714285714285714e-06, "loss": 46.5477, "step": 1404 }, { "epoch": 33.45373134328358, "grad_norm": 29.604442596435547, "learning_rate": 6.708333333333333e-06, "loss": 45.462, "step": 1405 }, { "epoch": 33.47761194029851, "grad_norm": 36.19733428955078, "learning_rate": 6.702380952380952e-06, "loss": 46.7046, "step": 1406 }, { "epoch": 33.50149253731343, "grad_norm": 37.733619689941406, "learning_rate": 6.696428571428571e-06, "loss": 46.2156, "step": 1407 }, { "epoch": 33.525373134328355, "grad_norm": 26.49405288696289, "learning_rate": 6.690476190476191e-06, "loss": 45.373, "step": 1408 }, { "epoch": 33.549253731343285, "grad_norm": 30.09432601928711, "learning_rate": 6.68452380952381e-06, "loss": 46.3868, "step": 1409 }, { "epoch": 33.57313432835821, "grad_norm": 25.85702896118164, "learning_rate": 6.678571428571429e-06, "loss": 45.805, "step": 1410 }, { "epoch": 33.59701492537313, "grad_norm": 28.564380645751953, "learning_rate": 6.672619047619048e-06, "loss": 46.4158, "step": 1411 }, { "epoch": 33.62089552238806, "grad_norm": 19.878551483154297, "learning_rate": 6.666666666666667e-06, "loss": 46.5922, "step": 1412 }, { "epoch": 33.644776119402984, "grad_norm": 22.83441734313965, "learning_rate": 6.660714285714286e-06, "loss": 45.1216, "step": 1413 }, { "epoch": 33.668656716417914, "grad_norm": 31.372957229614258, "learning_rate": 6.654761904761905e-06, "loss": 47.111, "step": 1414 }, { "epoch": 33.69253731343284, "grad_norm": 23.98666763305664, "learning_rate": 6.648809523809524e-06, "loss": 47.1762, "step": 1415 }, { "epoch": 33.71641791044776, "grad_norm": 27.895401000976562, "learning_rate": 6.642857142857143e-06, "loss": 45.6151, "step": 1416 }, { "epoch": 33.74029850746269, "grad_norm": 21.776100158691406, "learning_rate": 6.636904761904762e-06, "loss": 45.7198, "step": 1417 }, { "epoch": 33.76417910447761, "grad_norm": 30.373878479003906, "learning_rate": 6.630952380952381e-06, "loss": 45.2212, "step": 1418 }, { "epoch": 33.788059701492536, "grad_norm": 26.604324340820312, "learning_rate": 6.625e-06, "loss": 45.2001, "step": 1419 }, { "epoch": 33.811940298507466, "grad_norm": 29.38104248046875, "learning_rate": 6.619047619047619e-06, "loss": 46.711, "step": 1420 }, { "epoch": 33.83582089552239, "grad_norm": 24.36806869506836, "learning_rate": 6.613095238095239e-06, "loss": 46.3608, "step": 1421 }, { "epoch": 33.85970149253731, "grad_norm": 33.40534210205078, "learning_rate": 6.607142857142858e-06, "loss": 45.5189, "step": 1422 }, { "epoch": 33.88358208955224, "grad_norm": 25.91522789001465, "learning_rate": 6.601190476190477e-06, "loss": 47.3604, "step": 1423 }, { "epoch": 33.907462686567165, "grad_norm": 25.26549530029297, "learning_rate": 6.595238095238096e-06, "loss": 46.483, "step": 1424 }, { "epoch": 33.93134328358209, "grad_norm": 26.101816177368164, "learning_rate": 6.589285714285715e-06, "loss": 45.7998, "step": 1425 }, { "epoch": 33.95522388059702, "grad_norm": 27.942903518676758, "learning_rate": 6.5833333333333335e-06, "loss": 46.4593, "step": 1426 }, { "epoch": 33.97910447761194, "grad_norm": 21.551429748535156, "learning_rate": 6.5773809523809525e-06, "loss": 45.458, "step": 1427 }, { "epoch": 34.0, "grad_norm": 32.26907730102539, "learning_rate": 6.571428571428572e-06, "loss": 38.5718, "step": 1428 }, { "epoch": 34.02388059701492, "grad_norm": 32.16934585571289, "learning_rate": 6.565476190476191e-06, "loss": 45.5812, "step": 1429 }, { "epoch": 34.04776119402985, "grad_norm": 19.646459579467773, "learning_rate": 6.55952380952381e-06, "loss": 44.9032, "step": 1430 }, { "epoch": 34.071641791044776, "grad_norm": 28.886430740356445, "learning_rate": 6.553571428571429e-06, "loss": 45.4187, "step": 1431 }, { "epoch": 34.0955223880597, "grad_norm": 22.722471237182617, "learning_rate": 6.547619047619048e-06, "loss": 45.468, "step": 1432 }, { "epoch": 34.11940298507463, "grad_norm": 25.334766387939453, "learning_rate": 6.541666666666667e-06, "loss": 47.3534, "step": 1433 }, { "epoch": 34.14328358208955, "grad_norm": 28.49740982055664, "learning_rate": 6.535714285714286e-06, "loss": 47.4733, "step": 1434 }, { "epoch": 34.167164179104475, "grad_norm": 27.773820877075195, "learning_rate": 6.529761904761906e-06, "loss": 45.3215, "step": 1435 }, { "epoch": 34.191044776119405, "grad_norm": 24.25234031677246, "learning_rate": 6.523809523809525e-06, "loss": 46.0011, "step": 1436 }, { "epoch": 34.21492537313433, "grad_norm": 28.666475296020508, "learning_rate": 6.517857142857144e-06, "loss": 45.9091, "step": 1437 }, { "epoch": 34.23880597014925, "grad_norm": 24.367712020874023, "learning_rate": 6.5119047619047626e-06, "loss": 46.5004, "step": 1438 }, { "epoch": 34.26268656716418, "grad_norm": 23.11983299255371, "learning_rate": 6.5059523809523815e-06, "loss": 47.3335, "step": 1439 }, { "epoch": 34.286567164179104, "grad_norm": 20.672304153442383, "learning_rate": 6.5000000000000004e-06, "loss": 47.1491, "step": 1440 }, { "epoch": 34.31044776119403, "grad_norm": 23.815290451049805, "learning_rate": 6.49404761904762e-06, "loss": 46.7084, "step": 1441 }, { "epoch": 34.33432835820896, "grad_norm": 20.582489013671875, "learning_rate": 6.488095238095239e-06, "loss": 46.9707, "step": 1442 }, { "epoch": 34.35820895522388, "grad_norm": 18.315673828125, "learning_rate": 6.482142857142858e-06, "loss": 47.5359, "step": 1443 }, { "epoch": 34.3820895522388, "grad_norm": 24.396499633789062, "learning_rate": 6.476190476190477e-06, "loss": 46.052, "step": 1444 }, { "epoch": 34.40597014925373, "grad_norm": 21.200523376464844, "learning_rate": 6.470238095238096e-06, "loss": 46.5843, "step": 1445 }, { "epoch": 34.429850746268656, "grad_norm": 17.59020233154297, "learning_rate": 6.464285714285715e-06, "loss": 46.0017, "step": 1446 }, { "epoch": 34.45373134328358, "grad_norm": 21.810382843017578, "learning_rate": 6.458333333333334e-06, "loss": 46.4232, "step": 1447 }, { "epoch": 34.47761194029851, "grad_norm": 27.78464126586914, "learning_rate": 6.452380952380954e-06, "loss": 46.0973, "step": 1448 }, { "epoch": 34.50149253731343, "grad_norm": 29.360275268554688, "learning_rate": 6.446428571428573e-06, "loss": 45.4821, "step": 1449 }, { "epoch": 34.525373134328355, "grad_norm": 26.914587020874023, "learning_rate": 6.4404761904761916e-06, "loss": 45.2982, "step": 1450 }, { "epoch": 34.549253731343285, "grad_norm": 22.19925880432129, "learning_rate": 6.4345238095238105e-06, "loss": 46.6693, "step": 1451 }, { "epoch": 34.57313432835821, "grad_norm": 25.39541244506836, "learning_rate": 6.4285714285714295e-06, "loss": 45.8936, "step": 1452 }, { "epoch": 34.59701492537313, "grad_norm": 20.633222579956055, "learning_rate": 6.422619047619048e-06, "loss": 44.6061, "step": 1453 }, { "epoch": 34.62089552238806, "grad_norm": 22.513790130615234, "learning_rate": 6.416666666666667e-06, "loss": 45.5503, "step": 1454 }, { "epoch": 34.644776119402984, "grad_norm": 25.715484619140625, "learning_rate": 6.410714285714287e-06, "loss": 45.7485, "step": 1455 }, { "epoch": 34.668656716417914, "grad_norm": 21.964609146118164, "learning_rate": 6.404761904761904e-06, "loss": 46.3223, "step": 1456 }, { "epoch": 34.69253731343284, "grad_norm": 20.32435417175293, "learning_rate": 6.398809523809524e-06, "loss": 45.1507, "step": 1457 }, { "epoch": 34.71641791044776, "grad_norm": 24.32924461364746, "learning_rate": 6.392857142857143e-06, "loss": 45.8221, "step": 1458 }, { "epoch": 34.74029850746269, "grad_norm": 19.200895309448242, "learning_rate": 6.386904761904762e-06, "loss": 45.0915, "step": 1459 }, { "epoch": 34.76417910447761, "grad_norm": 24.436569213867188, "learning_rate": 6.380952380952381e-06, "loss": 45.5892, "step": 1460 }, { "epoch": 34.788059701492536, "grad_norm": 24.381568908691406, "learning_rate": 6.375e-06, "loss": 45.5295, "step": 1461 }, { "epoch": 34.811940298507466, "grad_norm": 19.64159393310547, "learning_rate": 6.369047619047619e-06, "loss": 46.244, "step": 1462 }, { "epoch": 34.83582089552239, "grad_norm": 27.420351028442383, "learning_rate": 6.363095238095238e-06, "loss": 45.9723, "step": 1463 }, { "epoch": 34.85970149253731, "grad_norm": 18.136165618896484, "learning_rate": 6.357142857142858e-06, "loss": 45.5106, "step": 1464 }, { "epoch": 34.88358208955224, "grad_norm": 21.70622444152832, "learning_rate": 6.3511904761904766e-06, "loss": 46.4965, "step": 1465 }, { "epoch": 34.907462686567165, "grad_norm": 23.573131561279297, "learning_rate": 6.3452380952380955e-06, "loss": 46.0698, "step": 1466 }, { "epoch": 34.93134328358209, "grad_norm": 21.20003890991211, "learning_rate": 6.3392857142857145e-06, "loss": 45.6992, "step": 1467 }, { "epoch": 34.95522388059702, "grad_norm": 23.745859146118164, "learning_rate": 6.333333333333333e-06, "loss": 45.8431, "step": 1468 }, { "epoch": 34.97910447761194, "grad_norm": 21.26241683959961, "learning_rate": 6.327380952380952e-06, "loss": 45.6577, "step": 1469 }, { "epoch": 35.0, "grad_norm": 22.033447265625, "learning_rate": 6.321428571428571e-06, "loss": 39.8491, "step": 1470 }, { "epoch": 35.02388059701492, "grad_norm": NaN, "learning_rate": 6.315476190476191e-06, "loss": 68.4405, "step": 1471 }, { "epoch": 35.04776119402985, "grad_norm": 22.06501007080078, "learning_rate": 6.315476190476191e-06, "loss": 44.971, "step": 1472 }, { "epoch": 35.071641791044776, "grad_norm": 23.923011779785156, "learning_rate": 6.30952380952381e-06, "loss": 45.4865, "step": 1473 }, { "epoch": 35.0955223880597, "grad_norm": 18.272428512573242, "learning_rate": 6.303571428571429e-06, "loss": 46.6551, "step": 1474 }, { "epoch": 35.11940298507463, "grad_norm": 23.046764373779297, "learning_rate": 6.297619047619048e-06, "loss": 46.3486, "step": 1475 }, { "epoch": 35.14328358208955, "grad_norm": 23.790733337402344, "learning_rate": 6.291666666666667e-06, "loss": 46.7032, "step": 1476 }, { "epoch": 35.167164179104475, "grad_norm": 23.891183853149414, "learning_rate": 6.285714285714286e-06, "loss": 44.9916, "step": 1477 }, { "epoch": 35.191044776119405, "grad_norm": 25.107316970825195, "learning_rate": 6.279761904761906e-06, "loss": 46.2358, "step": 1478 }, { "epoch": 35.21492537313433, "grad_norm": 20.48590660095215, "learning_rate": 6.2738095238095245e-06, "loss": 46.0048, "step": 1479 }, { "epoch": 35.23880597014925, "grad_norm": 25.425119400024414, "learning_rate": 6.2678571428571435e-06, "loss": 44.0941, "step": 1480 }, { "epoch": 35.26268656716418, "grad_norm": 28.264352798461914, "learning_rate": 6.261904761904762e-06, "loss": 46.5301, "step": 1481 }, { "epoch": 35.286567164179104, "grad_norm": 23.869232177734375, "learning_rate": 6.255952380952381e-06, "loss": 45.681, "step": 1482 }, { "epoch": 35.31044776119403, "grad_norm": 28.840408325195312, "learning_rate": 6.25e-06, "loss": 43.7517, "step": 1483 }, { "epoch": 35.33432835820896, "grad_norm": 26.768037796020508, "learning_rate": 6.244047619047619e-06, "loss": 46.1423, "step": 1484 }, { "epoch": 35.35820895522388, "grad_norm": 23.532470703125, "learning_rate": 6.238095238095239e-06, "loss": 45.6669, "step": 1485 }, { "epoch": 35.3820895522388, "grad_norm": 25.94774055480957, "learning_rate": 6.232142857142858e-06, "loss": 45.7672, "step": 1486 }, { "epoch": 35.40597014925373, "grad_norm": 23.215801239013672, "learning_rate": 6.226190476190477e-06, "loss": 45.6991, "step": 1487 }, { "epoch": 35.429850746268656, "grad_norm": 22.13661003112793, "learning_rate": 6.220238095238096e-06, "loss": 44.5214, "step": 1488 }, { "epoch": 35.45373134328358, "grad_norm": 24.596481323242188, "learning_rate": 6.214285714285715e-06, "loss": 46.1515, "step": 1489 }, { "epoch": 35.47761194029851, "grad_norm": 19.416872024536133, "learning_rate": 6.208333333333334e-06, "loss": 45.7596, "step": 1490 }, { "epoch": 35.50149253731343, "grad_norm": 23.993833541870117, "learning_rate": 6.202380952380953e-06, "loss": 46.1668, "step": 1491 }, { "epoch": 35.525373134328355, "grad_norm": 21.481637954711914, "learning_rate": 6.1964285714285725e-06, "loss": 45.1812, "step": 1492 }, { "epoch": 35.549253731343285, "grad_norm": 19.26917839050293, "learning_rate": 6.1904761904761914e-06, "loss": 45.9316, "step": 1493 }, { "epoch": 35.57313432835821, "grad_norm": 22.80115509033203, "learning_rate": 6.18452380952381e-06, "loss": 45.9088, "step": 1494 }, { "epoch": 35.59701492537313, "grad_norm": 21.33648109436035, "learning_rate": 6.178571428571429e-06, "loss": 46.7602, "step": 1495 }, { "epoch": 35.62089552238806, "grad_norm": 28.059947967529297, "learning_rate": 6.172619047619048e-06, "loss": 46.1767, "step": 1496 }, { "epoch": 35.644776119402984, "grad_norm": 21.1577205657959, "learning_rate": 6.166666666666667e-06, "loss": 45.6847, "step": 1497 }, { "epoch": 35.668656716417914, "grad_norm": 23.277509689331055, "learning_rate": 6.160714285714286e-06, "loss": 45.6145, "step": 1498 }, { "epoch": 35.69253731343284, "grad_norm": 16.815677642822266, "learning_rate": 6.154761904761906e-06, "loss": 45.515, "step": 1499 }, { "epoch": 35.71641791044776, "grad_norm": 24.218280792236328, "learning_rate": 6.148809523809525e-06, "loss": 47.6329, "step": 1500 }, { "epoch": 35.74029850746269, "grad_norm": 20.943737030029297, "learning_rate": 6.142857142857144e-06, "loss": 45.7388, "step": 1501 }, { "epoch": 35.76417910447761, "grad_norm": 20.344369888305664, "learning_rate": 6.136904761904763e-06, "loss": 45.9404, "step": 1502 }, { "epoch": 35.788059701492536, "grad_norm": 25.980487823486328, "learning_rate": 6.130952380952382e-06, "loss": 46.6928, "step": 1503 }, { "epoch": 35.811940298507466, "grad_norm": 19.285552978515625, "learning_rate": 6.125000000000001e-06, "loss": 46.4614, "step": 1504 }, { "epoch": 35.83582089552239, "grad_norm": 27.701011657714844, "learning_rate": 6.11904761904762e-06, "loss": 45.258, "step": 1505 }, { "epoch": 35.85970149253731, "grad_norm": 24.963760375976562, "learning_rate": 6.113095238095239e-06, "loss": 47.0721, "step": 1506 }, { "epoch": 35.88358208955224, "grad_norm": 25.08616828918457, "learning_rate": 6.107142857142858e-06, "loss": 45.9668, "step": 1507 }, { "epoch": 35.907462686567165, "grad_norm": 18.00580406188965, "learning_rate": 6.101190476190477e-06, "loss": 46.1049, "step": 1508 }, { "epoch": 35.93134328358209, "grad_norm": 24.686004638671875, "learning_rate": 6.095238095238096e-06, "loss": 46.6996, "step": 1509 }, { "epoch": 35.95522388059702, "grad_norm": 18.304157257080078, "learning_rate": 6.089285714285714e-06, "loss": 46.694, "step": 1510 }, { "epoch": 35.97910447761194, "grad_norm": 23.10132598876953, "learning_rate": 6.083333333333333e-06, "loss": 46.3807, "step": 1511 }, { "epoch": 36.0, "grad_norm": 19.077655792236328, "learning_rate": 6.077380952380952e-06, "loss": 41.1702, "step": 1512 }, { "epoch": 36.02388059701492, "grad_norm": 26.49584197998047, "learning_rate": 6.071428571428571e-06, "loss": 45.382, "step": 1513 }, { "epoch": 36.04776119402985, "grad_norm": 24.438323974609375, "learning_rate": 6.065476190476191e-06, "loss": 45.9433, "step": 1514 }, { "epoch": 36.071641791044776, "grad_norm": 30.8107852935791, "learning_rate": 6.05952380952381e-06, "loss": 45.6688, "step": 1515 }, { "epoch": 36.0955223880597, "grad_norm": 31.754154205322266, "learning_rate": 6.053571428571429e-06, "loss": 45.9768, "step": 1516 }, { "epoch": 36.11940298507463, "grad_norm": 26.034778594970703, "learning_rate": 6.047619047619048e-06, "loss": 46.022, "step": 1517 }, { "epoch": 36.14328358208955, "grad_norm": 31.643035888671875, "learning_rate": 6.041666666666667e-06, "loss": 44.5987, "step": 1518 }, { "epoch": 36.167164179104475, "grad_norm": 24.322874069213867, "learning_rate": 6.035714285714286e-06, "loss": 45.3774, "step": 1519 }, { "epoch": 36.191044776119405, "grad_norm": 29.067466735839844, "learning_rate": 6.029761904761905e-06, "loss": 46.1784, "step": 1520 }, { "epoch": 36.21492537313433, "grad_norm": 30.415788650512695, "learning_rate": 6.023809523809524e-06, "loss": 46.7259, "step": 1521 }, { "epoch": 36.23880597014925, "grad_norm": 19.417943954467773, "learning_rate": 6.017857142857143e-06, "loss": 46.0544, "step": 1522 }, { "epoch": 36.26268656716418, "grad_norm": 27.239500045776367, "learning_rate": 6.011904761904762e-06, "loss": 46.9344, "step": 1523 }, { "epoch": 36.286567164179104, "grad_norm": 27.671018600463867, "learning_rate": 6.005952380952381e-06, "loss": 45.78, "step": 1524 }, { "epoch": 36.31044776119403, "grad_norm": 25.103811264038086, "learning_rate": 6e-06, "loss": 45.7153, "step": 1525 }, { "epoch": 36.33432835820896, "grad_norm": 26.25937843322754, "learning_rate": 5.994047619047619e-06, "loss": 45.3151, "step": 1526 }, { "epoch": 36.35820895522388, "grad_norm": 18.400033950805664, "learning_rate": 5.988095238095238e-06, "loss": 46.5614, "step": 1527 }, { "epoch": 36.3820895522388, "grad_norm": 35.505374908447266, "learning_rate": 5.982142857142858e-06, "loss": 45.8805, "step": 1528 }, { "epoch": 36.40597014925373, "grad_norm": 31.476438522338867, "learning_rate": 5.976190476190477e-06, "loss": 46.189, "step": 1529 }, { "epoch": 36.429850746268656, "grad_norm": 26.192047119140625, "learning_rate": 5.970238095238096e-06, "loss": 45.7026, "step": 1530 }, { "epoch": 36.45373134328358, "grad_norm": 29.712961196899414, "learning_rate": 5.964285714285715e-06, "loss": 44.86, "step": 1531 }, { "epoch": 36.47761194029851, "grad_norm": 28.22374153137207, "learning_rate": 5.958333333333334e-06, "loss": 45.7644, "step": 1532 }, { "epoch": 36.50149253731343, "grad_norm": 23.614940643310547, "learning_rate": 5.9523809523809525e-06, "loss": 45.0373, "step": 1533 }, { "epoch": 36.525373134328355, "grad_norm": 27.78896141052246, "learning_rate": 5.9464285714285715e-06, "loss": 46.9277, "step": 1534 }, { "epoch": 36.549253731343285, "grad_norm": 18.64702606201172, "learning_rate": 5.940476190476191e-06, "loss": 45.277, "step": 1535 }, { "epoch": 36.57313432835821, "grad_norm": 27.2061710357666, "learning_rate": 5.93452380952381e-06, "loss": 46.8394, "step": 1536 }, { "epoch": 36.59701492537313, "grad_norm": 26.296287536621094, "learning_rate": 5.928571428571429e-06, "loss": 44.8519, "step": 1537 }, { "epoch": 36.62089552238806, "grad_norm": 26.594314575195312, "learning_rate": 5.922619047619048e-06, "loss": 45.1743, "step": 1538 }, { "epoch": 36.644776119402984, "grad_norm": 24.076461791992188, "learning_rate": 5.916666666666667e-06, "loss": 45.4145, "step": 1539 }, { "epoch": 36.668656716417914, "grad_norm": 23.31978416442871, "learning_rate": 5.910714285714286e-06, "loss": 45.7526, "step": 1540 }, { "epoch": 36.69253731343284, "grad_norm": 22.630998611450195, "learning_rate": 5.904761904761905e-06, "loss": 46.4197, "step": 1541 }, { "epoch": 36.71641791044776, "grad_norm": 32.66592025756836, "learning_rate": 5.898809523809525e-06, "loss": 45.0123, "step": 1542 }, { "epoch": 36.74029850746269, "grad_norm": 24.478839874267578, "learning_rate": 5.892857142857144e-06, "loss": 46.1418, "step": 1543 }, { "epoch": 36.76417910447761, "grad_norm": 33.325775146484375, "learning_rate": 5.886904761904763e-06, "loss": 45.8228, "step": 1544 }, { "epoch": 36.788059701492536, "grad_norm": 29.264528274536133, "learning_rate": 5.8809523809523816e-06, "loss": 46.1921, "step": 1545 }, { "epoch": 36.811940298507466, "grad_norm": 31.78297233581543, "learning_rate": 5.8750000000000005e-06, "loss": 45.4564, "step": 1546 }, { "epoch": 36.83582089552239, "grad_norm": 27.223127365112305, "learning_rate": 5.8690476190476194e-06, "loss": 45.5277, "step": 1547 }, { "epoch": 36.85970149253731, "grad_norm": 26.29422950744629, "learning_rate": 5.863095238095239e-06, "loss": 46.2285, "step": 1548 }, { "epoch": 36.88358208955224, "grad_norm": 27.933652877807617, "learning_rate": 5.857142857142858e-06, "loss": 46.4441, "step": 1549 }, { "epoch": 36.907462686567165, "grad_norm": 25.306129455566406, "learning_rate": 5.851190476190477e-06, "loss": 45.9724, "step": 1550 }, { "epoch": 36.93134328358209, "grad_norm": 23.481304168701172, "learning_rate": 5.845238095238096e-06, "loss": 46.2544, "step": 1551 }, { "epoch": 36.95522388059702, "grad_norm": 20.86615562438965, "learning_rate": 5.839285714285715e-06, "loss": 47.4502, "step": 1552 }, { "epoch": 36.97910447761194, "grad_norm": 21.519290924072266, "learning_rate": 5.833333333333334e-06, "loss": 45.0165, "step": 1553 }, { "epoch": 37.0, "grad_norm": 22.031705856323242, "learning_rate": 5.827380952380953e-06, "loss": 40.6199, "step": 1554 }, { "epoch": 37.02388059701492, "grad_norm": 29.273820877075195, "learning_rate": 5.821428571428573e-06, "loss": 46.5836, "step": 1555 }, { "epoch": 37.04776119402985, "grad_norm": 24.417945861816406, "learning_rate": 5.815476190476192e-06, "loss": 44.9549, "step": 1556 }, { "epoch": 37.071641791044776, "grad_norm": 24.60706901550293, "learning_rate": 5.8095238095238106e-06, "loss": 44.8607, "step": 1557 }, { "epoch": 37.0955223880597, "grad_norm": 24.76397132873535, "learning_rate": 5.8035714285714295e-06, "loss": 44.9875, "step": 1558 }, { "epoch": 37.11940298507463, "grad_norm": 24.380352020263672, "learning_rate": 5.7976190476190485e-06, "loss": 45.4835, "step": 1559 }, { "epoch": 37.14328358208955, "grad_norm": 19.852746963500977, "learning_rate": 5.791666666666667e-06, "loss": 45.1303, "step": 1560 }, { "epoch": 37.167164179104475, "grad_norm": 23.550888061523438, "learning_rate": 5.785714285714286e-06, "loss": 46.1086, "step": 1561 }, { "epoch": 37.191044776119405, "grad_norm": 24.31315803527832, "learning_rate": 5.7797619047619044e-06, "loss": 45.8181, "step": 1562 }, { "epoch": 37.21492537313433, "grad_norm": 19.324602127075195, "learning_rate": 5.773809523809523e-06, "loss": 44.8606, "step": 1563 }, { "epoch": 37.23880597014925, "grad_norm": 26.747098922729492, "learning_rate": 5.767857142857143e-06, "loss": 45.753, "step": 1564 }, { "epoch": 37.26268656716418, "grad_norm": 22.472572326660156, "learning_rate": 5.761904761904762e-06, "loss": 46.0156, "step": 1565 }, { "epoch": 37.286567164179104, "grad_norm": 20.813426971435547, "learning_rate": 5.755952380952381e-06, "loss": 46.7466, "step": 1566 }, { "epoch": 37.31044776119403, "grad_norm": 27.869413375854492, "learning_rate": 5.75e-06, "loss": 46.287, "step": 1567 }, { "epoch": 37.33432835820896, "grad_norm": 23.257444381713867, "learning_rate": 5.744047619047619e-06, "loss": 45.9862, "step": 1568 }, { "epoch": 37.35820895522388, "grad_norm": 24.715946197509766, "learning_rate": 5.738095238095238e-06, "loss": 47.3128, "step": 1569 }, { "epoch": 37.3820895522388, "grad_norm": 21.670385360717773, "learning_rate": 5.732142857142857e-06, "loss": 46.121, "step": 1570 }, { "epoch": 37.40597014925373, "grad_norm": 24.53063201904297, "learning_rate": 5.726190476190477e-06, "loss": 46.5441, "step": 1571 }, { "epoch": 37.429850746268656, "grad_norm": 19.584630966186523, "learning_rate": 5.7202380952380956e-06, "loss": 46.0683, "step": 1572 }, { "epoch": 37.45373134328358, "grad_norm": 26.179149627685547, "learning_rate": 5.7142857142857145e-06, "loss": 46.3294, "step": 1573 }, { "epoch": 37.47761194029851, "grad_norm": 21.13595199584961, "learning_rate": 5.7083333333333335e-06, "loss": 45.7853, "step": 1574 }, { "epoch": 37.50149253731343, "grad_norm": 28.440006256103516, "learning_rate": 5.702380952380952e-06, "loss": 46.5029, "step": 1575 }, { "epoch": 37.525373134328355, "grad_norm": 27.941879272460938, "learning_rate": 5.696428571428571e-06, "loss": 45.6132, "step": 1576 }, { "epoch": 37.549253731343285, "grad_norm": 25.952688217163086, "learning_rate": 5.690476190476191e-06, "loss": 45.6803, "step": 1577 }, { "epoch": 37.57313432835821, "grad_norm": 23.551633834838867, "learning_rate": 5.68452380952381e-06, "loss": 45.1563, "step": 1578 }, { "epoch": 37.59701492537313, "grad_norm": 23.119415283203125, "learning_rate": 5.678571428571429e-06, "loss": 47.2717, "step": 1579 }, { "epoch": 37.62089552238806, "grad_norm": 27.995214462280273, "learning_rate": 5.672619047619048e-06, "loss": 46.1847, "step": 1580 }, { "epoch": 37.644776119402984, "grad_norm": 28.0698299407959, "learning_rate": 5.666666666666667e-06, "loss": 46.4639, "step": 1581 }, { "epoch": 37.668656716417914, "grad_norm": 23.09457015991211, "learning_rate": 5.660714285714286e-06, "loss": 45.0939, "step": 1582 }, { "epoch": 37.69253731343284, "grad_norm": 25.94692611694336, "learning_rate": 5.654761904761905e-06, "loss": 45.216, "step": 1583 }, { "epoch": 37.71641791044776, "grad_norm": 20.192176818847656, "learning_rate": 5.648809523809525e-06, "loss": 45.7997, "step": 1584 }, { "epoch": 37.74029850746269, "grad_norm": 26.115283966064453, "learning_rate": 5.6428571428571435e-06, "loss": 44.8405, "step": 1585 }, { "epoch": 37.76417910447761, "grad_norm": 24.431346893310547, "learning_rate": 5.6369047619047625e-06, "loss": 46.5067, "step": 1586 }, { "epoch": 37.788059701492536, "grad_norm": 25.838623046875, "learning_rate": 5.630952380952381e-06, "loss": 46.1806, "step": 1587 }, { "epoch": 37.811940298507466, "grad_norm": 20.44222640991211, "learning_rate": 5.625e-06, "loss": 45.7445, "step": 1588 }, { "epoch": 37.83582089552239, "grad_norm": 19.459331512451172, "learning_rate": 5.619047619047619e-06, "loss": 45.7875, "step": 1589 }, { "epoch": 37.85970149253731, "grad_norm": 17.49920082092285, "learning_rate": 5.613095238095238e-06, "loss": 44.2889, "step": 1590 }, { "epoch": 37.88358208955224, "grad_norm": 18.541828155517578, "learning_rate": 5.607142857142858e-06, "loss": 46.7668, "step": 1591 }, { "epoch": 37.907462686567165, "grad_norm": 16.22308349609375, "learning_rate": 5.601190476190477e-06, "loss": 45.0406, "step": 1592 }, { "epoch": 37.93134328358209, "grad_norm": 21.068069458007812, "learning_rate": 5.595238095238096e-06, "loss": 44.0997, "step": 1593 }, { "epoch": 37.95522388059702, "grad_norm": 18.877992630004883, "learning_rate": 5.589285714285715e-06, "loss": 46.5816, "step": 1594 }, { "epoch": 37.97910447761194, "grad_norm": 20.14031410217285, "learning_rate": 5.583333333333334e-06, "loss": 44.8537, "step": 1595 }, { "epoch": 38.0, "grad_norm": 19.989953994750977, "learning_rate": 5.577380952380953e-06, "loss": 39.8501, "step": 1596 }, { "epoch": 38.02388059701492, "grad_norm": 23.484283447265625, "learning_rate": 5.571428571428572e-06, "loss": 46.3864, "step": 1597 }, { "epoch": 38.04776119402985, "grad_norm": 20.579587936401367, "learning_rate": 5.5654761904761915e-06, "loss": 46.1473, "step": 1598 }, { "epoch": 38.071641791044776, "grad_norm": 19.48423194885254, "learning_rate": 5.5595238095238104e-06, "loss": 45.3255, "step": 1599 }, { "epoch": 38.0955223880597, "grad_norm": 23.766077041625977, "learning_rate": 5.553571428571429e-06, "loss": 45.4387, "step": 1600 }, { "epoch": 38.11940298507463, "grad_norm": 17.605247497558594, "learning_rate": 5.547619047619048e-06, "loss": 46.1065, "step": 1601 }, { "epoch": 38.14328358208955, "grad_norm": 20.179826736450195, "learning_rate": 5.541666666666667e-06, "loss": 45.974, "step": 1602 }, { "epoch": 38.167164179104475, "grad_norm": 28.50605583190918, "learning_rate": 5.535714285714286e-06, "loss": 46.0505, "step": 1603 }, { "epoch": 38.191044776119405, "grad_norm": 16.770771026611328, "learning_rate": 5.529761904761905e-06, "loss": 46.4403, "step": 1604 }, { "epoch": 38.21492537313433, "grad_norm": NaN, "learning_rate": 5.523809523809525e-06, "loss": 69.3153, "step": 1605 }, { "epoch": 38.23880597014925, "grad_norm": 25.01431655883789, "learning_rate": 5.523809523809525e-06, "loss": 46.8119, "step": 1606 }, { "epoch": 38.26268656716418, "grad_norm": 20.459747314453125, "learning_rate": 5.517857142857144e-06, "loss": 47.7687, "step": 1607 }, { "epoch": 38.286567164179104, "grad_norm": 21.603086471557617, "learning_rate": 5.511904761904763e-06, "loss": 44.6093, "step": 1608 }, { "epoch": 38.31044776119403, "grad_norm": 25.284805297851562, "learning_rate": 5.505952380952382e-06, "loss": 45.0834, "step": 1609 }, { "epoch": 38.33432835820896, "grad_norm": 21.638917922973633, "learning_rate": 5.500000000000001e-06, "loss": 45.3904, "step": 1610 }, { "epoch": 38.35820895522388, "grad_norm": 22.443374633789062, "learning_rate": 5.49404761904762e-06, "loss": 43.7163, "step": 1611 }, { "epoch": 38.3820895522388, "grad_norm": 23.427288055419922, "learning_rate": 5.4880952380952394e-06, "loss": 44.7692, "step": 1612 }, { "epoch": 38.40597014925373, "grad_norm": 22.346813201904297, "learning_rate": 5.482142857142858e-06, "loss": 45.0674, "step": 1613 }, { "epoch": 38.429850746268656, "grad_norm": 20.567325592041016, "learning_rate": 5.476190476190477e-06, "loss": 45.5367, "step": 1614 }, { "epoch": 38.45373134328358, "grad_norm": 23.872394561767578, "learning_rate": 5.470238095238096e-06, "loss": 46.2728, "step": 1615 }, { "epoch": 38.47761194029851, "grad_norm": 23.790176391601562, "learning_rate": 5.464285714285714e-06, "loss": 46.3734, "step": 1616 }, { "epoch": 38.50149253731343, "grad_norm": 22.707136154174805, "learning_rate": 5.458333333333333e-06, "loss": 44.577, "step": 1617 }, { "epoch": 38.525373134328355, "grad_norm": 26.203781127929688, "learning_rate": 5.452380952380952e-06, "loss": 45.6794, "step": 1618 }, { "epoch": 38.549253731343285, "grad_norm": 22.935991287231445, "learning_rate": 5.446428571428571e-06, "loss": 45.7815, "step": 1619 }, { "epoch": 38.57313432835821, "grad_norm": 28.275053024291992, "learning_rate": 5.44047619047619e-06, "loss": 45.0312, "step": 1620 }, { "epoch": 38.59701492537313, "grad_norm": 23.848264694213867, "learning_rate": 5.43452380952381e-06, "loss": 46.7093, "step": 1621 }, { "epoch": 38.62089552238806, "grad_norm": 25.240819931030273, "learning_rate": 5.428571428571429e-06, "loss": 46.6751, "step": 1622 }, { "epoch": 38.644776119402984, "grad_norm": 26.2618350982666, "learning_rate": 5.422619047619048e-06, "loss": 47.5501, "step": 1623 }, { "epoch": 38.668656716417914, "grad_norm": 23.986392974853516, "learning_rate": 5.416666666666667e-06, "loss": 45.6208, "step": 1624 }, { "epoch": 38.69253731343284, "grad_norm": 22.11539077758789, "learning_rate": 5.410714285714286e-06, "loss": 44.4163, "step": 1625 }, { "epoch": 38.71641791044776, "grad_norm": 22.9071044921875, "learning_rate": 5.404761904761905e-06, "loss": 45.5715, "step": 1626 }, { "epoch": 38.74029850746269, "grad_norm": 22.759733200073242, "learning_rate": 5.398809523809524e-06, "loss": 45.1706, "step": 1627 }, { "epoch": 38.76417910447761, "grad_norm": 23.66644287109375, "learning_rate": 5.392857142857143e-06, "loss": 45.4343, "step": 1628 }, { "epoch": 38.788059701492536, "grad_norm": 20.179203033447266, "learning_rate": 5.386904761904762e-06, "loss": 45.9163, "step": 1629 }, { "epoch": 38.811940298507466, "grad_norm": 22.327817916870117, "learning_rate": 5.380952380952381e-06, "loss": 44.1558, "step": 1630 }, { "epoch": 38.83582089552239, "grad_norm": 22.10496711730957, "learning_rate": 5.375e-06, "loss": 45.764, "step": 1631 }, { "epoch": 38.85970149253731, "grad_norm": 24.25627326965332, "learning_rate": 5.369047619047619e-06, "loss": 46.6394, "step": 1632 }, { "epoch": 38.88358208955224, "grad_norm": 20.797740936279297, "learning_rate": 5.363095238095238e-06, "loss": 45.6251, "step": 1633 }, { "epoch": 38.907462686567165, "grad_norm": 24.14659309387207, "learning_rate": 5.357142857142857e-06, "loss": 45.5603, "step": 1634 }, { "epoch": 38.93134328358209, "grad_norm": 23.259584426879883, "learning_rate": 5.351190476190477e-06, "loss": 46.055, "step": 1635 }, { "epoch": 38.95522388059702, "grad_norm": 23.72128677368164, "learning_rate": 5.345238095238096e-06, "loss": 45.9729, "step": 1636 }, { "epoch": 38.97910447761194, "grad_norm": 22.746183395385742, "learning_rate": 5.339285714285715e-06, "loss": 46.1893, "step": 1637 }, { "epoch": 39.0, "grad_norm": 22.067306518554688, "learning_rate": 5.333333333333334e-06, "loss": 39.8095, "step": 1638 }, { "epoch": 39.02388059701492, "grad_norm": 22.888097763061523, "learning_rate": 5.327380952380953e-06, "loss": 45.3095, "step": 1639 }, { "epoch": 39.04776119402985, "grad_norm": 23.86408233642578, "learning_rate": 5.3214285714285715e-06, "loss": 46.3774, "step": 1640 }, { "epoch": 39.071641791044776, "grad_norm": 21.418088912963867, "learning_rate": 5.3154761904761905e-06, "loss": 45.6404, "step": 1641 }, { "epoch": 39.0955223880597, "grad_norm": 21.521831512451172, "learning_rate": 5.30952380952381e-06, "loss": 46.4895, "step": 1642 }, { "epoch": 39.11940298507463, "grad_norm": 20.189105987548828, "learning_rate": 5.303571428571429e-06, "loss": 44.6538, "step": 1643 }, { "epoch": 39.14328358208955, "grad_norm": 19.73761558532715, "learning_rate": 5.297619047619048e-06, "loss": 45.5941, "step": 1644 }, { "epoch": 39.167164179104475, "grad_norm": 25.631227493286133, "learning_rate": 5.291666666666667e-06, "loss": 44.4105, "step": 1645 }, { "epoch": 39.191044776119405, "grad_norm": 19.47798728942871, "learning_rate": 5.285714285714286e-06, "loss": 46.3286, "step": 1646 }, { "epoch": 39.21492537313433, "grad_norm": 19.627609252929688, "learning_rate": 5.279761904761905e-06, "loss": 46.0707, "step": 1647 }, { "epoch": 39.23880597014925, "grad_norm": 19.668777465820312, "learning_rate": 5.273809523809525e-06, "loss": 44.8447, "step": 1648 }, { "epoch": 39.26268656716418, "grad_norm": 23.311546325683594, "learning_rate": 5.267857142857144e-06, "loss": 45.9081, "step": 1649 }, { "epoch": 39.286567164179104, "grad_norm": 21.426624298095703, "learning_rate": 5.261904761904763e-06, "loss": 45.4256, "step": 1650 }, { "epoch": 39.31044776119403, "grad_norm": 19.545969009399414, "learning_rate": 5.255952380952382e-06, "loss": 45.4264, "step": 1651 }, { "epoch": 39.33432835820896, "grad_norm": 22.78704833984375, "learning_rate": 5.2500000000000006e-06, "loss": 47.6822, "step": 1652 }, { "epoch": 39.35820895522388, "grad_norm": 18.759178161621094, "learning_rate": 5.2440476190476195e-06, "loss": 44.5254, "step": 1653 }, { "epoch": 39.3820895522388, "grad_norm": 19.855981826782227, "learning_rate": 5.2380952380952384e-06, "loss": 46.249, "step": 1654 }, { "epoch": 39.40597014925373, "grad_norm": 18.817089080810547, "learning_rate": 5.232142857142858e-06, "loss": 45.2813, "step": 1655 }, { "epoch": 39.429850746268656, "grad_norm": 19.587581634521484, "learning_rate": 5.226190476190477e-06, "loss": 45.0445, "step": 1656 }, { "epoch": 39.45373134328358, "grad_norm": 19.9105167388916, "learning_rate": 5.220238095238096e-06, "loss": 46.8658, "step": 1657 }, { "epoch": 39.47761194029851, "grad_norm": 19.529748916625977, "learning_rate": 5.214285714285715e-06, "loss": 46.6175, "step": 1658 }, { "epoch": 39.50149253731343, "grad_norm": 18.63764762878418, "learning_rate": 5.208333333333334e-06, "loss": 46.3122, "step": 1659 }, { "epoch": 39.525373134328355, "grad_norm": 19.58228874206543, "learning_rate": 5.202380952380953e-06, "loss": 44.6263, "step": 1660 }, { "epoch": 39.549253731343285, "grad_norm": 21.451528549194336, "learning_rate": 5.196428571428572e-06, "loss": 46.2707, "step": 1661 }, { "epoch": 39.57313432835821, "grad_norm": 22.756628036499023, "learning_rate": 5.190476190476192e-06, "loss": 45.0001, "step": 1662 }, { "epoch": 39.59701492537313, "grad_norm": 24.481945037841797, "learning_rate": 5.184523809523811e-06, "loss": 45.3038, "step": 1663 }, { "epoch": 39.62089552238806, "grad_norm": 19.3010196685791, "learning_rate": 5.1785714285714296e-06, "loss": 46.1894, "step": 1664 }, { "epoch": 39.644776119402984, "grad_norm": 24.840822219848633, "learning_rate": 5.1726190476190485e-06, "loss": 46.6593, "step": 1665 }, { "epoch": 39.668656716417914, "grad_norm": 20.712875366210938, "learning_rate": 5.1666666666666675e-06, "loss": 46.7594, "step": 1666 }, { "epoch": 39.69253731343284, "grad_norm": 20.431598663330078, "learning_rate": 5.160714285714286e-06, "loss": 46.4969, "step": 1667 }, { "epoch": 39.71641791044776, "grad_norm": 21.094484329223633, "learning_rate": 5.1547619047619045e-06, "loss": 44.5114, "step": 1668 }, { "epoch": 39.74029850746269, "grad_norm": 22.929946899414062, "learning_rate": 5.1488095238095234e-06, "loss": 44.5251, "step": 1669 }, { "epoch": 39.76417910447761, "grad_norm": 17.285877227783203, "learning_rate": 5.142857142857142e-06, "loss": 45.312, "step": 1670 }, { "epoch": 39.788059701492536, "grad_norm": 18.29960823059082, "learning_rate": 5.136904761904762e-06, "loss": 44.984, "step": 1671 }, { "epoch": 39.811940298507466, "grad_norm": 25.79044532775879, "learning_rate": 5.130952380952381e-06, "loss": 44.8192, "step": 1672 }, { "epoch": 39.83582089552239, "grad_norm": 21.014759063720703, "learning_rate": 5.125e-06, "loss": 46.9319, "step": 1673 }, { "epoch": 39.85970149253731, "grad_norm": 24.91911506652832, "learning_rate": 5.119047619047619e-06, "loss": 46.7778, "step": 1674 }, { "epoch": 39.88358208955224, "grad_norm": 25.174942016601562, "learning_rate": 5.113095238095238e-06, "loss": 44.948, "step": 1675 }, { "epoch": 39.907462686567165, "grad_norm": 22.642148971557617, "learning_rate": 5.107142857142857e-06, "loss": 45.5964, "step": 1676 }, { "epoch": 39.93134328358209, "grad_norm": 24.867389678955078, "learning_rate": 5.101190476190476e-06, "loss": 45.446, "step": 1677 }, { "epoch": 39.95522388059702, "grad_norm": 21.888269424438477, "learning_rate": 5.095238095238096e-06, "loss": 45.414, "step": 1678 }, { "epoch": 39.97910447761194, "grad_norm": 25.071487426757812, "learning_rate": 5.0892857142857146e-06, "loss": 44.9464, "step": 1679 }, { "epoch": 40.0, "grad_norm": 19.389556884765625, "learning_rate": 5.0833333333333335e-06, "loss": 39.5515, "step": 1680 }, { "epoch": 40.0, "step": 1680, "total_flos": 8.26172747445074e+16, "train_loss": 23.38366504396711, "train_runtime": 26137.4766, "train_samples_per_second": 8.191, "train_steps_per_second": 0.064 }, { "epoch": 40.02388059701492, "grad_norm": 18.99544334411621, "learning_rate": 1e-05, "loss": 46.1194, "step": 1681 }, { "epoch": 40.04776119402985, "grad_norm": Infinity, "learning_rate": 9.996031746031746e-06, "loss": 54.6718, "step": 1682 }, { "epoch": 40.071641791044776, "grad_norm": Infinity, "learning_rate": 9.996031746031746e-06, "loss": 54.4703, "step": 1683 }, { "epoch": 40.0955223880597, "grad_norm": 416.26324462890625, "learning_rate": 9.996031746031746e-06, "loss": 53.5676, "step": 1684 }, { "epoch": 40.11940298507463, "grad_norm": 147.0504608154297, "learning_rate": 9.992063492063493e-06, "loss": 50.2561, "step": 1685 }, { "epoch": 40.14328358208955, "grad_norm": 122.7557601928711, "learning_rate": 9.988095238095239e-06, "loss": 50.4153, "step": 1686 }, { "epoch": 40.167164179104475, "grad_norm": 97.062744140625, "learning_rate": 9.984126984126986e-06, "loss": 47.2739, "step": 1687 }, { "epoch": 40.191044776119405, "grad_norm": 73.37904357910156, "learning_rate": 9.980158730158731e-06, "loss": 48.0252, "step": 1688 }, { "epoch": 40.21492537313433, "grad_norm": 68.98373413085938, "learning_rate": 9.976190476190477e-06, "loss": 47.6782, "step": 1689 }, { "epoch": 40.23880597014925, "grad_norm": 56.258548736572266, "learning_rate": 9.972222222222224e-06, "loss": 47.5786, "step": 1690 }, { "epoch": 40.26268656716418, "grad_norm": 68.9515609741211, "learning_rate": 9.968253968253969e-06, "loss": 46.3938, "step": 1691 }, { "epoch": 40.286567164179104, "grad_norm": 39.17803955078125, "learning_rate": 9.964285714285714e-06, "loss": 45.9047, "step": 1692 }, { "epoch": 40.31044776119403, "grad_norm": 51.936981201171875, "learning_rate": 9.960317460317462e-06, "loss": 45.6047, "step": 1693 }, { "epoch": 40.33432835820896, "grad_norm": 43.64280700683594, "learning_rate": 9.956349206349207e-06, "loss": 46.6234, "step": 1694 }, { "epoch": 40.35820895522388, "grad_norm": 58.56443405151367, "learning_rate": 9.952380952380954e-06, "loss": 47.255, "step": 1695 }, { "epoch": 40.3820895522388, "grad_norm": 37.53863525390625, "learning_rate": 9.9484126984127e-06, "loss": 47.1183, "step": 1696 }, { "epoch": 40.40597014925373, "grad_norm": 35.800628662109375, "learning_rate": 9.944444444444445e-06, "loss": 46.3602, "step": 1697 }, { "epoch": 40.429850746268656, "grad_norm": 39.58418655395508, "learning_rate": 9.940476190476192e-06, "loss": 46.3082, "step": 1698 }, { "epoch": 40.45373134328358, "grad_norm": 30.6373233795166, "learning_rate": 9.936507936507937e-06, "loss": 45.2231, "step": 1699 }, { "epoch": 40.47761194029851, "grad_norm": 34.47962951660156, "learning_rate": 9.932539682539684e-06, "loss": 46.3243, "step": 1700 }, { "epoch": 40.50149253731343, "grad_norm": 23.599184036254883, "learning_rate": 9.92857142857143e-06, "loss": 46.3045, "step": 1701 }, { "epoch": 40.525373134328355, "grad_norm": 27.183767318725586, "learning_rate": 9.924603174603175e-06, "loss": 45.3216, "step": 1702 }, { "epoch": 40.549253731343285, "grad_norm": 27.263038635253906, "learning_rate": 9.920634920634922e-06, "loss": 46.8117, "step": 1703 }, { "epoch": 40.57313432835821, "grad_norm": 30.570518493652344, "learning_rate": 9.916666666666668e-06, "loss": 46.0951, "step": 1704 }, { "epoch": 40.59701492537313, "grad_norm": 23.30783462524414, "learning_rate": 9.912698412698413e-06, "loss": 45.5407, "step": 1705 }, { "epoch": 40.62089552238806, "grad_norm": 29.269088745117188, "learning_rate": 9.90873015873016e-06, "loss": 45.9624, "step": 1706 }, { "epoch": 40.644776119402984, "grad_norm": NaN, "learning_rate": 9.904761904761906e-06, "loss": 75.1575, "step": 1707 }, { "epoch": 40.668656716417914, "grad_norm": 25.785404205322266, "learning_rate": 9.904761904761906e-06, "loss": 45.9263, "step": 1708 }, { "epoch": 40.69253731343284, "grad_norm": 34.729549407958984, "learning_rate": 9.900793650793653e-06, "loss": 45.5276, "step": 1709 }, { "epoch": 40.71641791044776, "grad_norm": 28.62750816345215, "learning_rate": 9.896825396825398e-06, "loss": 46.2797, "step": 1710 }, { "epoch": 40.74029850746269, "grad_norm": 31.081378936767578, "learning_rate": 9.892857142857143e-06, "loss": 45.1643, "step": 1711 }, { "epoch": 40.76417910447761, "grad_norm": 28.92620086669922, "learning_rate": 9.88888888888889e-06, "loss": 46.3105, "step": 1712 }, { "epoch": 40.788059701492536, "grad_norm": 23.232866287231445, "learning_rate": 9.884920634920636e-06, "loss": 46.6131, "step": 1713 }, { "epoch": 40.811940298507466, "grad_norm": 25.97928810119629, "learning_rate": 9.880952380952381e-06, "loss": 45.5054, "step": 1714 }, { "epoch": 40.83582089552239, "grad_norm": 27.382034301757812, "learning_rate": 9.876984126984128e-06, "loss": 45.593, "step": 1715 }, { "epoch": 40.85970149253731, "grad_norm": 23.762460708618164, "learning_rate": 9.873015873015874e-06, "loss": 45.7414, "step": 1716 }, { "epoch": 40.88358208955224, "grad_norm": 29.6158390045166, "learning_rate": 9.869047619047621e-06, "loss": 45.0669, "step": 1717 }, { "epoch": 40.907462686567165, "grad_norm": 24.66147804260254, "learning_rate": 9.865079365079366e-06, "loss": 45.6125, "step": 1718 }, { "epoch": 40.93134328358209, "grad_norm": 28.167495727539062, "learning_rate": 9.861111111111112e-06, "loss": 46.099, "step": 1719 }, { "epoch": 40.95522388059702, "grad_norm": 27.325531005859375, "learning_rate": 9.857142857142859e-06, "loss": 45.1728, "step": 1720 }, { "epoch": 40.97910447761194, "grad_norm": 23.650911331176758, "learning_rate": 9.853174603174604e-06, "loss": 44.6743, "step": 1721 }, { "epoch": 41.0, "grad_norm": 22.53518295288086, "learning_rate": 9.849206349206351e-06, "loss": 39.1464, "step": 1722 }, { "epoch": 41.02388059701492, "grad_norm": 28.995275497436523, "learning_rate": 9.845238095238097e-06, "loss": 44.5823, "step": 1723 }, { "epoch": 41.04776119402985, "grad_norm": 28.680805206298828, "learning_rate": 9.841269841269842e-06, "loss": 44.7002, "step": 1724 }, { "epoch": 41.071641791044776, "grad_norm": 24.10047149658203, "learning_rate": 9.837301587301588e-06, "loss": 46.232, "step": 1725 }, { "epoch": 41.0955223880597, "grad_norm": 25.722291946411133, "learning_rate": 9.833333333333333e-06, "loss": 45.1447, "step": 1726 }, { "epoch": 41.11940298507463, "grad_norm": 22.944278717041016, "learning_rate": 9.82936507936508e-06, "loss": 46.57, "step": 1727 }, { "epoch": 41.14328358208955, "grad_norm": 25.734941482543945, "learning_rate": 9.825396825396825e-06, "loss": 45.8386, "step": 1728 }, { "epoch": 41.167164179104475, "grad_norm": 23.644197463989258, "learning_rate": 9.821428571428573e-06, "loss": 46.2608, "step": 1729 }, { "epoch": 41.191044776119405, "grad_norm": 22.163721084594727, "learning_rate": 9.817460317460318e-06, "loss": 45.2914, "step": 1730 }, { "epoch": 41.21492537313433, "grad_norm": 33.71270751953125, "learning_rate": 9.813492063492063e-06, "loss": 44.6372, "step": 1731 }, { "epoch": 41.23880597014925, "grad_norm": 28.478361129760742, "learning_rate": 9.80952380952381e-06, "loss": 45.472, "step": 1732 }, { "epoch": 41.26268656716418, "grad_norm": 27.120990753173828, "learning_rate": 9.805555555555556e-06, "loss": 46.445, "step": 1733 }, { "epoch": 41.286567164179104, "grad_norm": 25.342784881591797, "learning_rate": 9.801587301587301e-06, "loss": 45.8317, "step": 1734 }, { "epoch": 41.31044776119403, "grad_norm": 28.94765853881836, "learning_rate": 9.797619047619048e-06, "loss": 46.0677, "step": 1735 }, { "epoch": 41.33432835820896, "grad_norm": 22.983802795410156, "learning_rate": 9.793650793650794e-06, "loss": 45.8029, "step": 1736 }, { "epoch": 41.35820895522388, "grad_norm": 24.97469711303711, "learning_rate": 9.78968253968254e-06, "loss": 46.7215, "step": 1737 }, { "epoch": 41.3820895522388, "grad_norm": 26.136960983276367, "learning_rate": 9.785714285714286e-06, "loss": 45.8042, "step": 1738 }, { "epoch": 41.40597014925373, "grad_norm": 21.150083541870117, "learning_rate": 9.781746031746032e-06, "loss": 45.836, "step": 1739 }, { "epoch": 41.429850746268656, "grad_norm": 19.56538963317871, "learning_rate": 9.777777777777779e-06, "loss": 46.0126, "step": 1740 }, { "epoch": 41.45373134328358, "grad_norm": 26.608108520507812, "learning_rate": 9.773809523809524e-06, "loss": 45.3108, "step": 1741 }, { "epoch": 41.47761194029851, "grad_norm": 19.020097732543945, "learning_rate": 9.769841269841271e-06, "loss": 46.278, "step": 1742 }, { "epoch": 41.50149253731343, "grad_norm": 25.4818172454834, "learning_rate": 9.765873015873017e-06, "loss": 45.8142, "step": 1743 }, { "epoch": 41.525373134328355, "grad_norm": 21.7120304107666, "learning_rate": 9.761904761904762e-06, "loss": 46.0221, "step": 1744 }, { "epoch": 41.549253731343285, "grad_norm": 24.395984649658203, "learning_rate": 9.757936507936509e-06, "loss": 45.3654, "step": 1745 }, { "epoch": 41.57313432835821, "grad_norm": 26.8757381439209, "learning_rate": 9.753968253968254e-06, "loss": 46.0073, "step": 1746 }, { "epoch": 41.59701492537313, "grad_norm": 27.31254768371582, "learning_rate": 9.75e-06, "loss": 45.465, "step": 1747 }, { "epoch": 41.62089552238806, "grad_norm": 23.271629333496094, "learning_rate": 9.746031746031747e-06, "loss": 46.0739, "step": 1748 }, { "epoch": 41.644776119402984, "grad_norm": 24.240131378173828, "learning_rate": 9.742063492063492e-06, "loss": 45.6978, "step": 1749 }, { "epoch": 41.668656716417914, "grad_norm": 23.16962242126465, "learning_rate": 9.73809523809524e-06, "loss": 45.9961, "step": 1750 }, { "epoch": 41.69253731343284, "grad_norm": 29.63677406311035, "learning_rate": 9.734126984126985e-06, "loss": 45.0859, "step": 1751 }, { "epoch": 41.71641791044776, "grad_norm": 20.725126266479492, "learning_rate": 9.73015873015873e-06, "loss": 45.3546, "step": 1752 }, { "epoch": 41.74029850746269, "grad_norm": 23.172834396362305, "learning_rate": 9.726190476190477e-06, "loss": 45.3822, "step": 1753 }, { "epoch": 41.76417910447761, "grad_norm": 30.179182052612305, "learning_rate": 9.722222222222223e-06, "loss": 45.0901, "step": 1754 }, { "epoch": 41.788059701492536, "grad_norm": 17.276126861572266, "learning_rate": 9.71825396825397e-06, "loss": 45.4555, "step": 1755 }, { "epoch": 41.811940298507466, "grad_norm": 24.585174560546875, "learning_rate": 9.714285714285715e-06, "loss": 43.8513, "step": 1756 }, { "epoch": 41.83582089552239, "grad_norm": 23.242969512939453, "learning_rate": 9.71031746031746e-06, "loss": 45.7996, "step": 1757 }, { "epoch": 41.85970149253731, "grad_norm": 21.585342407226562, "learning_rate": 9.706349206349208e-06, "loss": 45.2616, "step": 1758 }, { "epoch": 41.88358208955224, "grad_norm": 28.802600860595703, "learning_rate": 9.702380952380953e-06, "loss": 45.6062, "step": 1759 }, { "epoch": 41.907462686567165, "grad_norm": 23.895822525024414, "learning_rate": 9.698412698412698e-06, "loss": 44.3029, "step": 1760 }, { "epoch": 41.93134328358209, "grad_norm": 26.175247192382812, "learning_rate": 9.694444444444446e-06, "loss": 45.6048, "step": 1761 }, { "epoch": 41.95522388059702, "grad_norm": 23.499914169311523, "learning_rate": 9.690476190476191e-06, "loss": 45.4891, "step": 1762 }, { "epoch": 41.97910447761194, "grad_norm": 22.244211196899414, "learning_rate": 9.686507936507938e-06, "loss": 44.1723, "step": 1763 }, { "epoch": 42.0, "grad_norm": 20.29228401184082, "learning_rate": 9.682539682539683e-06, "loss": 39.7896, "step": 1764 }, { "epoch": 42.02388059701492, "grad_norm": 27.773515701293945, "learning_rate": 9.678571428571429e-06, "loss": 45.7383, "step": 1765 }, { "epoch": 42.04776119402985, "grad_norm": 27.289716720581055, "learning_rate": 9.674603174603176e-06, "loss": 45.2073, "step": 1766 }, { "epoch": 42.071641791044776, "grad_norm": 21.16016387939453, "learning_rate": 9.670634920634921e-06, "loss": 45.3415, "step": 1767 }, { "epoch": 42.0955223880597, "grad_norm": 28.878597259521484, "learning_rate": 9.666666666666667e-06, "loss": 45.1139, "step": 1768 }, { "epoch": 42.11940298507463, "grad_norm": 29.504600524902344, "learning_rate": 9.662698412698414e-06, "loss": 46.185, "step": 1769 }, { "epoch": 42.14328358208955, "grad_norm": 20.372560501098633, "learning_rate": 9.65873015873016e-06, "loss": 46.4996, "step": 1770 }, { "epoch": 42.167164179104475, "grad_norm": 27.437274932861328, "learning_rate": 9.654761904761906e-06, "loss": 43.77, "step": 1771 }, { "epoch": 42.191044776119405, "grad_norm": 23.735233306884766, "learning_rate": 9.650793650793652e-06, "loss": 43.9415, "step": 1772 }, { "epoch": 42.21492537313433, "grad_norm": 26.434886932373047, "learning_rate": 9.646825396825397e-06, "loss": 46.6163, "step": 1773 }, { "epoch": 42.23880597014925, "grad_norm": 26.843782424926758, "learning_rate": 9.642857142857144e-06, "loss": 46.1987, "step": 1774 }, { "epoch": 42.26268656716418, "grad_norm": 25.86046600341797, "learning_rate": 9.63888888888889e-06, "loss": 46.635, "step": 1775 }, { "epoch": 42.286567164179104, "grad_norm": 25.95208740234375, "learning_rate": 9.634920634920637e-06, "loss": 44.6339, "step": 1776 }, { "epoch": 42.31044776119403, "grad_norm": 21.243392944335938, "learning_rate": 9.630952380952382e-06, "loss": 45.1151, "step": 1777 }, { "epoch": 42.33432835820896, "grad_norm": 22.445972442626953, "learning_rate": 9.626984126984127e-06, "loss": 45.1704, "step": 1778 }, { "epoch": 42.35820895522388, "grad_norm": 37.871681213378906, "learning_rate": 9.623015873015875e-06, "loss": 45.116, "step": 1779 }, { "epoch": 42.3820895522388, "grad_norm": 25.75882339477539, "learning_rate": 9.61904761904762e-06, "loss": 45.2748, "step": 1780 }, { "epoch": 42.40597014925373, "grad_norm": 32.44329071044922, "learning_rate": 9.615079365079365e-06, "loss": 45.0782, "step": 1781 }, { "epoch": 42.429850746268656, "grad_norm": 25.74696159362793, "learning_rate": 9.611111111111112e-06, "loss": 46.1405, "step": 1782 }, { "epoch": 42.45373134328358, "grad_norm": 44.88374710083008, "learning_rate": 9.607142857142858e-06, "loss": 45.7843, "step": 1783 }, { "epoch": 42.47761194029851, "grad_norm": 29.956615447998047, "learning_rate": 9.603174603174605e-06, "loss": 46.7361, "step": 1784 }, { "epoch": 42.50149253731343, "grad_norm": 41.191864013671875, "learning_rate": 9.59920634920635e-06, "loss": 45.7368, "step": 1785 }, { "epoch": 42.525373134328355, "grad_norm": 32.30370330810547, "learning_rate": 9.595238095238096e-06, "loss": 45.4091, "step": 1786 }, { "epoch": 42.549253731343285, "grad_norm": 32.65694046020508, "learning_rate": 9.591269841269843e-06, "loss": 44.8837, "step": 1787 }, { "epoch": 42.57313432835821, "grad_norm": 29.783634185791016, "learning_rate": 9.587301587301588e-06, "loss": 46.0239, "step": 1788 }, { "epoch": 42.59701492537313, "grad_norm": 32.415035247802734, "learning_rate": 9.583333333333335e-06, "loss": 44.7968, "step": 1789 }, { "epoch": 42.62089552238806, "grad_norm": 31.461589813232422, "learning_rate": 9.57936507936508e-06, "loss": 44.5408, "step": 1790 }, { "epoch": 42.644776119402984, "grad_norm": 27.083560943603516, "learning_rate": 9.575396825396826e-06, "loss": 44.9716, "step": 1791 }, { "epoch": 42.668656716417914, "grad_norm": 34.453102111816406, "learning_rate": 9.571428571428573e-06, "loss": 44.8527, "step": 1792 }, { "epoch": 42.69253731343284, "grad_norm": 24.403902053833008, "learning_rate": 9.567460317460319e-06, "loss": 44.6635, "step": 1793 }, { "epoch": 42.71641791044776, "grad_norm": 43.89455795288086, "learning_rate": 9.563492063492064e-06, "loss": 45.9798, "step": 1794 }, { "epoch": 42.74029850746269, "grad_norm": 33.704498291015625, "learning_rate": 9.559523809523811e-06, "loss": 45.8182, "step": 1795 }, { "epoch": 42.76417910447761, "grad_norm": 38.266357421875, "learning_rate": 9.555555555555556e-06, "loss": 44.8923, "step": 1796 }, { "epoch": 42.788059701492536, "grad_norm": 36.38774490356445, "learning_rate": 9.551587301587304e-06, "loss": 45.5987, "step": 1797 }, { "epoch": 42.811940298507466, "grad_norm": 33.449737548828125, "learning_rate": 9.547619047619049e-06, "loss": 46.2494, "step": 1798 }, { "epoch": 42.83582089552239, "grad_norm": 29.902509689331055, "learning_rate": 9.543650793650794e-06, "loss": 44.7438, "step": 1799 }, { "epoch": 42.85970149253731, "grad_norm": 35.025184631347656, "learning_rate": 9.539682539682541e-06, "loss": 44.7825, "step": 1800 }, { "epoch": 42.88358208955224, "grad_norm": 30.783037185668945, "learning_rate": 9.535714285714287e-06, "loss": 45.3493, "step": 1801 }, { "epoch": 42.907462686567165, "grad_norm": 28.61165428161621, "learning_rate": 9.531746031746032e-06, "loss": 46.5537, "step": 1802 }, { "epoch": 42.93134328358209, "grad_norm": 34.27008056640625, "learning_rate": 9.527777777777778e-06, "loss": 44.0439, "step": 1803 }, { "epoch": 42.95522388059702, "grad_norm": 31.05691146850586, "learning_rate": 9.523809523809525e-06, "loss": 46.1128, "step": 1804 }, { "epoch": 42.97910447761194, "grad_norm": 28.658565521240234, "learning_rate": 9.51984126984127e-06, "loss": 46.2442, "step": 1805 }, { "epoch": 43.0, "grad_norm": 20.02385139465332, "learning_rate": 9.515873015873016e-06, "loss": 39.8537, "step": 1806 }, { "epoch": 43.02388059701492, "grad_norm": 32.5422248840332, "learning_rate": 9.511904761904763e-06, "loss": 44.0765, "step": 1807 }, { "epoch": 43.04776119402985, "grad_norm": 22.364904403686523, "learning_rate": 9.507936507936508e-06, "loss": 45.6789, "step": 1808 }, { "epoch": 43.071641791044776, "grad_norm": 35.576072692871094, "learning_rate": 9.503968253968255e-06, "loss": 45.6707, "step": 1809 }, { "epoch": 43.0955223880597, "grad_norm": 27.892908096313477, "learning_rate": 9.5e-06, "loss": 46.348, "step": 1810 }, { "epoch": 43.11940298507463, "grad_norm": 22.283756256103516, "learning_rate": 9.496031746031746e-06, "loss": 44.8757, "step": 1811 }, { "epoch": 43.14328358208955, "grad_norm": 34.38758087158203, "learning_rate": 9.492063492063493e-06, "loss": 45.0544, "step": 1812 }, { "epoch": 43.167164179104475, "grad_norm": 26.720060348510742, "learning_rate": 9.488095238095238e-06, "loss": 46.2092, "step": 1813 }, { "epoch": 43.191044776119405, "grad_norm": 35.375362396240234, "learning_rate": 9.484126984126984e-06, "loss": 46.0173, "step": 1814 }, { "epoch": 43.21492537313433, "grad_norm": 24.92397117614746, "learning_rate": 9.480158730158731e-06, "loss": 45.5031, "step": 1815 }, { "epoch": 43.23880597014925, "grad_norm": 35.76795959472656, "learning_rate": 9.476190476190476e-06, "loss": 44.8149, "step": 1816 }, { "epoch": 43.26268656716418, "grad_norm": 29.861675262451172, "learning_rate": 9.472222222222223e-06, "loss": 45.4173, "step": 1817 }, { "epoch": 43.286567164179104, "grad_norm": 33.83314895629883, "learning_rate": 9.468253968253969e-06, "loss": 44.8036, "step": 1818 }, { "epoch": 43.31044776119403, "grad_norm": 32.994483947753906, "learning_rate": 9.464285714285714e-06, "loss": 46.5555, "step": 1819 }, { "epoch": 43.33432835820896, "grad_norm": 23.94085693359375, "learning_rate": 9.460317460317461e-06, "loss": 45.9566, "step": 1820 }, { "epoch": 43.35820895522388, "grad_norm": 34.10947799682617, "learning_rate": 9.456349206349207e-06, "loss": 45.2182, "step": 1821 }, { "epoch": 43.3820895522388, "grad_norm": 23.844850540161133, "learning_rate": 9.452380952380952e-06, "loss": 45.5904, "step": 1822 }, { "epoch": 43.40597014925373, "grad_norm": 46.643768310546875, "learning_rate": 9.4484126984127e-06, "loss": 46.6924, "step": 1823 }, { "epoch": 43.429850746268656, "grad_norm": 32.49457931518555, "learning_rate": 9.444444444444445e-06, "loss": 44.024, "step": 1824 }, { "epoch": 43.45373134328358, "grad_norm": 32.36979293823242, "learning_rate": 9.440476190476192e-06, "loss": 45.5752, "step": 1825 }, { "epoch": 43.47761194029851, "grad_norm": 28.21212387084961, "learning_rate": 9.436507936507937e-06, "loss": 43.3649, "step": 1826 }, { "epoch": 43.50149253731343, "grad_norm": 30.494169235229492, "learning_rate": 9.432539682539682e-06, "loss": 44.4726, "step": 1827 }, { "epoch": 43.525373134328355, "grad_norm": 29.817806243896484, "learning_rate": 9.42857142857143e-06, "loss": 45.4315, "step": 1828 }, { "epoch": 43.549253731343285, "grad_norm": 32.68490219116211, "learning_rate": 9.424603174603175e-06, "loss": 45.1985, "step": 1829 }, { "epoch": 43.57313432835821, "grad_norm": 28.48166275024414, "learning_rate": 9.420634920634922e-06, "loss": 45.6737, "step": 1830 }, { "epoch": 43.59701492537313, "grad_norm": 30.532995223999023, "learning_rate": 9.416666666666667e-06, "loss": 45.9931, "step": 1831 }, { "epoch": 43.62089552238806, "grad_norm": 24.953765869140625, "learning_rate": 9.412698412698413e-06, "loss": 44.4189, "step": 1832 }, { "epoch": 43.644776119402984, "grad_norm": 23.647258758544922, "learning_rate": 9.40873015873016e-06, "loss": 44.6757, "step": 1833 }, { "epoch": 43.668656716417914, "grad_norm": 28.41623878479004, "learning_rate": 9.404761904761905e-06, "loss": 45.5732, "step": 1834 }, { "epoch": 43.69253731343284, "grad_norm": 25.599082946777344, "learning_rate": 9.40079365079365e-06, "loss": 45.0382, "step": 1835 }, { "epoch": 43.71641791044776, "grad_norm": 39.584144592285156, "learning_rate": 9.396825396825398e-06, "loss": 45.1531, "step": 1836 }, { "epoch": 43.74029850746269, "grad_norm": 30.606550216674805, "learning_rate": 9.392857142857143e-06, "loss": 44.3696, "step": 1837 }, { "epoch": 43.76417910447761, "grad_norm": 37.25154495239258, "learning_rate": 9.38888888888889e-06, "loss": 45.4297, "step": 1838 }, { "epoch": 43.788059701492536, "grad_norm": 30.60915184020996, "learning_rate": 9.384920634920636e-06, "loss": 45.2441, "step": 1839 }, { "epoch": 43.811940298507466, "grad_norm": 32.886268615722656, "learning_rate": 9.380952380952381e-06, "loss": 45.3913, "step": 1840 }, { "epoch": 43.83582089552239, "grad_norm": 27.98761749267578, "learning_rate": 9.376984126984128e-06, "loss": 45.2191, "step": 1841 }, { "epoch": 43.85970149253731, "grad_norm": 33.787261962890625, "learning_rate": 9.373015873015874e-06, "loss": 45.0051, "step": 1842 }, { "epoch": 43.88358208955224, "grad_norm": 26.90253257751465, "learning_rate": 9.36904761904762e-06, "loss": 45.9333, "step": 1843 }, { "epoch": 43.907462686567165, "grad_norm": 29.78704261779785, "learning_rate": 9.365079365079366e-06, "loss": 45.3598, "step": 1844 }, { "epoch": 43.93134328358209, "grad_norm": 24.871315002441406, "learning_rate": 9.361111111111111e-06, "loss": 44.4159, "step": 1845 }, { "epoch": 43.95522388059702, "grad_norm": 22.998323440551758, "learning_rate": 9.357142857142859e-06, "loss": 44.9989, "step": 1846 }, { "epoch": 43.97910447761194, "grad_norm": 28.724388122558594, "learning_rate": 9.353174603174604e-06, "loss": 46.0748, "step": 1847 }, { "epoch": 44.0, "grad_norm": 19.870426177978516, "learning_rate": 9.34920634920635e-06, "loss": 41.7823, "step": 1848 }, { "epoch": 44.02388059701492, "grad_norm": 28.000728607177734, "learning_rate": 9.345238095238096e-06, "loss": 44.3686, "step": 1849 }, { "epoch": 44.04776119402985, "grad_norm": 25.330766677856445, "learning_rate": 9.341269841269842e-06, "loss": 44.8766, "step": 1850 }, { "epoch": 44.071641791044776, "grad_norm": 30.589149475097656, "learning_rate": 9.337301587301589e-06, "loss": 46.3777, "step": 1851 }, { "epoch": 44.0955223880597, "grad_norm": 27.803207397460938, "learning_rate": 9.333333333333334e-06, "loss": 45.7627, "step": 1852 }, { "epoch": 44.11940298507463, "grad_norm": 28.11823081970215, "learning_rate": 9.32936507936508e-06, "loss": 44.9612, "step": 1853 }, { "epoch": 44.14328358208955, "grad_norm": 24.24823570251465, "learning_rate": 9.325396825396827e-06, "loss": 45.6512, "step": 1854 }, { "epoch": 44.167164179104475, "grad_norm": 29.81229019165039, "learning_rate": 9.321428571428572e-06, "loss": 43.7322, "step": 1855 }, { "epoch": 44.191044776119405, "grad_norm": 24.88245964050293, "learning_rate": 9.317460317460318e-06, "loss": 45.288, "step": 1856 }, { "epoch": 44.21492537313433, "grad_norm": 31.246389389038086, "learning_rate": 9.313492063492065e-06, "loss": 44.6547, "step": 1857 }, { "epoch": 44.23880597014925, "grad_norm": 29.363845825195312, "learning_rate": 9.30952380952381e-06, "loss": 44.7851, "step": 1858 }, { "epoch": 44.26268656716418, "grad_norm": 32.35028839111328, "learning_rate": 9.305555555555557e-06, "loss": 44.5643, "step": 1859 }, { "epoch": 44.286567164179104, "grad_norm": 31.52218246459961, "learning_rate": 9.301587301587303e-06, "loss": 45.293, "step": 1860 }, { "epoch": 44.31044776119403, "grad_norm": 29.180295944213867, "learning_rate": 9.297619047619048e-06, "loss": 45.298, "step": 1861 }, { "epoch": 44.33432835820896, "grad_norm": 27.626508712768555, "learning_rate": 9.293650793650795e-06, "loss": 45.1187, "step": 1862 }, { "epoch": 44.35820895522388, "grad_norm": 28.44379425048828, "learning_rate": 9.28968253968254e-06, "loss": 45.0835, "step": 1863 }, { "epoch": 44.3820895522388, "grad_norm": 29.45343017578125, "learning_rate": 9.285714285714288e-06, "loss": 45.5642, "step": 1864 }, { "epoch": 44.40597014925373, "grad_norm": 21.64850425720215, "learning_rate": 9.281746031746033e-06, "loss": 45.6837, "step": 1865 }, { "epoch": 44.429850746268656, "grad_norm": 35.32088088989258, "learning_rate": 9.277777777777778e-06, "loss": 44.9266, "step": 1866 }, { "epoch": 44.45373134328358, "grad_norm": 28.638429641723633, "learning_rate": 9.273809523809525e-06, "loss": 45.9407, "step": 1867 }, { "epoch": 44.47761194029851, "grad_norm": 31.444725036621094, "learning_rate": 9.26984126984127e-06, "loss": 45.4442, "step": 1868 }, { "epoch": 44.50149253731343, "grad_norm": 26.114784240722656, "learning_rate": 9.265873015873016e-06, "loss": 45.1998, "step": 1869 }, { "epoch": 44.525373134328355, "grad_norm": 24.51571273803711, "learning_rate": 9.261904761904763e-06, "loss": 45.0705, "step": 1870 }, { "epoch": 44.549253731343285, "grad_norm": 24.52007293701172, "learning_rate": 9.257936507936509e-06, "loss": 43.9359, "step": 1871 }, { "epoch": 44.57313432835821, "grad_norm": 17.876834869384766, "learning_rate": 9.253968253968256e-06, "loss": 44.5254, "step": 1872 }, { "epoch": 44.59701492537313, "grad_norm": 21.0299015045166, "learning_rate": 9.250000000000001e-06, "loss": 46.0916, "step": 1873 }, { "epoch": 44.62089552238806, "grad_norm": 30.12071990966797, "learning_rate": 9.246031746031747e-06, "loss": 44.1769, "step": 1874 }, { "epoch": 44.644776119402984, "grad_norm": 23.94618797302246, "learning_rate": 9.242063492063494e-06, "loss": 45.628, "step": 1875 }, { "epoch": 44.668656716417914, "grad_norm": 29.615930557250977, "learning_rate": 9.238095238095239e-06, "loss": 45.2762, "step": 1876 }, { "epoch": 44.69253731343284, "grad_norm": 30.00957489013672, "learning_rate": 9.234126984126986e-06, "loss": 45.6399, "step": 1877 }, { "epoch": 44.71641791044776, "grad_norm": 26.414703369140625, "learning_rate": 9.230158730158732e-06, "loss": 44.6988, "step": 1878 }, { "epoch": 44.74029850746269, "grad_norm": 28.785755157470703, "learning_rate": 9.226190476190477e-06, "loss": 45.4551, "step": 1879 }, { "epoch": 44.76417910447761, "grad_norm": 23.4616756439209, "learning_rate": 9.222222222222224e-06, "loss": 44.8668, "step": 1880 }, { "epoch": 44.788059701492536, "grad_norm": 25.046113967895508, "learning_rate": 9.218253968253968e-06, "loss": 45.8905, "step": 1881 }, { "epoch": 44.811940298507466, "grad_norm": 31.216581344604492, "learning_rate": 9.214285714285715e-06, "loss": 46.6996, "step": 1882 }, { "epoch": 44.83582089552239, "grad_norm": 22.215465545654297, "learning_rate": 9.21031746031746e-06, "loss": 46.1791, "step": 1883 }, { "epoch": 44.85970149253731, "grad_norm": 33.831214904785156, "learning_rate": 9.206349206349207e-06, "loss": 45.3197, "step": 1884 }, { "epoch": 44.88358208955224, "grad_norm": 24.447084426879883, "learning_rate": 9.202380952380953e-06, "loss": 45.2949, "step": 1885 }, { "epoch": 44.907462686567165, "grad_norm": 31.735240936279297, "learning_rate": 9.198412698412698e-06, "loss": 46.4555, "step": 1886 }, { "epoch": 44.93134328358209, "grad_norm": 27.23394012451172, "learning_rate": 9.194444444444445e-06, "loss": 45.9441, "step": 1887 }, { "epoch": 44.95522388059702, "grad_norm": 27.79869270324707, "learning_rate": 9.19047619047619e-06, "loss": 45.387, "step": 1888 }, { "epoch": 44.97910447761194, "grad_norm": 24.329313278198242, "learning_rate": 9.186507936507936e-06, "loss": 44.2934, "step": 1889 }, { "epoch": 45.0, "grad_norm": 22.191181182861328, "learning_rate": 9.182539682539683e-06, "loss": 39.0195, "step": 1890 }, { "epoch": 45.02388059701492, "grad_norm": 32.1130256652832, "learning_rate": 9.178571428571429e-06, "loss": 45.9272, "step": 1891 }, { "epoch": 45.04776119402985, "grad_norm": 19.621145248413086, "learning_rate": 9.174603174603176e-06, "loss": 44.3447, "step": 1892 }, { "epoch": 45.071641791044776, "grad_norm": 39.10493087768555, "learning_rate": 9.170634920634921e-06, "loss": 46.001, "step": 1893 }, { "epoch": 45.0955223880597, "grad_norm": 23.7473201751709, "learning_rate": 9.166666666666666e-06, "loss": 45.5786, "step": 1894 }, { "epoch": 45.11940298507463, "grad_norm": 30.535781860351562, "learning_rate": 9.162698412698414e-06, "loss": 46.3373, "step": 1895 }, { "epoch": 45.14328358208955, "grad_norm": 26.53186798095703, "learning_rate": 9.158730158730159e-06, "loss": 44.6074, "step": 1896 }, { "epoch": 45.167164179104475, "grad_norm": 31.9615478515625, "learning_rate": 9.154761904761906e-06, "loss": 43.229, "step": 1897 }, { "epoch": 45.191044776119405, "grad_norm": 28.577655792236328, "learning_rate": 9.150793650793651e-06, "loss": 45.6407, "step": 1898 }, { "epoch": 45.21492537313433, "grad_norm": 18.897531509399414, "learning_rate": 9.146825396825397e-06, "loss": 45.4092, "step": 1899 }, { "epoch": 45.23880597014925, "grad_norm": 31.126819610595703, "learning_rate": 9.142857142857144e-06, "loss": 43.966, "step": 1900 }, { "epoch": 45.26268656716418, "grad_norm": 26.312490463256836, "learning_rate": 9.13888888888889e-06, "loss": 45.3819, "step": 1901 }, { "epoch": 45.286567164179104, "grad_norm": 28.255640029907227, "learning_rate": 9.134920634920635e-06, "loss": 43.9926, "step": 1902 }, { "epoch": 45.31044776119403, "grad_norm": 27.333642959594727, "learning_rate": 9.130952380952382e-06, "loss": 45.7229, "step": 1903 }, { "epoch": 45.33432835820896, "grad_norm": 27.024580001831055, "learning_rate": 9.126984126984127e-06, "loss": 45.24, "step": 1904 }, { "epoch": 45.35820895522388, "grad_norm": 31.131914138793945, "learning_rate": 9.123015873015874e-06, "loss": 44.4842, "step": 1905 }, { "epoch": 45.3820895522388, "grad_norm": 27.244861602783203, "learning_rate": 9.11904761904762e-06, "loss": 45.0392, "step": 1906 }, { "epoch": 45.40597014925373, "grad_norm": 30.606016159057617, "learning_rate": 9.115079365079365e-06, "loss": 44.4968, "step": 1907 }, { "epoch": 45.429850746268656, "grad_norm": 22.56324577331543, "learning_rate": 9.111111111111112e-06, "loss": 45.5149, "step": 1908 }, { "epoch": 45.45373134328358, "grad_norm": 31.586326599121094, "learning_rate": 9.107142857142858e-06, "loss": 45.9413, "step": 1909 }, { "epoch": 45.47761194029851, "grad_norm": 23.143661499023438, "learning_rate": 9.103174603174603e-06, "loss": 44.3301, "step": 1910 }, { "epoch": 45.50149253731343, "grad_norm": 33.158111572265625, "learning_rate": 9.09920634920635e-06, "loss": 45.3503, "step": 1911 }, { "epoch": 45.525373134328355, "grad_norm": 26.259010314941406, "learning_rate": 9.095238095238095e-06, "loss": 44.125, "step": 1912 }, { "epoch": 45.549253731343285, "grad_norm": 25.72600555419922, "learning_rate": 9.091269841269843e-06, "loss": 45.8252, "step": 1913 }, { "epoch": 45.57313432835821, "grad_norm": 29.651403427124023, "learning_rate": 9.087301587301588e-06, "loss": 44.7603, "step": 1914 }, { "epoch": 45.59701492537313, "grad_norm": 24.896892547607422, "learning_rate": 9.083333333333333e-06, "loss": 45.3582, "step": 1915 }, { "epoch": 45.62089552238806, "grad_norm": 26.172271728515625, "learning_rate": 9.07936507936508e-06, "loss": 45.418, "step": 1916 }, { "epoch": 45.644776119402984, "grad_norm": 31.333498001098633, "learning_rate": 9.075396825396826e-06, "loss": 45.5952, "step": 1917 }, { "epoch": 45.668656716417914, "grad_norm": 23.452194213867188, "learning_rate": 9.071428571428573e-06, "loss": 45.8141, "step": 1918 }, { "epoch": 45.69253731343284, "grad_norm": 30.300634384155273, "learning_rate": 9.067460317460318e-06, "loss": 46.1877, "step": 1919 }, { "epoch": 45.71641791044776, "grad_norm": 24.516042709350586, "learning_rate": 9.063492063492064e-06, "loss": 44.0542, "step": 1920 }, { "epoch": 45.74029850746269, "grad_norm": 26.41005516052246, "learning_rate": 9.05952380952381e-06, "loss": 44.2296, "step": 1921 }, { "epoch": 45.76417910447761, "grad_norm": 23.099822998046875, "learning_rate": 9.055555555555556e-06, "loss": 45.2567, "step": 1922 }, { "epoch": 45.788059701492536, "grad_norm": 18.7821044921875, "learning_rate": 9.051587301587302e-06, "loss": 44.5807, "step": 1923 }, { "epoch": 45.811940298507466, "grad_norm": 31.705181121826172, "learning_rate": 9.047619047619049e-06, "loss": 45.1571, "step": 1924 }, { "epoch": 45.83582089552239, "grad_norm": 25.712608337402344, "learning_rate": 9.043650793650794e-06, "loss": 44.9665, "step": 1925 }, { "epoch": 45.85970149253731, "grad_norm": 31.790864944458008, "learning_rate": 9.039682539682541e-06, "loss": 45.6095, "step": 1926 }, { "epoch": 45.88358208955224, "grad_norm": 27.735107421875, "learning_rate": 9.035714285714287e-06, "loss": 45.8988, "step": 1927 }, { "epoch": 45.907462686567165, "grad_norm": 30.94534683227539, "learning_rate": 9.031746031746032e-06, "loss": 45.8302, "step": 1928 }, { "epoch": 45.93134328358209, "grad_norm": 23.146005630493164, "learning_rate": 9.027777777777779e-06, "loss": 45.3911, "step": 1929 }, { "epoch": 45.95522388059702, "grad_norm": 24.59404945373535, "learning_rate": 9.023809523809524e-06, "loss": 45.1403, "step": 1930 }, { "epoch": 45.97910447761194, "grad_norm": 25.62955665588379, "learning_rate": 9.019841269841272e-06, "loss": 44.8934, "step": 1931 }, { "epoch": 46.0, "grad_norm": 20.037391662597656, "learning_rate": 9.015873015873017e-06, "loss": 39.4122, "step": 1932 }, { "epoch": 46.02388059701492, "grad_norm": 25.78251075744629, "learning_rate": 9.011904761904762e-06, "loss": 45.7163, "step": 1933 }, { "epoch": 46.04776119402985, "grad_norm": 28.0667781829834, "learning_rate": 9.00793650793651e-06, "loss": 44.6447, "step": 1934 }, { "epoch": 46.071641791044776, "grad_norm": NaN, "learning_rate": 9.003968253968255e-06, "loss": 61.1269, "step": 1935 }, { "epoch": 46.0955223880597, "grad_norm": NaN, "learning_rate": 9.003968253968255e-06, "loss": 57.8669, "step": 1936 }, { "epoch": 46.11940298507463, "grad_norm": 19.022104263305664, "learning_rate": 9.003968253968255e-06, "loss": 46.0546, "step": 1937 }, { "epoch": 46.14328358208955, "grad_norm": 28.844619750976562, "learning_rate": 9e-06, "loss": 44.5077, "step": 1938 }, { "epoch": 46.167164179104475, "grad_norm": 23.570850372314453, "learning_rate": 8.996031746031747e-06, "loss": 44.8965, "step": 1939 }, { "epoch": 46.191044776119405, "grad_norm": 27.71855354309082, "learning_rate": 8.992063492063493e-06, "loss": 45.3302, "step": 1940 }, { "epoch": 46.21492537313433, "grad_norm": 23.61193084716797, "learning_rate": 8.98809523809524e-06, "loss": 45.4048, "step": 1941 }, { "epoch": 46.23880597014925, "grad_norm": 27.16132926940918, "learning_rate": 8.984126984126985e-06, "loss": 44.535, "step": 1942 }, { "epoch": 46.26268656716418, "grad_norm": 25.254039764404297, "learning_rate": 8.98015873015873e-06, "loss": 45.2944, "step": 1943 }, { "epoch": 46.286567164179104, "grad_norm": 28.196325302124023, "learning_rate": 8.976190476190478e-06, "loss": 44.0106, "step": 1944 }, { "epoch": 46.31044776119403, "grad_norm": 24.75798988342285, "learning_rate": 8.972222222222223e-06, "loss": 44.949, "step": 1945 }, { "epoch": 46.33432835820896, "grad_norm": 30.992849349975586, "learning_rate": 8.968253968253968e-06, "loss": 44.6185, "step": 1946 }, { "epoch": 46.35820895522388, "grad_norm": 28.122825622558594, "learning_rate": 8.964285714285716e-06, "loss": 46.7498, "step": 1947 }, { "epoch": 46.3820895522388, "grad_norm": 25.130678176879883, "learning_rate": 8.960317460317461e-06, "loss": 45.7823, "step": 1948 }, { "epoch": 46.40597014925373, "grad_norm": 26.97332763671875, "learning_rate": 8.956349206349208e-06, "loss": 44.8217, "step": 1949 }, { "epoch": 46.429850746268656, "grad_norm": 21.403100967407227, "learning_rate": 8.952380952380953e-06, "loss": 45.4608, "step": 1950 }, { "epoch": 46.45373134328358, "grad_norm": 30.794330596923828, "learning_rate": 8.948412698412699e-06, "loss": 45.0327, "step": 1951 }, { "epoch": 46.47761194029851, "grad_norm": 26.035839080810547, "learning_rate": 8.944444444444446e-06, "loss": 44.6979, "step": 1952 }, { "epoch": 46.50149253731343, "grad_norm": 21.501266479492188, "learning_rate": 8.940476190476191e-06, "loss": 44.6421, "step": 1953 }, { "epoch": 46.525373134328355, "grad_norm": 27.67610740661621, "learning_rate": 8.936507936507938e-06, "loss": 44.5721, "step": 1954 }, { "epoch": 46.549253731343285, "grad_norm": 24.71251678466797, "learning_rate": 8.932539682539684e-06, "loss": 45.2891, "step": 1955 }, { "epoch": 46.57313432835821, "grad_norm": 32.72700500488281, "learning_rate": 8.92857142857143e-06, "loss": 45.0829, "step": 1956 }, { "epoch": 46.59701492537313, "grad_norm": 26.203643798828125, "learning_rate": 8.924603174603176e-06, "loss": 44.9264, "step": 1957 }, { "epoch": 46.62089552238806, "grad_norm": 25.362638473510742, "learning_rate": 8.920634920634922e-06, "loss": 45.1448, "step": 1958 }, { "epoch": 46.644776119402984, "grad_norm": 25.224456787109375, "learning_rate": 8.916666666666667e-06, "loss": 45.6017, "step": 1959 }, { "epoch": 46.668656716417914, "grad_norm": 29.02377700805664, "learning_rate": 8.912698412698414e-06, "loss": 45.5859, "step": 1960 }, { "epoch": 46.69253731343284, "grad_norm": 25.2493896484375, "learning_rate": 8.90873015873016e-06, "loss": 44.3262, "step": 1961 }, { "epoch": 46.71641791044776, "grad_norm": 24.432043075561523, "learning_rate": 8.904761904761905e-06, "loss": 44.0005, "step": 1962 }, { "epoch": 46.74029850746269, "grad_norm": 23.06245994567871, "learning_rate": 8.90079365079365e-06, "loss": 45.2406, "step": 1963 }, { "epoch": 46.76417910447761, "grad_norm": 27.603015899658203, "learning_rate": 8.896825396825398e-06, "loss": 45.2547, "step": 1964 }, { "epoch": 46.788059701492536, "grad_norm": 26.66181182861328, "learning_rate": 8.892857142857143e-06, "loss": 45.0288, "step": 1965 }, { "epoch": 46.811940298507466, "grad_norm": 19.665678024291992, "learning_rate": 8.888888888888888e-06, "loss": 45.1412, "step": 1966 }, { "epoch": 46.83582089552239, "grad_norm": 31.3046932220459, "learning_rate": 8.884920634920635e-06, "loss": 45.7144, "step": 1967 }, { "epoch": 46.85970149253731, "grad_norm": 24.661293029785156, "learning_rate": 8.88095238095238e-06, "loss": 43.9468, "step": 1968 }, { "epoch": 46.88358208955224, "grad_norm": 25.421525955200195, "learning_rate": 8.876984126984128e-06, "loss": 45.4404, "step": 1969 }, { "epoch": 46.907462686567165, "grad_norm": 30.11313247680664, "learning_rate": 8.873015873015873e-06, "loss": 44.4083, "step": 1970 }, { "epoch": 46.93134328358209, "grad_norm": 24.19677734375, "learning_rate": 8.869047619047619e-06, "loss": 45.5387, "step": 1971 }, { "epoch": 46.95522388059702, "grad_norm": 25.183414459228516, "learning_rate": 8.865079365079366e-06, "loss": 45.2725, "step": 1972 }, { "epoch": 46.97910447761194, "grad_norm": 22.570981979370117, "learning_rate": 8.861111111111111e-06, "loss": 44.1263, "step": 1973 }, { "epoch": 47.0, "grad_norm": 27.16869354248047, "learning_rate": 8.857142857142858e-06, "loss": 39.0382, "step": 1974 }, { "epoch": 47.02388059701492, "grad_norm": 27.326980590820312, "learning_rate": 8.853174603174604e-06, "loss": 45.0956, "step": 1975 }, { "epoch": 47.04776119402985, "grad_norm": 25.321685791015625, "learning_rate": 8.849206349206349e-06, "loss": 45.1531, "step": 1976 }, { "epoch": 47.071641791044776, "grad_norm": 29.480770111083984, "learning_rate": 8.845238095238096e-06, "loss": 44.7925, "step": 1977 }, { "epoch": 47.0955223880597, "grad_norm": 29.82880210876465, "learning_rate": 8.841269841269842e-06, "loss": 45.6435, "step": 1978 }, { "epoch": 47.11940298507463, "grad_norm": 31.852386474609375, "learning_rate": 8.837301587301587e-06, "loss": 45.0481, "step": 1979 }, { "epoch": 47.14328358208955, "grad_norm": 27.80265235900879, "learning_rate": 8.833333333333334e-06, "loss": 44.7472, "step": 1980 }, { "epoch": 47.167164179104475, "grad_norm": NaN, "learning_rate": 8.82936507936508e-06, "loss": 38.8619, "step": 1981 }, { "epoch": 47.191044776119405, "grad_norm": 24.525455474853516, "learning_rate": 8.82936507936508e-06, "loss": 44.8093, "step": 1982 }, { "epoch": 47.21492537313433, "grad_norm": 26.450302124023438, "learning_rate": 8.825396825396827e-06, "loss": 44.7615, "step": 1983 }, { "epoch": 47.23880597014925, "grad_norm": 22.493268966674805, "learning_rate": 8.821428571428572e-06, "loss": 44.5445, "step": 1984 }, { "epoch": 47.26268656716418, "grad_norm": 26.506013870239258, "learning_rate": 8.817460317460317e-06, "loss": 45.4412, "step": 1985 }, { "epoch": 47.286567164179104, "grad_norm": 23.09911346435547, "learning_rate": 8.813492063492064e-06, "loss": 44.8791, "step": 1986 }, { "epoch": 47.31044776119403, "grad_norm": 21.34832191467285, "learning_rate": 8.80952380952381e-06, "loss": 44.8867, "step": 1987 }, { "epoch": 47.33432835820896, "grad_norm": 25.69770050048828, "learning_rate": 8.805555555555557e-06, "loss": 45.0307, "step": 1988 }, { "epoch": 47.35820895522388, "grad_norm": 27.75917625427246, "learning_rate": 8.801587301587302e-06, "loss": 43.7733, "step": 1989 }, { "epoch": 47.3820895522388, "grad_norm": 24.314449310302734, "learning_rate": 8.797619047619048e-06, "loss": 44.8685, "step": 1990 }, { "epoch": 47.40597014925373, "grad_norm": 22.21106719970703, "learning_rate": 8.793650793650795e-06, "loss": 45.2589, "step": 1991 }, { "epoch": 47.429850746268656, "grad_norm": 28.61949920654297, "learning_rate": 8.78968253968254e-06, "loss": 45.7972, "step": 1992 }, { "epoch": 47.45373134328358, "grad_norm": 27.726839065551758, "learning_rate": 8.785714285714286e-06, "loss": 44.0989, "step": 1993 }, { "epoch": 47.47761194029851, "grad_norm": 24.9364013671875, "learning_rate": 8.781746031746033e-06, "loss": 44.9365, "step": 1994 }, { "epoch": 47.50149253731343, "grad_norm": 23.380905151367188, "learning_rate": 8.777777777777778e-06, "loss": 44.9662, "step": 1995 }, { "epoch": 47.525373134328355, "grad_norm": 22.02720832824707, "learning_rate": 8.773809523809525e-06, "loss": 45.1456, "step": 1996 }, { "epoch": 47.549253731343285, "grad_norm": NaN, "learning_rate": 8.76984126984127e-06, "loss": 60.0243, "step": 1997 }, { "epoch": 47.57313432835821, "grad_norm": 21.263904571533203, "learning_rate": 8.76984126984127e-06, "loss": 44.6697, "step": 1998 }, { "epoch": 47.59701492537313, "grad_norm": 25.381332397460938, "learning_rate": 8.765873015873016e-06, "loss": 44.9032, "step": 1999 }, { "epoch": 47.62089552238806, "grad_norm": 24.297027587890625, "learning_rate": 8.761904761904763e-06, "loss": 44.5833, "step": 2000 }, { "epoch": 47.644776119402984, "grad_norm": 26.303585052490234, "learning_rate": 8.757936507936508e-06, "loss": 45.252, "step": 2001 }, { "epoch": 47.668656716417914, "grad_norm": 23.310070037841797, "learning_rate": 8.753968253968254e-06, "loss": 45.0068, "step": 2002 }, { "epoch": 47.69253731343284, "grad_norm": 30.19032859802246, "learning_rate": 8.750000000000001e-06, "loss": 46.1286, "step": 2003 }, { "epoch": 47.71641791044776, "grad_norm": 27.43839454650879, "learning_rate": 8.746031746031746e-06, "loss": 46.5151, "step": 2004 }, { "epoch": 47.74029850746269, "grad_norm": 24.49736976623535, "learning_rate": 8.742063492063493e-06, "loss": 45.2309, "step": 2005 }, { "epoch": 47.76417910447761, "grad_norm": 32.9915885925293, "learning_rate": 8.738095238095239e-06, "loss": 44.221, "step": 2006 }, { "epoch": 47.788059701492536, "grad_norm": 27.080114364624023, "learning_rate": 8.734126984126984e-06, "loss": 44.4515, "step": 2007 }, { "epoch": 47.811940298507466, "grad_norm": 34.84925079345703, "learning_rate": 8.730158730158731e-06, "loss": 44.5223, "step": 2008 }, { "epoch": 47.83582089552239, "grad_norm": 28.061695098876953, "learning_rate": 8.726190476190477e-06, "loss": 45.6776, "step": 2009 }, { "epoch": 47.85970149253731, "grad_norm": 35.316009521484375, "learning_rate": 8.722222222222224e-06, "loss": 45.6784, "step": 2010 }, { "epoch": 47.88358208955224, "grad_norm": 29.395872116088867, "learning_rate": 8.71825396825397e-06, "loss": 46.054, "step": 2011 }, { "epoch": 47.907462686567165, "grad_norm": 31.359512329101562, "learning_rate": 8.714285714285715e-06, "loss": 44.6921, "step": 2012 }, { "epoch": 47.93134328358209, "grad_norm": 24.621870040893555, "learning_rate": 8.710317460317462e-06, "loss": 45.8119, "step": 2013 }, { "epoch": 47.95522388059702, "grad_norm": 30.466150283813477, "learning_rate": 8.706349206349207e-06, "loss": 44.5282, "step": 2014 }, { "epoch": 47.97910447761194, "grad_norm": 29.490886688232422, "learning_rate": 8.702380952380952e-06, "loss": 45.2275, "step": 2015 }, { "epoch": 48.0, "grad_norm": 18.86721420288086, "learning_rate": 8.6984126984127e-06, "loss": 38.1757, "step": 2016 }, { "epoch": 48.02388059701492, "grad_norm": 34.39149856567383, "learning_rate": 8.694444444444445e-06, "loss": 45.4931, "step": 2017 }, { "epoch": 48.04776119402985, "grad_norm": 28.87833023071289, "learning_rate": 8.690476190476192e-06, "loss": 45.3396, "step": 2018 }, { "epoch": 48.071641791044776, "grad_norm": 36.20280838012695, "learning_rate": 8.686507936507937e-06, "loss": 44.7758, "step": 2019 }, { "epoch": 48.0955223880597, "grad_norm": 30.76156234741211, "learning_rate": 8.682539682539683e-06, "loss": 44.2899, "step": 2020 }, { "epoch": 48.11940298507463, "grad_norm": 36.33967208862305, "learning_rate": 8.67857142857143e-06, "loss": 44.6879, "step": 2021 }, { "epoch": 48.14328358208955, "grad_norm": 30.22699737548828, "learning_rate": 8.674603174603175e-06, "loss": 45.8113, "step": 2022 }, { "epoch": 48.167164179104475, "grad_norm": 30.748640060424805, "learning_rate": 8.670634920634922e-06, "loss": 44.048, "step": 2023 }, { "epoch": 48.191044776119405, "grad_norm": 25.484418869018555, "learning_rate": 8.666666666666668e-06, "loss": 44.9645, "step": 2024 }, { "epoch": 48.21492537313433, "grad_norm": 33.34728240966797, "learning_rate": 8.662698412698413e-06, "loss": 44.2533, "step": 2025 }, { "epoch": 48.23880597014925, "grad_norm": 24.65802764892578, "learning_rate": 8.65873015873016e-06, "loss": 45.9453, "step": 2026 }, { "epoch": 48.26268656716418, "grad_norm": 30.4432373046875, "learning_rate": 8.654761904761906e-06, "loss": 45.8027, "step": 2027 }, { "epoch": 48.286567164179104, "grad_norm": 22.55684471130371, "learning_rate": 8.650793650793651e-06, "loss": 45.6855, "step": 2028 }, { "epoch": 48.31044776119403, "grad_norm": 22.167613983154297, "learning_rate": 8.646825396825398e-06, "loss": 44.3946, "step": 2029 }, { "epoch": 48.33432835820896, "grad_norm": 27.42496681213379, "learning_rate": 8.642857142857144e-06, "loss": 45.3506, "step": 2030 }, { "epoch": 48.35820895522388, "grad_norm": 24.647188186645508, "learning_rate": 8.63888888888889e-06, "loss": 44.3746, "step": 2031 }, { "epoch": 48.3820895522388, "grad_norm": 28.068981170654297, "learning_rate": 8.634920634920636e-06, "loss": 44.7821, "step": 2032 }, { "epoch": 48.40597014925373, "grad_norm": 22.093984603881836, "learning_rate": 8.630952380952381e-06, "loss": 43.8444, "step": 2033 }, { "epoch": 48.429850746268656, "grad_norm": 33.278778076171875, "learning_rate": 8.626984126984129e-06, "loss": 44.8849, "step": 2034 }, { "epoch": 48.45373134328358, "grad_norm": 23.357349395751953, "learning_rate": 8.623015873015874e-06, "loss": 44.8346, "step": 2035 }, { "epoch": 48.47761194029851, "grad_norm": 29.543947219848633, "learning_rate": 8.61904761904762e-06, "loss": 45.8072, "step": 2036 }, { "epoch": 48.50149253731343, "grad_norm": 24.81306266784668, "learning_rate": 8.615079365079366e-06, "loss": 43.6868, "step": 2037 }, { "epoch": 48.525373134328355, "grad_norm": 30.09635353088379, "learning_rate": 8.611111111111112e-06, "loss": 45.1631, "step": 2038 }, { "epoch": 48.549253731343285, "grad_norm": 26.751686096191406, "learning_rate": 8.607142857142859e-06, "loss": 44.5276, "step": 2039 }, { "epoch": 48.57313432835821, "grad_norm": 22.96086883544922, "learning_rate": 8.603174603174604e-06, "loss": 45.5322, "step": 2040 }, { "epoch": 48.59701492537313, "grad_norm": 30.90753173828125, "learning_rate": 8.59920634920635e-06, "loss": 44.5476, "step": 2041 }, { "epoch": 48.62089552238806, "grad_norm": 22.072256088256836, "learning_rate": 8.595238095238097e-06, "loss": 45.3412, "step": 2042 }, { "epoch": 48.644776119402984, "grad_norm": 37.27132034301758, "learning_rate": 8.591269841269842e-06, "loss": 43.9968, "step": 2043 }, { "epoch": 48.668656716417914, "grad_norm": 31.473464965820312, "learning_rate": 8.587301587301588e-06, "loss": 46.7003, "step": 2044 }, { "epoch": 48.69253731343284, "grad_norm": 41.3200798034668, "learning_rate": 8.583333333333333e-06, "loss": 44.9254, "step": 2045 }, { "epoch": 48.71641791044776, "grad_norm": 28.326889038085938, "learning_rate": 8.57936507936508e-06, "loss": 45.4611, "step": 2046 }, { "epoch": 48.74029850746269, "grad_norm": 42.016624450683594, "learning_rate": 8.575396825396826e-06, "loss": 45.9752, "step": 2047 }, { "epoch": 48.76417910447761, "grad_norm": 39.264827728271484, "learning_rate": 8.571428571428571e-06, "loss": 45.9133, "step": 2048 }, { "epoch": 48.788059701492536, "grad_norm": 36.876461029052734, "learning_rate": 8.567460317460318e-06, "loss": 44.052, "step": 2049 }, { "epoch": 48.811940298507466, "grad_norm": 33.36867141723633, "learning_rate": 8.563492063492063e-06, "loss": 44.8014, "step": 2050 }, { "epoch": 48.83582089552239, "grad_norm": 33.16298294067383, "learning_rate": 8.55952380952381e-06, "loss": 44.005, "step": 2051 }, { "epoch": 48.85970149253731, "grad_norm": 32.4409065246582, "learning_rate": 8.555555555555556e-06, "loss": 44.2993, "step": 2052 }, { "epoch": 48.88358208955224, "grad_norm": 32.56459426879883, "learning_rate": 8.551587301587301e-06, "loss": 45.2025, "step": 2053 }, { "epoch": 48.907462686567165, "grad_norm": 30.31665802001953, "learning_rate": 8.547619047619048e-06, "loss": 43.8506, "step": 2054 }, { "epoch": 48.93134328358209, "grad_norm": 29.07672119140625, "learning_rate": 8.543650793650794e-06, "loss": 44.2567, "step": 2055 }, { "epoch": 48.95522388059702, "grad_norm": 24.603849411010742, "learning_rate": 8.53968253968254e-06, "loss": 44.5072, "step": 2056 }, { "epoch": 48.97910447761194, "grad_norm": 26.305355072021484, "learning_rate": 8.535714285714286e-06, "loss": 45.2023, "step": 2057 }, { "epoch": 49.0, "grad_norm": 20.483905792236328, "learning_rate": 8.531746031746032e-06, "loss": 38.3416, "step": 2058 }, { "epoch": 49.02388059701492, "grad_norm": 18.845535278320312, "learning_rate": 8.527777777777779e-06, "loss": 44.0003, "step": 2059 }, { "epoch": 49.04776119402985, "grad_norm": 20.018390655517578, "learning_rate": 8.523809523809524e-06, "loss": 45.5951, "step": 2060 }, { "epoch": 49.071641791044776, "grad_norm": 18.276540756225586, "learning_rate": 8.51984126984127e-06, "loss": 45.4302, "step": 2061 }, { "epoch": 49.0955223880597, "grad_norm": 18.592966079711914, "learning_rate": 8.515873015873017e-06, "loss": 44.9415, "step": 2062 }, { "epoch": 49.11940298507463, "grad_norm": NaN, "learning_rate": 8.511904761904762e-06, "loss": 77.195, "step": 2063 }, { "epoch": 49.14328358208955, "grad_norm": 23.695045471191406, "learning_rate": 8.511904761904762e-06, "loss": 45.1853, "step": 2064 }, { "epoch": 49.167164179104475, "grad_norm": 16.90850830078125, "learning_rate": 8.507936507936509e-06, "loss": 44.0122, "step": 2065 }, { "epoch": 49.191044776119405, "grad_norm": 30.50786781311035, "learning_rate": 8.503968253968255e-06, "loss": 44.8398, "step": 2066 }, { "epoch": 49.21492537313433, "grad_norm": 24.35599136352539, "learning_rate": 8.5e-06, "loss": 43.4544, "step": 2067 }, { "epoch": 49.23880597014925, "grad_norm": 29.541887283325195, "learning_rate": 8.496031746031747e-06, "loss": 45.1471, "step": 2068 }, { "epoch": 49.26268656716418, "grad_norm": 20.277528762817383, "learning_rate": 8.492063492063492e-06, "loss": 45.1862, "step": 2069 }, { "epoch": 49.286567164179104, "grad_norm": 33.5463752746582, "learning_rate": 8.488095238095238e-06, "loss": 43.5467, "step": 2070 }, { "epoch": 49.31044776119403, "grad_norm": 23.218936920166016, "learning_rate": 8.484126984126985e-06, "loss": 44.6577, "step": 2071 }, { "epoch": 49.33432835820896, "grad_norm": 36.53571701049805, "learning_rate": 8.48015873015873e-06, "loss": 46.4774, "step": 2072 }, { "epoch": 49.35820895522388, "grad_norm": 32.15842819213867, "learning_rate": 8.476190476190477e-06, "loss": 45.3236, "step": 2073 }, { "epoch": 49.3820895522388, "grad_norm": 29.57740020751953, "learning_rate": 8.472222222222223e-06, "loss": 44.7034, "step": 2074 }, { "epoch": 49.40597014925373, "grad_norm": 28.12784194946289, "learning_rate": 8.468253968253968e-06, "loss": 43.741, "step": 2075 }, { "epoch": 49.429850746268656, "grad_norm": 28.08392906188965, "learning_rate": 8.464285714285715e-06, "loss": 45.326, "step": 2076 }, { "epoch": 49.45373134328358, "grad_norm": 24.909330368041992, "learning_rate": 8.46031746031746e-06, "loss": 45.979, "step": 2077 }, { "epoch": 49.47761194029851, "grad_norm": 26.343902587890625, "learning_rate": 8.456349206349208e-06, "loss": 44.1665, "step": 2078 }, { "epoch": 49.50149253731343, "grad_norm": 30.070533752441406, "learning_rate": 8.452380952380953e-06, "loss": 45.1331, "step": 2079 }, { "epoch": 49.525373134328355, "grad_norm": 26.733827590942383, "learning_rate": 8.448412698412699e-06, "loss": 43.9576, "step": 2080 }, { "epoch": 49.549253731343285, "grad_norm": 31.43610191345215, "learning_rate": 8.444444444444446e-06, "loss": 44.3933, "step": 2081 }, { "epoch": 49.57313432835821, "grad_norm": 24.856496810913086, "learning_rate": 8.440476190476191e-06, "loss": 44.561, "step": 2082 }, { "epoch": 49.59701492537313, "grad_norm": 30.097368240356445, "learning_rate": 8.436507936507936e-06, "loss": 44.617, "step": 2083 }, { "epoch": 49.62089552238806, "grad_norm": 26.63928985595703, "learning_rate": 8.432539682539684e-06, "loss": 45.1091, "step": 2084 }, { "epoch": 49.644776119402984, "grad_norm": 33.428932189941406, "learning_rate": 8.428571428571429e-06, "loss": 45.8576, "step": 2085 }, { "epoch": 49.668656716417914, "grad_norm": 26.33061408996582, "learning_rate": 8.424603174603176e-06, "loss": 46.6266, "step": 2086 }, { "epoch": 49.69253731343284, "grad_norm": 35.67467498779297, "learning_rate": 8.420634920634921e-06, "loss": 43.8886, "step": 2087 }, { "epoch": 49.71641791044776, "grad_norm": 33.62556076049805, "learning_rate": 8.416666666666667e-06, "loss": 44.819, "step": 2088 }, { "epoch": 49.74029850746269, "grad_norm": 29.146684646606445, "learning_rate": 8.412698412698414e-06, "loss": 45.1877, "step": 2089 }, { "epoch": 49.76417910447761, "grad_norm": 29.51055335998535, "learning_rate": 8.40873015873016e-06, "loss": 44.9054, "step": 2090 }, { "epoch": 49.788059701492536, "grad_norm": 31.709413528442383, "learning_rate": 8.404761904761905e-06, "loss": 44.8456, "step": 2091 }, { "epoch": 49.811940298507466, "grad_norm": 26.646390914916992, "learning_rate": 8.400793650793652e-06, "loss": 44.1815, "step": 2092 }, { "epoch": 49.83582089552239, "grad_norm": 35.582496643066406, "learning_rate": 8.396825396825397e-06, "loss": 44.9951, "step": 2093 }, { "epoch": 49.85970149253731, "grad_norm": 25.587371826171875, "learning_rate": 8.392857142857144e-06, "loss": 44.3349, "step": 2094 }, { "epoch": 49.88358208955224, "grad_norm": 29.13399887084961, "learning_rate": 8.38888888888889e-06, "loss": 45.28, "step": 2095 }, { "epoch": 49.907462686567165, "grad_norm": 21.462890625, "learning_rate": 8.384920634920635e-06, "loss": 44.4383, "step": 2096 }, { "epoch": 49.93134328358209, "grad_norm": 31.970626831054688, "learning_rate": 8.380952380952382e-06, "loss": 45.989, "step": 2097 }, { "epoch": 49.95522388059702, "grad_norm": 21.948705673217773, "learning_rate": 8.376984126984128e-06, "loss": 44.0871, "step": 2098 }, { "epoch": 49.97910447761194, "grad_norm": 35.07805252075195, "learning_rate": 8.373015873015875e-06, "loss": 44.709, "step": 2099 }, { "epoch": 50.0, "grad_norm": 21.554956436157227, "learning_rate": 8.36904761904762e-06, "loss": 38.6725, "step": 2100 }, { "epoch": 50.02388059701492, "grad_norm": 35.4162712097168, "learning_rate": 8.365079365079365e-06, "loss": 44.2866, "step": 2101 }, { "epoch": 50.04776119402985, "grad_norm": 31.357215881347656, "learning_rate": 8.361111111111113e-06, "loss": 44.9399, "step": 2102 }, { "epoch": 50.071641791044776, "grad_norm": 28.055850982666016, "learning_rate": 8.357142857142858e-06, "loss": 44.2145, "step": 2103 }, { "epoch": 50.0955223880597, "grad_norm": 27.62700080871582, "learning_rate": 8.353174603174603e-06, "loss": 44.715, "step": 2104 }, { "epoch": 50.11940298507463, "grad_norm": 32.586219787597656, "learning_rate": 8.34920634920635e-06, "loss": 45.6174, "step": 2105 }, { "epoch": 50.14328358208955, "grad_norm": 24.922584533691406, "learning_rate": 8.345238095238096e-06, "loss": 46.0653, "step": 2106 }, { "epoch": 50.167164179104475, "grad_norm": 29.282079696655273, "learning_rate": 8.341269841269843e-06, "loss": 44.8826, "step": 2107 }, { "epoch": 50.191044776119405, "grad_norm": 25.85003089904785, "learning_rate": 8.337301587301588e-06, "loss": 43.7337, "step": 2108 }, { "epoch": 50.21492537313433, "grad_norm": 26.331398010253906, "learning_rate": 8.333333333333334e-06, "loss": 44.9624, "step": 2109 }, { "epoch": 50.23880597014925, "grad_norm": 19.595951080322266, "learning_rate": 8.32936507936508e-06, "loss": 45.0561, "step": 2110 }, { "epoch": 50.26268656716418, "grad_norm": 18.431438446044922, "learning_rate": 8.325396825396826e-06, "loss": 44.6963, "step": 2111 }, { "epoch": 50.286567164179104, "grad_norm": 20.670730590820312, "learning_rate": 8.321428571428573e-06, "loss": 44.6057, "step": 2112 }, { "epoch": 50.31044776119403, "grad_norm": 20.497106552124023, "learning_rate": 8.317460317460319e-06, "loss": 45.6219, "step": 2113 }, { "epoch": 50.33432835820896, "grad_norm": 21.33808708190918, "learning_rate": 8.313492063492064e-06, "loss": 43.6802, "step": 2114 }, { "epoch": 50.35820895522388, "grad_norm": 17.015180587768555, "learning_rate": 8.309523809523811e-06, "loss": 45.6156, "step": 2115 }, { "epoch": 50.3820895522388, "grad_norm": 25.82108497619629, "learning_rate": 8.305555555555557e-06, "loss": 45.529, "step": 2116 }, { "epoch": 50.40597014925373, "grad_norm": 20.37699317932129, "learning_rate": 8.301587301587302e-06, "loss": 44.4007, "step": 2117 }, { "epoch": 50.429850746268656, "grad_norm": 24.1844482421875, "learning_rate": 8.297619047619049e-06, "loss": 45.0155, "step": 2118 }, { "epoch": 50.45373134328358, "grad_norm": 21.229581832885742, "learning_rate": 8.293650793650794e-06, "loss": 44.8109, "step": 2119 }, { "epoch": 50.47761194029851, "grad_norm": 23.752500534057617, "learning_rate": 8.289682539682542e-06, "loss": 45.1129, "step": 2120 }, { "epoch": 50.50149253731343, "grad_norm": 19.724092483520508, "learning_rate": 8.285714285714287e-06, "loss": 44.1519, "step": 2121 }, { "epoch": 50.525373134328355, "grad_norm": 21.154827117919922, "learning_rate": 8.281746031746032e-06, "loss": 43.8136, "step": 2122 }, { "epoch": 50.549253731343285, "grad_norm": 21.17751121520996, "learning_rate": 8.277777777777778e-06, "loss": 44.7593, "step": 2123 }, { "epoch": 50.57313432835821, "grad_norm": 24.729738235473633, "learning_rate": 8.273809523809523e-06, "loss": 44.7794, "step": 2124 }, { "epoch": 50.59701492537313, "grad_norm": 18.432241439819336, "learning_rate": 8.26984126984127e-06, "loss": 44.0237, "step": 2125 }, { "epoch": 50.62089552238806, "grad_norm": 26.357515335083008, "learning_rate": 8.265873015873016e-06, "loss": 45.2566, "step": 2126 }, { "epoch": 50.644776119402984, "grad_norm": 24.270259857177734, "learning_rate": 8.261904761904763e-06, "loss": 44.1182, "step": 2127 }, { "epoch": 50.668656716417914, "grad_norm": 20.756067276000977, "learning_rate": 8.257936507936508e-06, "loss": 46.2374, "step": 2128 }, { "epoch": 50.69253731343284, "grad_norm": 23.159393310546875, "learning_rate": 8.253968253968254e-06, "loss": 44.1878, "step": 2129 }, { "epoch": 50.71641791044776, "grad_norm": 22.44221305847168, "learning_rate": 8.25e-06, "loss": 45.3746, "step": 2130 }, { "epoch": 50.74029850746269, "grad_norm": 20.27827262878418, "learning_rate": 8.246031746031746e-06, "loss": 44.1278, "step": 2131 }, { "epoch": 50.76417910447761, "grad_norm": 21.407669067382812, "learning_rate": 8.242063492063493e-06, "loss": 44.8487, "step": 2132 }, { "epoch": 50.788059701492536, "grad_norm": 24.570688247680664, "learning_rate": 8.238095238095239e-06, "loss": 44.2913, "step": 2133 }, { "epoch": 50.811940298507466, "grad_norm": 23.73247528076172, "learning_rate": 8.234126984126984e-06, "loss": 45.4539, "step": 2134 }, { "epoch": 50.83582089552239, "grad_norm": 20.265886306762695, "learning_rate": 8.230158730158731e-06, "loss": 43.1901, "step": 2135 }, { "epoch": 50.85970149253731, "grad_norm": 16.51488494873047, "learning_rate": 8.226190476190476e-06, "loss": 45.0321, "step": 2136 }, { "epoch": 50.88358208955224, "grad_norm": 19.107425689697266, "learning_rate": 8.222222222222222e-06, "loss": 44.3746, "step": 2137 }, { "epoch": 50.907462686567165, "grad_norm": 19.300790786743164, "learning_rate": 8.218253968253969e-06, "loss": 45.1466, "step": 2138 }, { "epoch": 50.93134328358209, "grad_norm": 19.817272186279297, "learning_rate": 8.214285714285714e-06, "loss": 44.9703, "step": 2139 }, { "epoch": 50.95522388059702, "grad_norm": 22.794174194335938, "learning_rate": 8.210317460317461e-06, "loss": 43.917, "step": 2140 }, { "epoch": 50.97910447761194, "grad_norm": 18.948871612548828, "learning_rate": 8.206349206349207e-06, "loss": 44.4099, "step": 2141 }, { "epoch": 51.0, "grad_norm": 13.966577529907227, "learning_rate": 8.202380952380952e-06, "loss": 38.9733, "step": 2142 }, { "epoch": 51.02388059701492, "grad_norm": 29.5616397857666, "learning_rate": 8.1984126984127e-06, "loss": 44.8355, "step": 2143 }, { "epoch": 51.04776119402985, "grad_norm": 22.391014099121094, "learning_rate": 8.194444444444445e-06, "loss": 44.6835, "step": 2144 }, { "epoch": 51.071641791044776, "grad_norm": 28.830854415893555, "learning_rate": 8.190476190476192e-06, "loss": 43.3011, "step": 2145 }, { "epoch": 51.0955223880597, "grad_norm": 21.114011764526367, "learning_rate": 8.186507936507937e-06, "loss": 44.4223, "step": 2146 }, { "epoch": 51.11940298507463, "grad_norm": 28.902416229248047, "learning_rate": 8.182539682539683e-06, "loss": 44.0485, "step": 2147 }, { "epoch": 51.14328358208955, "grad_norm": 21.923168182373047, "learning_rate": 8.17857142857143e-06, "loss": 45.3272, "step": 2148 }, { "epoch": 51.167164179104475, "grad_norm": 28.772884368896484, "learning_rate": 8.174603174603175e-06, "loss": 45.6205, "step": 2149 }, { "epoch": 51.191044776119405, "grad_norm": 23.949098587036133, "learning_rate": 8.17063492063492e-06, "loss": 45.0204, "step": 2150 }, { "epoch": 51.21492537313433, "grad_norm": 26.735624313354492, "learning_rate": 8.166666666666668e-06, "loss": 45.6338, "step": 2151 }, { "epoch": 51.23880597014925, "grad_norm": 28.049888610839844, "learning_rate": 8.162698412698413e-06, "loss": 44.2502, "step": 2152 }, { "epoch": 51.26268656716418, "grad_norm": 23.256439208984375, "learning_rate": 8.15873015873016e-06, "loss": 44.1981, "step": 2153 }, { "epoch": 51.286567164179104, "grad_norm": 32.3640022277832, "learning_rate": 8.154761904761905e-06, "loss": 43.6928, "step": 2154 }, { "epoch": 51.31044776119403, "grad_norm": 23.900907516479492, "learning_rate": 8.15079365079365e-06, "loss": 45.3594, "step": 2155 }, { "epoch": 51.33432835820896, "grad_norm": 39.41314697265625, "learning_rate": 8.146825396825398e-06, "loss": 44.5862, "step": 2156 }, { "epoch": 51.35820895522388, "grad_norm": 31.826566696166992, "learning_rate": 8.142857142857143e-06, "loss": 44.6213, "step": 2157 }, { "epoch": 51.3820895522388, "grad_norm": 35.3351936340332, "learning_rate": 8.138888888888889e-06, "loss": 44.9952, "step": 2158 }, { "epoch": 51.40597014925373, "grad_norm": 33.0169677734375, "learning_rate": 8.134920634920636e-06, "loss": 44.7576, "step": 2159 }, { "epoch": 51.429850746268656, "grad_norm": 32.347251892089844, "learning_rate": 8.130952380952381e-06, "loss": 45.0997, "step": 2160 }, { "epoch": 51.45373134328358, "grad_norm": 25.79857635498047, "learning_rate": 8.126984126984128e-06, "loss": 45.8578, "step": 2161 }, { "epoch": 51.47761194029851, "grad_norm": 33.378108978271484, "learning_rate": 8.123015873015874e-06, "loss": 44.6084, "step": 2162 }, { "epoch": 51.50149253731343, "grad_norm": 27.625028610229492, "learning_rate": 8.119047619047619e-06, "loss": 45.1928, "step": 2163 }, { "epoch": 51.525373134328355, "grad_norm": 32.47718811035156, "learning_rate": 8.115079365079366e-06, "loss": 44.38, "step": 2164 }, { "epoch": 51.549253731343285, "grad_norm": 31.10133934020996, "learning_rate": 8.111111111111112e-06, "loss": 44.1878, "step": 2165 }, { "epoch": 51.57313432835821, "grad_norm": 33.062007904052734, "learning_rate": 8.107142857142859e-06, "loss": 44.6587, "step": 2166 }, { "epoch": 51.59701492537313, "grad_norm": 31.35774803161621, "learning_rate": 8.103174603174604e-06, "loss": 44.0408, "step": 2167 }, { "epoch": 51.62089552238806, "grad_norm": 35.262237548828125, "learning_rate": 8.09920634920635e-06, "loss": 45.3717, "step": 2168 }, { "epoch": 51.644776119402984, "grad_norm": 32.77524948120117, "learning_rate": 8.095238095238097e-06, "loss": 44.8105, "step": 2169 }, { "epoch": 51.668656716417914, "grad_norm": 28.838821411132812, "learning_rate": 8.091269841269842e-06, "loss": 44.3364, "step": 2170 }, { "epoch": 51.69253731343284, "grad_norm": 26.18807029724121, "learning_rate": 8.087301587301587e-06, "loss": 44.5054, "step": 2171 }, { "epoch": 51.71641791044776, "grad_norm": 31.639286041259766, "learning_rate": 8.083333333333334e-06, "loss": 45.4023, "step": 2172 }, { "epoch": 51.74029850746269, "grad_norm": 27.998628616333008, "learning_rate": 8.07936507936508e-06, "loss": 44.8306, "step": 2173 }, { "epoch": 51.76417910447761, "grad_norm": 30.69230079650879, "learning_rate": 8.075396825396827e-06, "loss": 45.1802, "step": 2174 }, { "epoch": 51.788059701492536, "grad_norm": 23.640962600708008, "learning_rate": 8.071428571428572e-06, "loss": 43.7667, "step": 2175 }, { "epoch": 51.811940298507466, "grad_norm": 29.017114639282227, "learning_rate": 8.067460317460318e-06, "loss": 43.9821, "step": 2176 }, { "epoch": 51.83582089552239, "grad_norm": 21.79175567626953, "learning_rate": 8.063492063492065e-06, "loss": 45.0959, "step": 2177 }, { "epoch": 51.85970149253731, "grad_norm": 25.505756378173828, "learning_rate": 8.05952380952381e-06, "loss": 44.1622, "step": 2178 }, { "epoch": 51.88358208955224, "grad_norm": 19.43979263305664, "learning_rate": 8.055555555555557e-06, "loss": 43.4959, "step": 2179 }, { "epoch": 51.907462686567165, "grad_norm": 32.855037689208984, "learning_rate": 8.051587301587303e-06, "loss": 44.3206, "step": 2180 }, { "epoch": 51.93134328358209, "grad_norm": 23.80797576904297, "learning_rate": 8.047619047619048e-06, "loss": 43.6716, "step": 2181 }, { "epoch": 51.95522388059702, "grad_norm": 37.09321594238281, "learning_rate": 8.043650793650795e-06, "loss": 45.3091, "step": 2182 }, { "epoch": 51.97910447761194, "grad_norm": 25.76487922668457, "learning_rate": 8.03968253968254e-06, "loss": 44.5829, "step": 2183 }, { "epoch": 52.0, "grad_norm": 24.34773063659668, "learning_rate": 8.035714285714286e-06, "loss": 39.637, "step": 2184 }, { "epoch": 52.02388059701492, "grad_norm": 24.28459358215332, "learning_rate": 8.031746031746033e-06, "loss": 42.8823, "step": 2185 }, { "epoch": 52.04776119402985, "grad_norm": 31.015172958374023, "learning_rate": 8.027777777777778e-06, "loss": 43.6859, "step": 2186 }, { "epoch": 52.071641791044776, "grad_norm": 27.413232803344727, "learning_rate": 8.023809523809526e-06, "loss": 44.0734, "step": 2187 }, { "epoch": 52.0955223880597, "grad_norm": 34.3042106628418, "learning_rate": 8.019841269841271e-06, "loss": 44.4303, "step": 2188 }, { "epoch": 52.11940298507463, "grad_norm": 25.737226486206055, "learning_rate": 8.015873015873016e-06, "loss": 45.6858, "step": 2189 }, { "epoch": 52.14328358208955, "grad_norm": 33.09044647216797, "learning_rate": 8.011904761904763e-06, "loss": 44.0591, "step": 2190 }, { "epoch": 52.167164179104475, "grad_norm": 26.903594970703125, "learning_rate": 8.007936507936509e-06, "loss": 44.4434, "step": 2191 }, { "epoch": 52.191044776119405, "grad_norm": 32.05507278442383, "learning_rate": 8.003968253968254e-06, "loss": 44.1334, "step": 2192 }, { "epoch": 52.21492537313433, "grad_norm": 23.954050064086914, "learning_rate": 8.000000000000001e-06, "loss": 45.4077, "step": 2193 }, { "epoch": 52.23880597014925, "grad_norm": 25.273069381713867, "learning_rate": 7.996031746031747e-06, "loss": 44.4704, "step": 2194 }, { "epoch": 52.26268656716418, "grad_norm": 24.762975692749023, "learning_rate": 7.992063492063494e-06, "loss": 44.9846, "step": 2195 }, { "epoch": 52.286567164179104, "grad_norm": 31.624853134155273, "learning_rate": 7.98809523809524e-06, "loss": 44.6678, "step": 2196 }, { "epoch": 52.31044776119403, "grad_norm": 20.407798767089844, "learning_rate": 7.984126984126985e-06, "loss": 44.5191, "step": 2197 }, { "epoch": 52.33432835820896, "grad_norm": 35.610721588134766, "learning_rate": 7.980158730158732e-06, "loss": 43.797, "step": 2198 }, { "epoch": 52.35820895522388, "grad_norm": 23.916271209716797, "learning_rate": 7.976190476190477e-06, "loss": 44.5035, "step": 2199 }, { "epoch": 52.3820895522388, "grad_norm": 30.07246971130371, "learning_rate": 7.972222222222224e-06, "loss": 44.8658, "step": 2200 }, { "epoch": 52.40597014925373, "grad_norm": 26.69670295715332, "learning_rate": 7.968253968253968e-06, "loss": 43.1086, "step": 2201 }, { "epoch": 52.429850746268656, "grad_norm": 35.99201583862305, "learning_rate": 7.964285714285715e-06, "loss": 43.8965, "step": 2202 }, { "epoch": 52.45373134328358, "grad_norm": 26.909433364868164, "learning_rate": 7.96031746031746e-06, "loss": 44.3023, "step": 2203 }, { "epoch": 52.47761194029851, "grad_norm": 31.2402286529541, "learning_rate": 7.956349206349206e-06, "loss": 43.8009, "step": 2204 }, { "epoch": 52.50149253731343, "grad_norm": 28.230714797973633, "learning_rate": 7.952380952380953e-06, "loss": 45.6781, "step": 2205 }, { "epoch": 52.525373134328355, "grad_norm": 32.47516632080078, "learning_rate": 7.948412698412698e-06, "loss": 46.0123, "step": 2206 }, { "epoch": 52.549253731343285, "grad_norm": 29.042253494262695, "learning_rate": 7.944444444444445e-06, "loss": 46.4036, "step": 2207 }, { "epoch": 52.57313432835821, "grad_norm": 24.23044776916504, "learning_rate": 7.94047619047619e-06, "loss": 44.0722, "step": 2208 }, { "epoch": 52.59701492537313, "grad_norm": 25.844972610473633, "learning_rate": 7.936507936507936e-06, "loss": 44.1403, "step": 2209 }, { "epoch": 52.62089552238806, "grad_norm": 25.40447235107422, "learning_rate": 7.932539682539683e-06, "loss": 43.699, "step": 2210 }, { "epoch": 52.644776119402984, "grad_norm": 24.027687072753906, "learning_rate": 7.928571428571429e-06, "loss": 45.1803, "step": 2211 }, { "epoch": 52.668656716417914, "grad_norm": 22.707393646240234, "learning_rate": 7.924603174603174e-06, "loss": 43.7808, "step": 2212 }, { "epoch": 52.69253731343284, "grad_norm": 17.410104751586914, "learning_rate": 7.920634920634921e-06, "loss": 44.7556, "step": 2213 }, { "epoch": 52.71641791044776, "grad_norm": 19.376863479614258, "learning_rate": 7.916666666666667e-06, "loss": 45.3176, "step": 2214 }, { "epoch": 52.74029850746269, "grad_norm": 21.29641342163086, "learning_rate": 7.912698412698414e-06, "loss": 44.8597, "step": 2215 }, { "epoch": 52.76417910447761, "grad_norm": 21.937013626098633, "learning_rate": 7.908730158730159e-06, "loss": 44.3548, "step": 2216 }, { "epoch": 52.788059701492536, "grad_norm": 27.38592529296875, "learning_rate": 7.904761904761904e-06, "loss": 45.204, "step": 2217 }, { "epoch": 52.811940298507466, "grad_norm": 21.232566833496094, "learning_rate": 7.900793650793652e-06, "loss": 43.9788, "step": 2218 }, { "epoch": 52.83582089552239, "grad_norm": 22.52651023864746, "learning_rate": 7.896825396825397e-06, "loss": 44.161, "step": 2219 }, { "epoch": 52.85970149253731, "grad_norm": 23.06977081298828, "learning_rate": 7.892857142857144e-06, "loss": 44.5394, "step": 2220 }, { "epoch": 52.88358208955224, "grad_norm": 19.71670150756836, "learning_rate": 7.88888888888889e-06, "loss": 44.4384, "step": 2221 }, { "epoch": 52.907462686567165, "grad_norm": 19.651142120361328, "learning_rate": 7.884920634920635e-06, "loss": 45.3143, "step": 2222 }, { "epoch": 52.93134328358209, "grad_norm": 23.386962890625, "learning_rate": 7.880952380952382e-06, "loss": 44.4246, "step": 2223 }, { "epoch": 52.95522388059702, "grad_norm": 19.763513565063477, "learning_rate": 7.876984126984127e-06, "loss": 45.6001, "step": 2224 }, { "epoch": 52.97910447761194, "grad_norm": 21.81069564819336, "learning_rate": 7.873015873015873e-06, "loss": 45.6176, "step": 2225 }, { "epoch": 53.0, "grad_norm": 18.15079689025879, "learning_rate": 7.86904761904762e-06, "loss": 39.4819, "step": 2226 }, { "epoch": 53.02388059701492, "grad_norm": 17.333694458007812, "learning_rate": 7.865079365079365e-06, "loss": 45.612, "step": 2227 }, { "epoch": 53.04776119402985, "grad_norm": 18.72818946838379, "learning_rate": 7.861111111111112e-06, "loss": 43.5423, "step": 2228 }, { "epoch": 53.071641791044776, "grad_norm": 18.34732437133789, "learning_rate": 7.857142857142858e-06, "loss": 43.972, "step": 2229 }, { "epoch": 53.0955223880597, "grad_norm": 21.568077087402344, "learning_rate": 7.853174603174603e-06, "loss": 44.8122, "step": 2230 }, { "epoch": 53.11940298507463, "grad_norm": 20.801836013793945, "learning_rate": 7.84920634920635e-06, "loss": 43.8729, "step": 2231 }, { "epoch": 53.14328358208955, "grad_norm": 23.20212745666504, "learning_rate": 7.845238095238096e-06, "loss": 45.3738, "step": 2232 }, { "epoch": 53.167164179104475, "grad_norm": 24.016311645507812, "learning_rate": 7.841269841269843e-06, "loss": 45.0862, "step": 2233 }, { "epoch": 53.191044776119405, "grad_norm": 18.800554275512695, "learning_rate": 7.837301587301588e-06, "loss": 43.2166, "step": 2234 }, { "epoch": 53.21492537313433, "grad_norm": 20.73765754699707, "learning_rate": 7.833333333333333e-06, "loss": 43.3917, "step": 2235 }, { "epoch": 53.23880597014925, "grad_norm": 23.1943302154541, "learning_rate": 7.82936507936508e-06, "loss": 44.0957, "step": 2236 }, { "epoch": 53.26268656716418, "grad_norm": 18.450380325317383, "learning_rate": 7.825396825396826e-06, "loss": 44.1782, "step": 2237 }, { "epoch": 53.286567164179104, "grad_norm": 24.16314697265625, "learning_rate": 7.821428571428571e-06, "loss": 45.0735, "step": 2238 }, { "epoch": 53.31044776119403, "grad_norm": 25.004743576049805, "learning_rate": 7.817460317460318e-06, "loss": 44.4628, "step": 2239 }, { "epoch": 53.33432835820896, "grad_norm": 17.29636573791504, "learning_rate": 7.813492063492064e-06, "loss": 45.2476, "step": 2240 }, { "epoch": 53.35820895522388, "grad_norm": 24.759471893310547, "learning_rate": 7.809523809523811e-06, "loss": 44.71, "step": 2241 }, { "epoch": 53.3820895522388, "grad_norm": 21.52720832824707, "learning_rate": 7.805555555555556e-06, "loss": 44.9929, "step": 2242 }, { "epoch": 53.40597014925373, "grad_norm": 22.760278701782227, "learning_rate": 7.801587301587302e-06, "loss": 43.6639, "step": 2243 }, { "epoch": 53.429850746268656, "grad_norm": 19.5325927734375, "learning_rate": 7.797619047619049e-06, "loss": 44.2974, "step": 2244 }, { "epoch": 53.45373134328358, "grad_norm": 25.756797790527344, "learning_rate": 7.793650793650794e-06, "loss": 45.401, "step": 2245 }, { "epoch": 53.47761194029851, "grad_norm": 19.75324058532715, "learning_rate": 7.78968253968254e-06, "loss": 44.6426, "step": 2246 }, { "epoch": 53.50149253731343, "grad_norm": 25.47930145263672, "learning_rate": 7.785714285714287e-06, "loss": 42.2875, "step": 2247 }, { "epoch": 53.525373134328355, "grad_norm": 21.61121368408203, "learning_rate": 7.781746031746032e-06, "loss": 45.7982, "step": 2248 }, { "epoch": 53.549253731343285, "grad_norm": 24.11342430114746, "learning_rate": 7.77777777777778e-06, "loss": 43.6397, "step": 2249 }, { "epoch": 53.57313432835821, "grad_norm": 25.151281356811523, "learning_rate": 7.773809523809525e-06, "loss": 44.0536, "step": 2250 }, { "epoch": 53.59701492537313, "grad_norm": 21.925559997558594, "learning_rate": 7.76984126984127e-06, "loss": 45.2035, "step": 2251 }, { "epoch": 53.62089552238806, "grad_norm": 22.38170623779297, "learning_rate": 7.765873015873017e-06, "loss": 44.3272, "step": 2252 }, { "epoch": 53.644776119402984, "grad_norm": 24.35360336303711, "learning_rate": 7.761904761904762e-06, "loss": 45.687, "step": 2253 }, { "epoch": 53.668656716417914, "grad_norm": 20.127119064331055, "learning_rate": 7.75793650793651e-06, "loss": 44.001, "step": 2254 }, { "epoch": 53.69253731343284, "grad_norm": 20.66204833984375, "learning_rate": 7.753968253968255e-06, "loss": 45.1368, "step": 2255 }, { "epoch": 53.71641791044776, "grad_norm": 22.565038681030273, "learning_rate": 7.75e-06, "loss": 43.7021, "step": 2256 }, { "epoch": 53.74029850746269, "grad_norm": 20.893674850463867, "learning_rate": 7.746031746031747e-06, "loss": 44.381, "step": 2257 }, { "epoch": 53.76417910447761, "grad_norm": 21.53620147705078, "learning_rate": 7.742063492063493e-06, "loss": 45.2511, "step": 2258 }, { "epoch": 53.788059701492536, "grad_norm": 20.66484832763672, "learning_rate": 7.738095238095238e-06, "loss": 45.167, "step": 2259 }, { "epoch": 53.811940298507466, "grad_norm": 24.964414596557617, "learning_rate": 7.734126984126985e-06, "loss": 44.6754, "step": 2260 }, { "epoch": 53.83582089552239, "grad_norm": 22.17997169494629, "learning_rate": 7.73015873015873e-06, "loss": 44.1696, "step": 2261 }, { "epoch": 53.85970149253731, "grad_norm": 19.715208053588867, "learning_rate": 7.726190476190478e-06, "loss": 43.8961, "step": 2262 }, { "epoch": 53.88358208955224, "grad_norm": 27.562166213989258, "learning_rate": 7.722222222222223e-06, "loss": 43.9035, "step": 2263 }, { "epoch": 53.907462686567165, "grad_norm": 21.021081924438477, "learning_rate": 7.718253968253969e-06, "loss": 45.5108, "step": 2264 }, { "epoch": 53.93134328358209, "grad_norm": 25.675813674926758, "learning_rate": 7.714285714285716e-06, "loss": 44.5437, "step": 2265 }, { "epoch": 53.95522388059702, "grad_norm": 25.80996322631836, "learning_rate": 7.710317460317461e-06, "loss": 44.7417, "step": 2266 }, { "epoch": 53.97910447761194, "grad_norm": 23.055217742919922, "learning_rate": 7.706349206349208e-06, "loss": 44.3122, "step": 2267 }, { "epoch": 54.0, "grad_norm": 14.958904266357422, "learning_rate": 7.702380952380954e-06, "loss": 40.1174, "step": 2268 }, { "epoch": 54.02388059701492, "grad_norm": 23.825021743774414, "learning_rate": 7.698412698412699e-06, "loss": 43.6857, "step": 2269 }, { "epoch": 54.04776119402985, "grad_norm": 20.29824447631836, "learning_rate": 7.694444444444446e-06, "loss": 43.9979, "step": 2270 }, { "epoch": 54.071641791044776, "grad_norm": 19.905017852783203, "learning_rate": 7.690476190476191e-06, "loss": 44.1637, "step": 2271 }, { "epoch": 54.0955223880597, "grad_norm": 16.342378616333008, "learning_rate": 7.686507936507937e-06, "loss": 43.9998, "step": 2272 }, { "epoch": 54.11940298507463, "grad_norm": 22.551780700683594, "learning_rate": 7.682539682539684e-06, "loss": 44.6528, "step": 2273 }, { "epoch": 54.14328358208955, "grad_norm": 16.87897491455078, "learning_rate": 7.67857142857143e-06, "loss": 44.4259, "step": 2274 }, { "epoch": 54.167164179104475, "grad_norm": 27.494592666625977, "learning_rate": 7.674603174603176e-06, "loss": 45.7648, "step": 2275 }, { "epoch": 54.191044776119405, "grad_norm": 22.326452255249023, "learning_rate": 7.670634920634922e-06, "loss": 44.1077, "step": 2276 }, { "epoch": 54.21492537313433, "grad_norm": 30.2500057220459, "learning_rate": 7.666666666666667e-06, "loss": 44.5322, "step": 2277 }, { "epoch": 54.23880597014925, "grad_norm": 28.212095260620117, "learning_rate": 7.662698412698414e-06, "loss": 43.6225, "step": 2278 }, { "epoch": 54.26268656716418, "grad_norm": 23.524145126342773, "learning_rate": 7.65873015873016e-06, "loss": 44.9014, "step": 2279 }, { "epoch": 54.286567164179104, "grad_norm": 29.799076080322266, "learning_rate": 7.654761904761905e-06, "loss": 44.6654, "step": 2280 }, { "epoch": 54.31044776119403, "grad_norm": 20.350683212280273, "learning_rate": 7.65079365079365e-06, "loss": 44.254, "step": 2281 }, { "epoch": 54.33432835820896, "grad_norm": 25.748899459838867, "learning_rate": 7.646825396825398e-06, "loss": 44.4278, "step": 2282 }, { "epoch": 54.35820895522388, "grad_norm": 25.086284637451172, "learning_rate": 7.642857142857143e-06, "loss": 43.8974, "step": 2283 }, { "epoch": 54.3820895522388, "grad_norm": 24.49972915649414, "learning_rate": 7.638888888888888e-06, "loss": 44.4423, "step": 2284 }, { "epoch": 54.40597014925373, "grad_norm": 18.78260612487793, "learning_rate": 7.634920634920635e-06, "loss": 44.5666, "step": 2285 }, { "epoch": 54.429850746268656, "grad_norm": 20.125263214111328, "learning_rate": 7.630952380952381e-06, "loss": 44.4853, "step": 2286 }, { "epoch": 54.45373134328358, "grad_norm": 20.763385772705078, "learning_rate": 7.626984126984127e-06, "loss": 43.6951, "step": 2287 }, { "epoch": 54.47761194029851, "grad_norm": 22.267620086669922, "learning_rate": 7.623015873015873e-06, "loss": 45.0328, "step": 2288 }, { "epoch": 54.50149253731343, "grad_norm": 24.786283493041992, "learning_rate": 7.61904761904762e-06, "loss": 45.0047, "step": 2289 }, { "epoch": 54.525373134328355, "grad_norm": 18.581987380981445, "learning_rate": 7.615079365079365e-06, "loss": 45.0172, "step": 2290 }, { "epoch": 54.549253731343285, "grad_norm": 27.262859344482422, "learning_rate": 7.611111111111111e-06, "loss": 44.1982, "step": 2291 }, { "epoch": 54.57313432835821, "grad_norm": 25.255537033081055, "learning_rate": 7.6071428571428575e-06, "loss": 43.8743, "step": 2292 }, { "epoch": 54.59701492537313, "grad_norm": 17.982698440551758, "learning_rate": 7.603174603174604e-06, "loss": 43.4799, "step": 2293 }, { "epoch": 54.62089552238806, "grad_norm": 25.1834716796875, "learning_rate": 7.599206349206349e-06, "loss": 44.8257, "step": 2294 }, { "epoch": 54.644776119402984, "grad_norm": 19.047700881958008, "learning_rate": 7.595238095238095e-06, "loss": 45.2678, "step": 2295 }, { "epoch": 54.668656716417914, "grad_norm": 26.404882431030273, "learning_rate": 7.591269841269842e-06, "loss": 43.3948, "step": 2296 }, { "epoch": 54.69253731343284, "grad_norm": 19.84337615966797, "learning_rate": 7.587301587301588e-06, "loss": 45.7629, "step": 2297 }, { "epoch": 54.71641791044776, "grad_norm": 19.9034481048584, "learning_rate": 7.583333333333333e-06, "loss": 44.4071, "step": 2298 }, { "epoch": 54.74029850746269, "grad_norm": NaN, "learning_rate": 7.5793650793650795e-06, "loss": 62.5737, "step": 2299 }, { "epoch": 54.76417910447761, "grad_norm": 21.210494995117188, "learning_rate": 7.5793650793650795e-06, "loss": 44.7093, "step": 2300 }, { "epoch": 54.788059701492536, "grad_norm": 21.403833389282227, "learning_rate": 7.575396825396826e-06, "loss": 44.6465, "step": 2301 }, { "epoch": 54.811940298507466, "grad_norm": 21.515085220336914, "learning_rate": 7.571428571428572e-06, "loss": 44.3846, "step": 2302 }, { "epoch": 54.83582089552239, "grad_norm": 25.024738311767578, "learning_rate": 7.567460317460317e-06, "loss": 44.7177, "step": 2303 }, { "epoch": 54.85970149253731, "grad_norm": 18.742982864379883, "learning_rate": 7.563492063492064e-06, "loss": 44.0076, "step": 2304 }, { "epoch": 54.88358208955224, "grad_norm": 20.333248138427734, "learning_rate": 7.55952380952381e-06, "loss": 43.6154, "step": 2305 }, { "epoch": 54.907462686567165, "grad_norm": 21.4791316986084, "learning_rate": 7.555555555555556e-06, "loss": 44.2151, "step": 2306 }, { "epoch": 54.93134328358209, "grad_norm": 26.272165298461914, "learning_rate": 7.551587301587302e-06, "loss": 45.5681, "step": 2307 }, { "epoch": 54.95522388059702, "grad_norm": 24.541637420654297, "learning_rate": 7.547619047619048e-06, "loss": 42.8248, "step": 2308 }, { "epoch": 54.97910447761194, "grad_norm": 23.563505172729492, "learning_rate": 7.543650793650794e-06, "loss": 45.1566, "step": 2309 }, { "epoch": 55.0, "grad_norm": 18.819583892822266, "learning_rate": 7.53968253968254e-06, "loss": 39.9674, "step": 2310 }, { "epoch": 55.02388059701492, "grad_norm": 21.50135040283203, "learning_rate": 7.5357142857142865e-06, "loss": 44.7008, "step": 2311 }, { "epoch": 55.04776119402985, "grad_norm": 20.28955078125, "learning_rate": 7.531746031746032e-06, "loss": 45.7705, "step": 2312 }, { "epoch": 55.071641791044776, "grad_norm": 19.94631004333496, "learning_rate": 7.527777777777778e-06, "loss": 44.3223, "step": 2313 }, { "epoch": 55.0955223880597, "grad_norm": 22.5699520111084, "learning_rate": 7.523809523809524e-06, "loss": 45.2447, "step": 2314 }, { "epoch": 55.11940298507463, "grad_norm": 17.17287254333496, "learning_rate": 7.519841269841271e-06, "loss": 44.4756, "step": 2315 }, { "epoch": 55.14328358208955, "grad_norm": 17.727617263793945, "learning_rate": 7.515873015873016e-06, "loss": 44.3907, "step": 2316 }, { "epoch": 55.167164179104475, "grad_norm": 18.320112228393555, "learning_rate": 7.511904761904762e-06, "loss": 44.3684, "step": 2317 }, { "epoch": 55.191044776119405, "grad_norm": 14.576784133911133, "learning_rate": 7.5079365079365085e-06, "loss": 43.8936, "step": 2318 }, { "epoch": 55.21492537313433, "grad_norm": 18.880218505859375, "learning_rate": 7.503968253968255e-06, "loss": 44.5556, "step": 2319 }, { "epoch": 55.23880597014925, "grad_norm": 19.764198303222656, "learning_rate": 7.500000000000001e-06, "loss": 43.8549, "step": 2320 }, { "epoch": 55.26268656716418, "grad_norm": NaN, "learning_rate": 7.4960317460317464e-06, "loss": 44.1908, "step": 2321 }, { "epoch": 55.286567164179104, "grad_norm": 19.977001190185547, "learning_rate": 7.4960317460317464e-06, "loss": 44.3724, "step": 2322 }, { "epoch": 55.31044776119403, "grad_norm": 21.85407066345215, "learning_rate": 7.492063492063493e-06, "loss": 44.2829, "step": 2323 }, { "epoch": 55.33432835820896, "grad_norm": 22.168100357055664, "learning_rate": 7.488095238095239e-06, "loss": 44.4898, "step": 2324 }, { "epoch": 55.35820895522388, "grad_norm": 18.234298706054688, "learning_rate": 7.484126984126985e-06, "loss": 44.3711, "step": 2325 }, { "epoch": 55.3820895522388, "grad_norm": 30.803691864013672, "learning_rate": 7.4801587301587306e-06, "loss": 43.825, "step": 2326 }, { "epoch": 55.40597014925373, "grad_norm": 22.18082046508789, "learning_rate": 7.476190476190477e-06, "loss": 44.8057, "step": 2327 }, { "epoch": 55.429850746268656, "grad_norm": 32.34336853027344, "learning_rate": 7.472222222222223e-06, "loss": 45.0128, "step": 2328 }, { "epoch": 55.45373134328358, "grad_norm": 28.040363311767578, "learning_rate": 7.468253968253969e-06, "loss": 45.2428, "step": 2329 }, { "epoch": 55.47761194029851, "grad_norm": 31.505037307739258, "learning_rate": 7.464285714285715e-06, "loss": 43.6846, "step": 2330 }, { "epoch": 55.50149253731343, "grad_norm": 21.62251853942871, "learning_rate": 7.460317460317461e-06, "loss": 44.467, "step": 2331 }, { "epoch": 55.525373134328355, "grad_norm": 29.035741806030273, "learning_rate": 7.456349206349207e-06, "loss": 43.7965, "step": 2332 }, { "epoch": 55.549253731343285, "grad_norm": 17.466440200805664, "learning_rate": 7.4523809523809534e-06, "loss": 44.8456, "step": 2333 }, { "epoch": 55.57313432835821, "grad_norm": 37.01939392089844, "learning_rate": 7.448412698412699e-06, "loss": 44.4359, "step": 2334 }, { "epoch": 55.59701492537313, "grad_norm": 24.612354278564453, "learning_rate": 7.444444444444445e-06, "loss": 43.7534, "step": 2335 }, { "epoch": 55.62089552238806, "grad_norm": 39.57061004638672, "learning_rate": 7.440476190476191e-06, "loss": 45.5826, "step": 2336 }, { "epoch": 55.644776119402984, "grad_norm": 33.687538146972656, "learning_rate": 7.4365079365079376e-06, "loss": 44.3013, "step": 2337 }, { "epoch": 55.668656716417914, "grad_norm": 38.39813995361328, "learning_rate": 7.432539682539684e-06, "loss": 44.7874, "step": 2338 }, { "epoch": 55.69253731343284, "grad_norm": 38.15765380859375, "learning_rate": 7.428571428571429e-06, "loss": 44.6177, "step": 2339 }, { "epoch": 55.71641791044776, "grad_norm": 27.5556640625, "learning_rate": 7.4246031746031754e-06, "loss": 43.1288, "step": 2340 }, { "epoch": 55.74029850746269, "grad_norm": 30.958349227905273, "learning_rate": 7.420634920634922e-06, "loss": 45.4904, "step": 2341 }, { "epoch": 55.76417910447761, "grad_norm": 21.71723747253418, "learning_rate": 7.416666666666668e-06, "loss": 43.0391, "step": 2342 }, { "epoch": 55.788059701492536, "grad_norm": 24.229736328125, "learning_rate": 7.412698412698413e-06, "loss": 44.0514, "step": 2343 }, { "epoch": 55.811940298507466, "grad_norm": 32.11771011352539, "learning_rate": 7.40873015873016e-06, "loss": 44.5521, "step": 2344 }, { "epoch": 55.83582089552239, "grad_norm": 21.694486618041992, "learning_rate": 7.404761904761906e-06, "loss": 43.9067, "step": 2345 }, { "epoch": 55.85970149253731, "grad_norm": 31.469402313232422, "learning_rate": 7.400793650793652e-06, "loss": 43.3506, "step": 2346 }, { "epoch": 55.88358208955224, "grad_norm": 28.431011199951172, "learning_rate": 7.3968253968253975e-06, "loss": 43.7056, "step": 2347 }, { "epoch": 55.907462686567165, "grad_norm": 34.616065979003906, "learning_rate": 7.392857142857144e-06, "loss": 44.0428, "step": 2348 }, { "epoch": 55.93134328358209, "grad_norm": 27.52882194519043, "learning_rate": 7.38888888888889e-06, "loss": 43.8619, "step": 2349 }, { "epoch": 55.95522388059702, "grad_norm": 36.93627166748047, "learning_rate": 7.384920634920636e-06, "loss": 44.1589, "step": 2350 }, { "epoch": 55.97910447761194, "grad_norm": 37.746578216552734, "learning_rate": 7.380952380952382e-06, "loss": 46.0174, "step": 2351 }, { "epoch": 56.0, "grad_norm": 20.20071029663086, "learning_rate": 7.376984126984128e-06, "loss": 37.7123, "step": 2352 }, { "epoch": 56.02388059701492, "grad_norm": 25.446529388427734, "learning_rate": 7.373015873015874e-06, "loss": 43.4657, "step": 2353 }, { "epoch": 56.04776119402985, "grad_norm": 25.78912353515625, "learning_rate": 7.36904761904762e-06, "loss": 44.7379, "step": 2354 }, { "epoch": 56.071641791044776, "grad_norm": 24.028154373168945, "learning_rate": 7.3650793650793666e-06, "loss": 43.4876, "step": 2355 }, { "epoch": 56.0955223880597, "grad_norm": 30.157793045043945, "learning_rate": 7.361111111111112e-06, "loss": 44.2346, "step": 2356 }, { "epoch": 56.11940298507463, "grad_norm": 29.836891174316406, "learning_rate": 7.357142857142858e-06, "loss": 44.0793, "step": 2357 }, { "epoch": 56.14328358208955, "grad_norm": 30.458818435668945, "learning_rate": 7.3531746031746045e-06, "loss": 44.7587, "step": 2358 }, { "epoch": 56.167164179104475, "grad_norm": 28.30854034423828, "learning_rate": 7.349206349206351e-06, "loss": 44.9153, "step": 2359 }, { "epoch": 56.191044776119405, "grad_norm": 29.497085571289062, "learning_rate": 7.345238095238096e-06, "loss": 44.194, "step": 2360 }, { "epoch": 56.21492537313433, "grad_norm": 21.790063858032227, "learning_rate": 7.3412698412698415e-06, "loss": 44.1507, "step": 2361 }, { "epoch": 56.23880597014925, "grad_norm": 27.489242553710938, "learning_rate": 7.337301587301588e-06, "loss": 44.3505, "step": 2362 }, { "epoch": 56.26268656716418, "grad_norm": 22.906452178955078, "learning_rate": 7.333333333333333e-06, "loss": 44.9329, "step": 2363 }, { "epoch": 56.286567164179104, "grad_norm": 33.46257400512695, "learning_rate": 7.329365079365079e-06, "loss": 43.1323, "step": 2364 }, { "epoch": 56.31044776119403, "grad_norm": 22.410470962524414, "learning_rate": 7.325396825396826e-06, "loss": 43.6305, "step": 2365 }, { "epoch": 56.33432835820896, "grad_norm": 32.74277114868164, "learning_rate": 7.321428571428572e-06, "loss": 43.6165, "step": 2366 }, { "epoch": 56.35820895522388, "grad_norm": 31.368419647216797, "learning_rate": 7.317460317460317e-06, "loss": 44.8837, "step": 2367 }, { "epoch": 56.3820895522388, "grad_norm": 30.941593170166016, "learning_rate": 7.3134920634920635e-06, "loss": 44.2282, "step": 2368 }, { "epoch": 56.40597014925373, "grad_norm": 34.12104415893555, "learning_rate": 7.30952380952381e-06, "loss": 45.2191, "step": 2369 }, { "epoch": 56.429850746268656, "grad_norm": 28.09603500366211, "learning_rate": 7.305555555555556e-06, "loss": 44.9848, "step": 2370 }, { "epoch": 56.45373134328358, "grad_norm": 29.32378578186035, "learning_rate": 7.301587301587301e-06, "loss": 44.2658, "step": 2371 }, { "epoch": 56.47761194029851, "grad_norm": 26.269071578979492, "learning_rate": 7.297619047619048e-06, "loss": 46.2933, "step": 2372 }, { "epoch": 56.50149253731343, "grad_norm": 25.23761558532715, "learning_rate": 7.293650793650794e-06, "loss": 44.4726, "step": 2373 }, { "epoch": 56.525373134328355, "grad_norm": 27.6646728515625, "learning_rate": 7.28968253968254e-06, "loss": 43.9823, "step": 2374 }, { "epoch": 56.549253731343285, "grad_norm": 23.01959228515625, "learning_rate": 7.285714285714286e-06, "loss": 44.5168, "step": 2375 }, { "epoch": 56.57313432835821, "grad_norm": 31.120128631591797, "learning_rate": 7.281746031746032e-06, "loss": 44.6706, "step": 2376 }, { "epoch": 56.59701492537313, "grad_norm": 24.57699203491211, "learning_rate": 7.277777777777778e-06, "loss": 45.1781, "step": 2377 }, { "epoch": 56.62089552238806, "grad_norm": 28.831064224243164, "learning_rate": 7.273809523809524e-06, "loss": 43.7437, "step": 2378 }, { "epoch": 56.644776119402984, "grad_norm": 26.15156364440918, "learning_rate": 7.2698412698412705e-06, "loss": 45.1624, "step": 2379 }, { "epoch": 56.668656716417914, "grad_norm": 28.825542449951172, "learning_rate": 7.265873015873016e-06, "loss": 44.66, "step": 2380 }, { "epoch": 56.69253731343284, "grad_norm": 26.763559341430664, "learning_rate": 7.261904761904762e-06, "loss": 43.6022, "step": 2381 }, { "epoch": 56.71641791044776, "grad_norm": 26.9444522857666, "learning_rate": 7.257936507936508e-06, "loss": 44.025, "step": 2382 }, { "epoch": 56.74029850746269, "grad_norm": 24.93570327758789, "learning_rate": 7.253968253968255e-06, "loss": 45.3596, "step": 2383 }, { "epoch": 56.76417910447761, "grad_norm": 29.15943717956543, "learning_rate": 7.25e-06, "loss": 44.2434, "step": 2384 }, { "epoch": 56.788059701492536, "grad_norm": 25.154356002807617, "learning_rate": 7.246031746031746e-06, "loss": 44.1319, "step": 2385 }, { "epoch": 56.811940298507466, "grad_norm": 24.86849021911621, "learning_rate": 7.2420634920634925e-06, "loss": 44.5886, "step": 2386 }, { "epoch": 56.83582089552239, "grad_norm": 22.996164321899414, "learning_rate": 7.238095238095239e-06, "loss": 43.9457, "step": 2387 }, { "epoch": 56.85970149253731, "grad_norm": 21.19574737548828, "learning_rate": 7.234126984126984e-06, "loss": 42.4004, "step": 2388 }, { "epoch": 56.88358208955224, "grad_norm": 22.6845645904541, "learning_rate": 7.23015873015873e-06, "loss": 43.7031, "step": 2389 }, { "epoch": 56.907462686567165, "grad_norm": 24.80902099609375, "learning_rate": 7.226190476190477e-06, "loss": 45.4586, "step": 2390 }, { "epoch": 56.93134328358209, "grad_norm": 23.157859802246094, "learning_rate": 7.222222222222223e-06, "loss": 43.4031, "step": 2391 }, { "epoch": 56.95522388059702, "grad_norm": 16.62670135498047, "learning_rate": 7.218253968253969e-06, "loss": 43.9552, "step": 2392 }, { "epoch": 56.97910447761194, "grad_norm": 31.313417434692383, "learning_rate": 7.2142857142857145e-06, "loss": 44.5037, "step": 2393 }, { "epoch": 57.0, "grad_norm": 17.58053970336914, "learning_rate": 7.210317460317461e-06, "loss": 38.2111, "step": 2394 }, { "epoch": 57.02388059701492, "grad_norm": 28.399444580078125, "learning_rate": 7.206349206349207e-06, "loss": 43.0462, "step": 2395 }, { "epoch": 57.04776119402985, "grad_norm": 28.965984344482422, "learning_rate": 7.202380952380953e-06, "loss": 45.5986, "step": 2396 }, { "epoch": 57.071641791044776, "grad_norm": 20.43558120727539, "learning_rate": 7.198412698412699e-06, "loss": 43.5525, "step": 2397 }, { "epoch": 57.0955223880597, "grad_norm": 29.135162353515625, "learning_rate": 7.194444444444445e-06, "loss": 44.4881, "step": 2398 }, { "epoch": 57.11940298507463, "grad_norm": 23.580699920654297, "learning_rate": 7.190476190476191e-06, "loss": 43.7271, "step": 2399 }, { "epoch": 57.14328358208955, "grad_norm": 22.67143440246582, "learning_rate": 7.186507936507937e-06, "loss": 42.7561, "step": 2400 }, { "epoch": 57.167164179104475, "grad_norm": 21.757719039916992, "learning_rate": 7.182539682539683e-06, "loss": 44.4426, "step": 2401 }, { "epoch": 57.191044776119405, "grad_norm": 22.878713607788086, "learning_rate": 7.178571428571429e-06, "loss": 43.9361, "step": 2402 }, { "epoch": 57.21492537313433, "grad_norm": 21.213376998901367, "learning_rate": 7.174603174603175e-06, "loss": 44.4992, "step": 2403 }, { "epoch": 57.23880597014925, "grad_norm": 26.92378807067871, "learning_rate": 7.1706349206349215e-06, "loss": 43.3883, "step": 2404 }, { "epoch": 57.26268656716418, "grad_norm": 21.269611358642578, "learning_rate": 7.166666666666667e-06, "loss": 45.1554, "step": 2405 }, { "epoch": 57.286567164179104, "grad_norm": 24.775962829589844, "learning_rate": 7.162698412698413e-06, "loss": 44.6757, "step": 2406 }, { "epoch": 57.31044776119403, "grad_norm": 23.0452880859375, "learning_rate": 7.1587301587301594e-06, "loss": 44.0688, "step": 2407 }, { "epoch": 57.33432835820896, "grad_norm": 25.50167465209961, "learning_rate": 7.154761904761906e-06, "loss": 44.7899, "step": 2408 }, { "epoch": 57.35820895522388, "grad_norm": 21.560483932495117, "learning_rate": 7.150793650793652e-06, "loss": 44.3325, "step": 2409 }, { "epoch": 57.3820895522388, "grad_norm": 22.5815486907959, "learning_rate": 7.146825396825397e-06, "loss": 44.1531, "step": 2410 }, { "epoch": 57.40597014925373, "grad_norm": 22.582660675048828, "learning_rate": 7.1428571428571436e-06, "loss": 45.738, "step": 2411 }, { "epoch": 57.429850746268656, "grad_norm": 21.388498306274414, "learning_rate": 7.13888888888889e-06, "loss": 44.2737, "step": 2412 }, { "epoch": 57.45373134328358, "grad_norm": 25.76162338256836, "learning_rate": 7.134920634920636e-06, "loss": 45.273, "step": 2413 }, { "epoch": 57.47761194029851, "grad_norm": 22.20672035217285, "learning_rate": 7.1309523809523814e-06, "loss": 44.8847, "step": 2414 }, { "epoch": 57.50149253731343, "grad_norm": 22.07602310180664, "learning_rate": 7.126984126984128e-06, "loss": 45.0162, "step": 2415 }, { "epoch": 57.525373134328355, "grad_norm": 23.339630126953125, "learning_rate": 7.123015873015874e-06, "loss": 45.1952, "step": 2416 }, { "epoch": 57.549253731343285, "grad_norm": 21.66541290283203, "learning_rate": 7.11904761904762e-06, "loss": 43.586, "step": 2417 }, { "epoch": 57.57313432835821, "grad_norm": 19.015748977661133, "learning_rate": 7.115079365079366e-06, "loss": 44.391, "step": 2418 }, { "epoch": 57.59701492537313, "grad_norm": 20.501789093017578, "learning_rate": 7.111111111111112e-06, "loss": 43.8462, "step": 2419 }, { "epoch": 57.62089552238806, "grad_norm": 20.447154998779297, "learning_rate": 7.107142857142858e-06, "loss": 44.0195, "step": 2420 }, { "epoch": 57.644776119402984, "grad_norm": 17.749000549316406, "learning_rate": 7.103174603174604e-06, "loss": 43.0026, "step": 2421 }, { "epoch": 57.668656716417914, "grad_norm": 22.757408142089844, "learning_rate": 7.09920634920635e-06, "loss": 44.1692, "step": 2422 }, { "epoch": 57.69253731343284, "grad_norm": 17.98983383178711, "learning_rate": 7.095238095238096e-06, "loss": 43.4101, "step": 2423 }, { "epoch": 57.71641791044776, "grad_norm": 24.20079803466797, "learning_rate": 7.091269841269842e-06, "loss": 44.8966, "step": 2424 }, { "epoch": 57.74029850746269, "grad_norm": 22.89764404296875, "learning_rate": 7.0873015873015884e-06, "loss": 43.2216, "step": 2425 }, { "epoch": 57.76417910447761, "grad_norm": 22.002994537353516, "learning_rate": 7.083333333333335e-06, "loss": 44.6504, "step": 2426 }, { "epoch": 57.788059701492536, "grad_norm": 21.76395606994629, "learning_rate": 7.07936507936508e-06, "loss": 44.2117, "step": 2427 }, { "epoch": 57.811940298507466, "grad_norm": 21.774457931518555, "learning_rate": 7.075396825396826e-06, "loss": 42.8879, "step": 2428 }, { "epoch": 57.83582089552239, "grad_norm": 20.503652572631836, "learning_rate": 7.0714285714285726e-06, "loss": 45.954, "step": 2429 }, { "epoch": 57.85970149253731, "grad_norm": 23.904399871826172, "learning_rate": 7.067460317460319e-06, "loss": 43.356, "step": 2430 }, { "epoch": 57.88358208955224, "grad_norm": 19.87941551208496, "learning_rate": 7.063492063492064e-06, "loss": 43.2895, "step": 2431 }, { "epoch": 57.907462686567165, "grad_norm": 21.451114654541016, "learning_rate": 7.0595238095238105e-06, "loss": 45.5597, "step": 2432 }, { "epoch": 57.93134328358209, "grad_norm": 19.936561584472656, "learning_rate": 7.055555555555557e-06, "loss": 44.9952, "step": 2433 }, { "epoch": 57.95522388059702, "grad_norm": 28.258575439453125, "learning_rate": 7.051587301587303e-06, "loss": 44.6988, "step": 2434 }, { "epoch": 57.97910447761194, "grad_norm": 17.210622787475586, "learning_rate": 7.047619047619048e-06, "loss": 43.6215, "step": 2435 }, { "epoch": 58.0, "grad_norm": 24.241130828857422, "learning_rate": 7.043650793650795e-06, "loss": 38.4149, "step": 2436 }, { "epoch": 58.02388059701492, "grad_norm": 16.801782608032227, "learning_rate": 7.039682539682541e-06, "loss": 43.5018, "step": 2437 }, { "epoch": 58.04776119402985, "grad_norm": 29.925647735595703, "learning_rate": 7.035714285714287e-06, "loss": 43.2125, "step": 2438 }, { "epoch": 58.071641791044776, "grad_norm": 18.95758056640625, "learning_rate": 7.0317460317460325e-06, "loss": 44.4259, "step": 2439 }, { "epoch": 58.0955223880597, "grad_norm": 23.302980422973633, "learning_rate": 7.027777777777778e-06, "loss": 44.0357, "step": 2440 }, { "epoch": 58.11940298507463, "grad_norm": 20.59381866455078, "learning_rate": 7.023809523809524e-06, "loss": 43.307, "step": 2441 }, { "epoch": 58.14328358208955, "grad_norm": 22.13384246826172, "learning_rate": 7.0198412698412695e-06, "loss": 44.4651, "step": 2442 }, { "epoch": 58.167164179104475, "grad_norm": 19.862943649291992, "learning_rate": 7.015873015873016e-06, "loss": 44.2492, "step": 2443 }, { "epoch": 58.191044776119405, "grad_norm": 17.979585647583008, "learning_rate": 7.011904761904762e-06, "loss": 44.8333, "step": 2444 }, { "epoch": 58.21492537313433, "grad_norm": 24.35641860961914, "learning_rate": 7.007936507936508e-06, "loss": 43.9583, "step": 2445 }, { "epoch": 58.23880597014925, "grad_norm": 20.068201065063477, "learning_rate": 7.0039682539682545e-06, "loss": 44.2379, "step": 2446 }, { "epoch": 58.26268656716418, "grad_norm": 24.976778030395508, "learning_rate": 7e-06, "loss": 45.3054, "step": 2447 }, { "epoch": 58.286567164179104, "grad_norm": 19.772478103637695, "learning_rate": 6.996031746031746e-06, "loss": 44.7523, "step": 2448 }, { "epoch": 58.31044776119403, "grad_norm": 24.084999084472656, "learning_rate": 6.992063492063492e-06, "loss": 43.7454, "step": 2449 }, { "epoch": 58.33432835820896, "grad_norm": 23.313718795776367, "learning_rate": 6.988095238095239e-06, "loss": 44.8838, "step": 2450 }, { "epoch": 58.35820895522388, "grad_norm": 22.206872940063477, "learning_rate": 6.984126984126984e-06, "loss": 45.3141, "step": 2451 }, { "epoch": 58.3820895522388, "grad_norm": 17.005720138549805, "learning_rate": 6.98015873015873e-06, "loss": 43.8783, "step": 2452 }, { "epoch": 58.40597014925373, "grad_norm": NaN, "learning_rate": 6.9761904761904765e-06, "loss": 43.2019, "step": 2453 }, { "epoch": 58.429850746268656, "grad_norm": 20.627288818359375, "learning_rate": 6.9761904761904765e-06, "loss": 44.6086, "step": 2454 }, { "epoch": 58.45373134328358, "grad_norm": 19.47835350036621, "learning_rate": 6.972222222222223e-06, "loss": 44.6011, "step": 2455 }, { "epoch": 58.47761194029851, "grad_norm": 23.258609771728516, "learning_rate": 6.968253968253968e-06, "loss": 44.0184, "step": 2456 }, { "epoch": 58.50149253731343, "grad_norm": 22.79289436340332, "learning_rate": 6.964285714285714e-06, "loss": 44.1053, "step": 2457 }, { "epoch": 58.525373134328355, "grad_norm": 20.517322540283203, "learning_rate": 6.960317460317461e-06, "loss": 43.8647, "step": 2458 }, { "epoch": 58.549253731343285, "grad_norm": 15.967523574829102, "learning_rate": 6.956349206349207e-06, "loss": 44.9457, "step": 2459 }, { "epoch": 58.57313432835821, "grad_norm": 26.704971313476562, "learning_rate": 6.952380952380952e-06, "loss": 42.5455, "step": 2460 }, { "epoch": 58.59701492537313, "grad_norm": 17.790040969848633, "learning_rate": 6.9484126984126985e-06, "loss": 45.0774, "step": 2461 }, { "epoch": 58.62089552238806, "grad_norm": 24.100841522216797, "learning_rate": 6.944444444444445e-06, "loss": 44.7616, "step": 2462 }, { "epoch": 58.644776119402984, "grad_norm": 17.18956184387207, "learning_rate": 6.940476190476191e-06, "loss": 44.6506, "step": 2463 }, { "epoch": 58.668656716417914, "grad_norm": 25.185026168823242, "learning_rate": 6.936507936507937e-06, "loss": 44.9502, "step": 2464 }, { "epoch": 58.69253731343284, "grad_norm": 22.515111923217773, "learning_rate": 6.932539682539683e-06, "loss": 43.802, "step": 2465 }, { "epoch": 58.71641791044776, "grad_norm": 26.777843475341797, "learning_rate": 6.928571428571429e-06, "loss": 44.0202, "step": 2466 }, { "epoch": 58.74029850746269, "grad_norm": 22.73821449279785, "learning_rate": 6.924603174603175e-06, "loss": 44.1536, "step": 2467 }, { "epoch": 58.76417910447761, "grad_norm": 20.890169143676758, "learning_rate": 6.920634920634921e-06, "loss": 43.6729, "step": 2468 }, { "epoch": 58.788059701492536, "grad_norm": 21.856304168701172, "learning_rate": 6.916666666666667e-06, "loss": 44.5367, "step": 2469 }, { "epoch": 58.811940298507466, "grad_norm": 23.25510025024414, "learning_rate": 6.912698412698413e-06, "loss": 44.8127, "step": 2470 }, { "epoch": 58.83582089552239, "grad_norm": 26.32556915283203, "learning_rate": 6.908730158730159e-06, "loss": 44.8174, "step": 2471 }, { "epoch": 58.85970149253731, "grad_norm": 17.11307716369629, "learning_rate": 6.9047619047619055e-06, "loss": 42.9014, "step": 2472 }, { "epoch": 58.88358208955224, "grad_norm": 22.380199432373047, "learning_rate": 6.900793650793651e-06, "loss": 43.9276, "step": 2473 }, { "epoch": 58.907462686567165, "grad_norm": 17.855484008789062, "learning_rate": 6.896825396825397e-06, "loss": 43.5554, "step": 2474 }, { "epoch": 58.93134328358209, "grad_norm": 18.112934112548828, "learning_rate": 6.892857142857143e-06, "loss": 43.1567, "step": 2475 }, { "epoch": 58.95522388059702, "grad_norm": 21.682844161987305, "learning_rate": 6.88888888888889e-06, "loss": 44.4182, "step": 2476 }, { "epoch": 58.97910447761194, "grad_norm": 22.78960609436035, "learning_rate": 6.884920634920635e-06, "loss": 44.5085, "step": 2477 }, { "epoch": 59.0, "grad_norm": 26.30175018310547, "learning_rate": 6.880952380952381e-06, "loss": 38.3404, "step": 2478 }, { "epoch": 59.02388059701492, "grad_norm": 23.714672088623047, "learning_rate": 6.8769841269841275e-06, "loss": 44.131, "step": 2479 }, { "epoch": 59.04776119402985, "grad_norm": 21.14238929748535, "learning_rate": 6.873015873015874e-06, "loss": 44.0208, "step": 2480 }, { "epoch": 59.071641791044776, "grad_norm": 25.2884464263916, "learning_rate": 6.86904761904762e-06, "loss": 43.4771, "step": 2481 }, { "epoch": 59.0955223880597, "grad_norm": 21.62645149230957, "learning_rate": 6.8650793650793654e-06, "loss": 43.3618, "step": 2482 }, { "epoch": 59.11940298507463, "grad_norm": 21.61750602722168, "learning_rate": 6.861111111111112e-06, "loss": 45.1006, "step": 2483 }, { "epoch": 59.14328358208955, "grad_norm": 25.124187469482422, "learning_rate": 6.857142857142858e-06, "loss": 44.2154, "step": 2484 }, { "epoch": 59.167164179104475, "grad_norm": 21.88067626953125, "learning_rate": 6.853174603174604e-06, "loss": 44.6725, "step": 2485 }, { "epoch": 59.191044776119405, "grad_norm": 26.95779037475586, "learning_rate": 6.8492063492063496e-06, "loss": 43.2419, "step": 2486 }, { "epoch": 59.21492537313433, "grad_norm": 18.35979461669922, "learning_rate": 6.845238095238096e-06, "loss": 42.7172, "step": 2487 }, { "epoch": 59.23880597014925, "grad_norm": 27.409034729003906, "learning_rate": 6.841269841269842e-06, "loss": 44.7929, "step": 2488 }, { "epoch": 59.26268656716418, "grad_norm": 23.257780075073242, "learning_rate": 6.837301587301588e-06, "loss": 45.0742, "step": 2489 }, { "epoch": 59.286567164179104, "grad_norm": 23.64436912536621, "learning_rate": 6.833333333333334e-06, "loss": 45.3608, "step": 2490 }, { "epoch": 59.31044776119403, "grad_norm": 19.836320877075195, "learning_rate": 6.82936507936508e-06, "loss": 43.3152, "step": 2491 }, { "epoch": 59.33432835820896, "grad_norm": 23.7291259765625, "learning_rate": 6.825396825396826e-06, "loss": 45.0224, "step": 2492 }, { "epoch": 59.35820895522388, "grad_norm": 22.776365280151367, "learning_rate": 6.8214285714285724e-06, "loss": 44.0495, "step": 2493 }, { "epoch": 59.3820895522388, "grad_norm": 19.556560516357422, "learning_rate": 6.817460317460318e-06, "loss": 43.0716, "step": 2494 }, { "epoch": 59.40597014925373, "grad_norm": 19.592493057250977, "learning_rate": 6.813492063492064e-06, "loss": 43.8102, "step": 2495 }, { "epoch": 59.429850746268656, "grad_norm": 18.4060001373291, "learning_rate": 6.80952380952381e-06, "loss": 42.5119, "step": 2496 }, { "epoch": 59.45373134328358, "grad_norm": 18.86701202392578, "learning_rate": 6.8055555555555566e-06, "loss": 42.7576, "step": 2497 }, { "epoch": 59.47761194029851, "grad_norm": 19.602235794067383, "learning_rate": 6.801587301587303e-06, "loss": 44.8432, "step": 2498 }, { "epoch": 59.50149253731343, "grad_norm": NaN, "learning_rate": 6.797619047619048e-06, "loss": 77.3141, "step": 2499 }, { "epoch": 59.525373134328355, "grad_norm": 18.09695816040039, "learning_rate": 6.797619047619048e-06, "loss": 44.5263, "step": 2500 }, { "epoch": 59.549253731343285, "grad_norm": 20.728633880615234, "learning_rate": 6.7936507936507944e-06, "loss": 45.2417, "step": 2501 }, { "epoch": 59.57313432835821, "grad_norm": 19.164405822753906, "learning_rate": 6.789682539682541e-06, "loss": 44.1673, "step": 2502 }, { "epoch": 59.59701492537313, "grad_norm": 17.986604690551758, "learning_rate": 6.785714285714287e-06, "loss": 44.9311, "step": 2503 }, { "epoch": 59.62089552238806, "grad_norm": 21.212223052978516, "learning_rate": 6.781746031746032e-06, "loss": 43.9186, "step": 2504 }, { "epoch": 59.644776119402984, "grad_norm": 19.34587287902832, "learning_rate": 6.777777777777779e-06, "loss": 43.9205, "step": 2505 }, { "epoch": 59.668656716417914, "grad_norm": 21.107257843017578, "learning_rate": 6.773809523809525e-06, "loss": 43.7522, "step": 2506 }, { "epoch": 59.69253731343284, "grad_norm": 19.897724151611328, "learning_rate": 6.769841269841271e-06, "loss": 43.7826, "step": 2507 }, { "epoch": 59.71641791044776, "grad_norm": 22.272457122802734, "learning_rate": 6.7658730158730165e-06, "loss": 44.4984, "step": 2508 }, { "epoch": 59.74029850746269, "grad_norm": 20.087844848632812, "learning_rate": 6.761904761904763e-06, "loss": 43.6682, "step": 2509 }, { "epoch": 59.76417910447761, "grad_norm": 22.083215713500977, "learning_rate": 6.757936507936509e-06, "loss": 43.1799, "step": 2510 }, { "epoch": 59.788059701492536, "grad_norm": 18.583234786987305, "learning_rate": 6.753968253968255e-06, "loss": 44.9017, "step": 2511 }, { "epoch": 59.811940298507466, "grad_norm": 20.20134735107422, "learning_rate": 6.750000000000001e-06, "loss": 44.1051, "step": 2512 }, { "epoch": 59.83582089552239, "grad_norm": 20.973419189453125, "learning_rate": 6.746031746031747e-06, "loss": 44.138, "step": 2513 }, { "epoch": 59.85970149253731, "grad_norm": 15.97662353515625, "learning_rate": 6.742063492063493e-06, "loss": 44.9675, "step": 2514 }, { "epoch": 59.88358208955224, "grad_norm": 23.126541137695312, "learning_rate": 6.738095238095239e-06, "loss": 44.4417, "step": 2515 }, { "epoch": 59.907462686567165, "grad_norm": 19.715782165527344, "learning_rate": 6.7341269841269856e-06, "loss": 44.768, "step": 2516 }, { "epoch": 59.93134328358209, "grad_norm": 27.000070571899414, "learning_rate": 6.730158730158731e-06, "loss": 45.0485, "step": 2517 }, { "epoch": 59.95522388059702, "grad_norm": 21.746152877807617, "learning_rate": 6.726190476190477e-06, "loss": 44.5848, "step": 2518 }, { "epoch": 59.97910447761194, "grad_norm": 17.86555290222168, "learning_rate": 6.7222222222222235e-06, "loss": 44.0213, "step": 2519 }, { "epoch": 60.0, "grad_norm": 20.369977951049805, "learning_rate": 6.718253968253968e-06, "loss": 38.2947, "step": 2520 }, { "epoch": 60.0, "step": 2520, "total_flos": 1.2389502240404026e+17, "train_loss": 14.973776844569615, "train_runtime": 26580.6785, "train_samples_per_second": 12.081, "train_steps_per_second": 0.095 }, { "epoch": 60.02388059701492, "grad_norm": 22.268356323242188, "learning_rate": 1e-05, "loss": 43.4551, "step": 2521 }, { "epoch": 60.04776119402985, "grad_norm": Infinity, "learning_rate": 9.99702380952381e-06, "loss": 49.1255, "step": 2522 }, { "epoch": 60.071641791044776, "grad_norm": 202.42481994628906, "learning_rate": 9.99702380952381e-06, "loss": 48.2461, "step": 2523 }, { "epoch": 60.0955223880597, "grad_norm": 190.5668182373047, "learning_rate": 9.99404761904762e-06, "loss": 49.3017, "step": 2524 }, { "epoch": 60.11940298507463, "grad_norm": 69.69708251953125, "learning_rate": 9.99107142857143e-06, "loss": 47.4179, "step": 2525 }, { "epoch": 60.14328358208955, "grad_norm": 94.14574432373047, "learning_rate": 9.988095238095239e-06, "loss": 46.2633, "step": 2526 }, { "epoch": 60.167164179104475, "grad_norm": 59.666481018066406, "learning_rate": 9.985119047619048e-06, "loss": 45.9356, "step": 2527 }, { "epoch": 60.191044776119405, "grad_norm": 66.49242401123047, "learning_rate": 9.982142857142858e-06, "loss": 46.0376, "step": 2528 }, { "epoch": 60.21492537313433, "grad_norm": 46.52798080444336, "learning_rate": 9.979166666666668e-06, "loss": 42.4879, "step": 2529 }, { "epoch": 60.23880597014925, "grad_norm": 41.28635025024414, "learning_rate": 9.976190476190477e-06, "loss": 43.5567, "step": 2530 }, { "epoch": 60.26268656716418, "grad_norm": 48.749210357666016, "learning_rate": 9.973214285714287e-06, "loss": 44.6395, "step": 2531 }, { "epoch": 60.286567164179104, "grad_norm": 40.452842712402344, "learning_rate": 9.970238095238096e-06, "loss": 44.65, "step": 2532 }, { "epoch": 60.31044776119403, "grad_norm": 41.35492706298828, "learning_rate": 9.967261904761905e-06, "loss": 44.8214, "step": 2533 }, { "epoch": 60.33432835820896, "grad_norm": 32.109527587890625, "learning_rate": 9.964285714285714e-06, "loss": 46.1763, "step": 2534 }, { "epoch": 60.35820895522388, "grad_norm": 28.22223472595215, "learning_rate": 9.961309523809525e-06, "loss": 45.4326, "step": 2535 }, { "epoch": 60.3820895522388, "grad_norm": 42.460872650146484, "learning_rate": 9.958333333333334e-06, "loss": 44.2028, "step": 2536 }, { "epoch": 60.40597014925373, "grad_norm": 24.426565170288086, "learning_rate": 9.955357142857143e-06, "loss": 44.141, "step": 2537 }, { "epoch": 60.429850746268656, "grad_norm": 30.63353729248047, "learning_rate": 9.952380952380954e-06, "loss": 44.1465, "step": 2538 }, { "epoch": 60.45373134328358, "grad_norm": 24.15091323852539, "learning_rate": 9.949404761904763e-06, "loss": 44.4579, "step": 2539 }, { "epoch": 60.47761194029851, "grad_norm": 30.757854461669922, "learning_rate": 9.946428571428572e-06, "loss": 44.251, "step": 2540 }, { "epoch": 60.50149253731343, "grad_norm": 24.651243209838867, "learning_rate": 9.943452380952381e-06, "loss": 45.6042, "step": 2541 }, { "epoch": 60.525373134328355, "grad_norm": 23.414180755615234, "learning_rate": 9.940476190476192e-06, "loss": 43.8686, "step": 2542 }, { "epoch": 60.549253731343285, "grad_norm": 29.41202735900879, "learning_rate": 9.937500000000001e-06, "loss": 45.111, "step": 2543 }, { "epoch": 60.57313432835821, "grad_norm": 25.947559356689453, "learning_rate": 9.93452380952381e-06, "loss": 43.6131, "step": 2544 }, { "epoch": 60.59701492537313, "grad_norm": 30.613439559936523, "learning_rate": 9.93154761904762e-06, "loss": 44.9413, "step": 2545 }, { "epoch": 60.62089552238806, "grad_norm": 28.363725662231445, "learning_rate": 9.92857142857143e-06, "loss": 43.3202, "step": 2546 }, { "epoch": 60.644776119402984, "grad_norm": 24.05493927001953, "learning_rate": 9.925595238095239e-06, "loss": 45.434, "step": 2547 }, { "epoch": 60.668656716417914, "grad_norm": 30.285140991210938, "learning_rate": 9.922619047619048e-06, "loss": 44.2144, "step": 2548 }, { "epoch": 60.69253731343284, "grad_norm": 24.850799560546875, "learning_rate": 9.919642857142859e-06, "loss": 43.4151, "step": 2549 }, { "epoch": 60.71641791044776, "grad_norm": 23.70008087158203, "learning_rate": 9.916666666666668e-06, "loss": 44.0035, "step": 2550 }, { "epoch": 60.74029850746269, "grad_norm": 29.343976974487305, "learning_rate": 9.913690476190477e-06, "loss": 44.9651, "step": 2551 }, { "epoch": 60.76417910447761, "grad_norm": 19.914142608642578, "learning_rate": 9.910714285714288e-06, "loss": 44.3699, "step": 2552 }, { "epoch": 60.788059701492536, "grad_norm": 23.31308937072754, "learning_rate": 9.907738095238097e-06, "loss": 45.7724, "step": 2553 }, { "epoch": 60.811940298507466, "grad_norm": 26.089832305908203, "learning_rate": 9.904761904761906e-06, "loss": 44.9611, "step": 2554 }, { "epoch": 60.83582089552239, "grad_norm": 22.15082550048828, "learning_rate": 9.901785714285715e-06, "loss": 43.1386, "step": 2555 }, { "epoch": 60.85970149253731, "grad_norm": 30.4031925201416, "learning_rate": 9.898809523809525e-06, "loss": 44.324, "step": 2556 }, { "epoch": 60.88358208955224, "grad_norm": 22.16672134399414, "learning_rate": 9.895833333333334e-06, "loss": 43.6286, "step": 2557 }, { "epoch": 60.907462686567165, "grad_norm": 21.490468978881836, "learning_rate": 9.892857142857143e-06, "loss": 42.8494, "step": 2558 }, { "epoch": 60.93134328358209, "grad_norm": 28.823944091796875, "learning_rate": 9.889880952380954e-06, "loss": 42.9944, "step": 2559 }, { "epoch": 60.95522388059702, "grad_norm": 19.950031280517578, "learning_rate": 9.886904761904763e-06, "loss": 44.7105, "step": 2560 }, { "epoch": 60.97910447761194, "grad_norm": 31.069194793701172, "learning_rate": 9.883928571428572e-06, "loss": 43.6064, "step": 2561 }, { "epoch": 61.0, "grad_norm": 19.411388397216797, "learning_rate": 9.880952380952381e-06, "loss": 38.8114, "step": 2562 }, { "epoch": 61.02388059701492, "grad_norm": 26.66782569885254, "learning_rate": 9.877976190476192e-06, "loss": 43.0697, "step": 2563 }, { "epoch": 61.04776119402985, "grad_norm": 25.78309440612793, "learning_rate": 9.875000000000001e-06, "loss": 43.8682, "step": 2564 }, { "epoch": 61.071641791044776, "grad_norm": 19.312572479248047, "learning_rate": 9.87202380952381e-06, "loss": 44.7347, "step": 2565 }, { "epoch": 61.0955223880597, "grad_norm": 33.189598083496094, "learning_rate": 9.869047619047621e-06, "loss": 43.6253, "step": 2566 }, { "epoch": 61.11940298507463, "grad_norm": 27.351518630981445, "learning_rate": 9.86607142857143e-06, "loss": 43.6716, "step": 2567 }, { "epoch": 61.14328358208955, "grad_norm": 21.69624900817871, "learning_rate": 9.863095238095239e-06, "loss": 44.2277, "step": 2568 }, { "epoch": 61.167164179104475, "grad_norm": 27.58108901977539, "learning_rate": 9.860119047619048e-06, "loss": 44.3117, "step": 2569 }, { "epoch": 61.191044776119405, "grad_norm": 18.802303314208984, "learning_rate": 9.857142857142859e-06, "loss": 44.4119, "step": 2570 }, { "epoch": 61.21492537313433, "grad_norm": 21.36333656311035, "learning_rate": 9.854166666666668e-06, "loss": 45.0113, "step": 2571 }, { "epoch": 61.23880597014925, "grad_norm": 23.86919593811035, "learning_rate": 9.851190476190477e-06, "loss": 43.8577, "step": 2572 }, { "epoch": 61.26268656716418, "grad_norm": 20.259685516357422, "learning_rate": 9.848214285714288e-06, "loss": 43.921, "step": 2573 }, { "epoch": 61.286567164179104, "grad_norm": 29.262182235717773, "learning_rate": 9.845238095238097e-06, "loss": 44.1589, "step": 2574 }, { "epoch": 61.31044776119403, "grad_norm": 21.13313102722168, "learning_rate": 9.842261904761906e-06, "loss": 44.3854, "step": 2575 }, { "epoch": 61.33432835820896, "grad_norm": 23.83458137512207, "learning_rate": 9.839285714285715e-06, "loss": 43.5571, "step": 2576 }, { "epoch": 61.35820895522388, "grad_norm": 18.778934478759766, "learning_rate": 9.836309523809524e-06, "loss": 43.827, "step": 2577 }, { "epoch": 61.3820895522388, "grad_norm": 22.37734031677246, "learning_rate": 9.833333333333333e-06, "loss": 45.207, "step": 2578 }, { "epoch": 61.40597014925373, "grad_norm": 25.046817779541016, "learning_rate": 9.830357142857144e-06, "loss": 43.7649, "step": 2579 }, { "epoch": 61.429850746268656, "grad_norm": 21.867618560791016, "learning_rate": 9.827380952380953e-06, "loss": 43.7025, "step": 2580 }, { "epoch": 61.45373134328358, "grad_norm": 22.729969024658203, "learning_rate": 9.824404761904762e-06, "loss": 44.3454, "step": 2581 }, { "epoch": 61.47761194029851, "grad_norm": 23.03755760192871, "learning_rate": 9.821428571428573e-06, "loss": 43.6942, "step": 2582 }, { "epoch": 61.50149253731343, "grad_norm": 20.736244201660156, "learning_rate": 9.818452380952382e-06, "loss": 44.6758, "step": 2583 }, { "epoch": 61.525373134328355, "grad_norm": 17.63121795654297, "learning_rate": 9.81547619047619e-06, "loss": 43.3541, "step": 2584 }, { "epoch": 61.549253731343285, "grad_norm": 18.32825469970703, "learning_rate": 9.8125e-06, "loss": 43.8749, "step": 2585 }, { "epoch": 61.57313432835821, "grad_norm": 19.190811157226562, "learning_rate": 9.80952380952381e-06, "loss": 44.1191, "step": 2586 }, { "epoch": 61.59701492537313, "grad_norm": 17.29913330078125, "learning_rate": 9.80654761904762e-06, "loss": 44.5885, "step": 2587 }, { "epoch": 61.62089552238806, "grad_norm": 21.12825584411621, "learning_rate": 9.803571428571428e-06, "loss": 43.9206, "step": 2588 }, { "epoch": 61.644776119402984, "grad_norm": 26.83173942565918, "learning_rate": 9.80059523809524e-06, "loss": 44.7224, "step": 2589 }, { "epoch": 61.668656716417914, "grad_norm": 18.34907341003418, "learning_rate": 9.797619047619048e-06, "loss": 44.3625, "step": 2590 }, { "epoch": 61.69253731343284, "grad_norm": 18.709016799926758, "learning_rate": 9.794642857142857e-06, "loss": 43.2573, "step": 2591 }, { "epoch": 61.71641791044776, "grad_norm": 24.946210861206055, "learning_rate": 9.791666666666666e-06, "loss": 43.4754, "step": 2592 }, { "epoch": 61.74029850746269, "grad_norm": 22.253814697265625, "learning_rate": 9.788690476190477e-06, "loss": 43.7911, "step": 2593 }, { "epoch": 61.76417910447761, "grad_norm": 21.44365692138672, "learning_rate": 9.785714285714286e-06, "loss": 44.9724, "step": 2594 }, { "epoch": 61.788059701492536, "grad_norm": 19.002973556518555, "learning_rate": 9.782738095238095e-06, "loss": 44.5918, "step": 2595 }, { "epoch": 61.811940298507466, "grad_norm": 23.515329360961914, "learning_rate": 9.779761904761906e-06, "loss": 43.7888, "step": 2596 }, { "epoch": 61.83582089552239, "grad_norm": 22.531938552856445, "learning_rate": 9.776785714285715e-06, "loss": 43.3796, "step": 2597 }, { "epoch": 61.85970149253731, "grad_norm": 21.187646865844727, "learning_rate": 9.773809523809524e-06, "loss": 44.5443, "step": 2598 }, { "epoch": 61.88358208955224, "grad_norm": 16.221466064453125, "learning_rate": 9.770833333333333e-06, "loss": 43.3915, "step": 2599 }, { "epoch": 61.907462686567165, "grad_norm": 26.778657913208008, "learning_rate": 9.767857142857144e-06, "loss": 43.5025, "step": 2600 }, { "epoch": 61.93134328358209, "grad_norm": 22.768152236938477, "learning_rate": 9.764880952380953e-06, "loss": 44.5111, "step": 2601 }, { "epoch": 61.95522388059702, "grad_norm": 27.74826431274414, "learning_rate": 9.761904761904762e-06, "loss": 44.5974, "step": 2602 }, { "epoch": 61.97910447761194, "grad_norm": 22.05244255065918, "learning_rate": 9.758928571428573e-06, "loss": 43.5759, "step": 2603 }, { "epoch": 62.0, "grad_norm": 19.424118041992188, "learning_rate": 9.755952380952382e-06, "loss": 39.0078, "step": 2604 }, { "epoch": 62.02388059701492, "grad_norm": 26.683176040649414, "learning_rate": 9.75297619047619e-06, "loss": 43.6656, "step": 2605 }, { "epoch": 62.04776119402985, "grad_norm": 20.35538673400879, "learning_rate": 9.75e-06, "loss": 45.2448, "step": 2606 }, { "epoch": 62.071641791044776, "grad_norm": 20.601633071899414, "learning_rate": 9.74702380952381e-06, "loss": 45.3538, "step": 2607 }, { "epoch": 62.0955223880597, "grad_norm": 17.472143173217773, "learning_rate": 9.74404761904762e-06, "loss": 44.3077, "step": 2608 }, { "epoch": 62.11940298507463, "grad_norm": 21.365177154541016, "learning_rate": 9.741071428571429e-06, "loss": 43.7227, "step": 2609 }, { "epoch": 62.14328358208955, "grad_norm": 25.560060501098633, "learning_rate": 9.73809523809524e-06, "loss": 44.6434, "step": 2610 }, { "epoch": 62.167164179104475, "grad_norm": 20.327442169189453, "learning_rate": 9.735119047619048e-06, "loss": 43.5369, "step": 2611 }, { "epoch": 62.191044776119405, "grad_norm": 19.99593734741211, "learning_rate": 9.732142857142858e-06, "loss": 44.1446, "step": 2612 }, { "epoch": 62.21492537313433, "grad_norm": 19.648574829101562, "learning_rate": 9.729166666666667e-06, "loss": 43.4092, "step": 2613 }, { "epoch": 62.23880597014925, "grad_norm": 26.606019973754883, "learning_rate": 9.726190476190477e-06, "loss": 44.3531, "step": 2614 }, { "epoch": 62.26268656716418, "grad_norm": 27.989334106445312, "learning_rate": 9.723214285714286e-06, "loss": 44.3962, "step": 2615 }, { "epoch": 62.286567164179104, "grad_norm": 22.545223236083984, "learning_rate": 9.720238095238095e-06, "loss": 42.8718, "step": 2616 }, { "epoch": 62.31044776119403, "grad_norm": 26.62592315673828, "learning_rate": 9.717261904761906e-06, "loss": 43.004, "step": 2617 }, { "epoch": 62.33432835820896, "grad_norm": 26.52608299255371, "learning_rate": 9.714285714285715e-06, "loss": 43.2152, "step": 2618 }, { "epoch": 62.35820895522388, "grad_norm": 20.17901611328125, "learning_rate": 9.711309523809524e-06, "loss": 43.2228, "step": 2619 }, { "epoch": 62.3820895522388, "grad_norm": 30.335596084594727, "learning_rate": 9.708333333333333e-06, "loss": 45.0079, "step": 2620 }, { "epoch": 62.40597014925373, "grad_norm": 26.02729606628418, "learning_rate": 9.705357142857144e-06, "loss": 43.3155, "step": 2621 }, { "epoch": 62.429850746268656, "grad_norm": 25.07903480529785, "learning_rate": 9.702380952380953e-06, "loss": 45.2378, "step": 2622 }, { "epoch": 62.45373134328358, "grad_norm": 28.89750099182129, "learning_rate": 9.699404761904762e-06, "loss": 42.2074, "step": 2623 }, { "epoch": 62.47761194029851, "grad_norm": 20.475128173828125, "learning_rate": 9.696428571428573e-06, "loss": 43.8792, "step": 2624 }, { "epoch": 62.50149253731343, "grad_norm": 26.103612899780273, "learning_rate": 9.693452380952382e-06, "loss": 44.961, "step": 2625 }, { "epoch": 62.525373134328355, "grad_norm": 21.666906356811523, "learning_rate": 9.690476190476191e-06, "loss": 43.4776, "step": 2626 }, { "epoch": 62.549253731343285, "grad_norm": 22.16206932067871, "learning_rate": 9.6875e-06, "loss": 43.1076, "step": 2627 }, { "epoch": 62.57313432835821, "grad_norm": 18.17336082458496, "learning_rate": 9.68452380952381e-06, "loss": 43.7046, "step": 2628 }, { "epoch": 62.59701492537313, "grad_norm": 25.36472511291504, "learning_rate": 9.68154761904762e-06, "loss": 44.8569, "step": 2629 }, { "epoch": 62.62089552238806, "grad_norm": 18.750511169433594, "learning_rate": 9.678571428571429e-06, "loss": 43.73, "step": 2630 }, { "epoch": 62.644776119402984, "grad_norm": 18.174638748168945, "learning_rate": 9.67559523809524e-06, "loss": 44.4229, "step": 2631 }, { "epoch": 62.668656716417914, "grad_norm": 17.8627872467041, "learning_rate": 9.672619047619049e-06, "loss": 44.9343, "step": 2632 }, { "epoch": 62.69253731343284, "grad_norm": 28.161239624023438, "learning_rate": 9.669642857142858e-06, "loss": 43.7649, "step": 2633 }, { "epoch": 62.71641791044776, "grad_norm": 27.701793670654297, "learning_rate": 9.666666666666667e-06, "loss": 43.7122, "step": 2634 }, { "epoch": 62.74029850746269, "grad_norm": 18.659244537353516, "learning_rate": 9.663690476190477e-06, "loss": 43.5179, "step": 2635 }, { "epoch": 62.76417910447761, "grad_norm": 35.163169860839844, "learning_rate": 9.660714285714287e-06, "loss": 44.4568, "step": 2636 }, { "epoch": 62.788059701492536, "grad_norm": NaN, "learning_rate": 9.657738095238096e-06, "loss": 42.4459, "step": 2637 }, { "epoch": 62.811940298507466, "grad_norm": 26.87259292602539, "learning_rate": 9.657738095238096e-06, "loss": 42.7683, "step": 2638 }, { "epoch": 62.83582089552239, "grad_norm": 31.837942123413086, "learning_rate": 9.654761904761906e-06, "loss": 44.2405, "step": 2639 }, { "epoch": 62.85970149253731, "grad_norm": 24.40672492980957, "learning_rate": 9.651785714285715e-06, "loss": 44.4058, "step": 2640 }, { "epoch": 62.88358208955224, "grad_norm": 29.0338134765625, "learning_rate": 9.648809523809524e-06, "loss": 43.405, "step": 2641 }, { "epoch": 62.907462686567165, "grad_norm": 28.022174835205078, "learning_rate": 9.645833333333333e-06, "loss": 43.6733, "step": 2642 }, { "epoch": 62.93134328358209, "grad_norm": 19.517061233520508, "learning_rate": 9.642857142857144e-06, "loss": 44.2386, "step": 2643 }, { "epoch": 62.95522388059702, "grad_norm": 22.2393741607666, "learning_rate": 9.639880952380953e-06, "loss": 45.1874, "step": 2644 }, { "epoch": 62.97910447761194, "grad_norm": 27.02622413635254, "learning_rate": 9.636904761904762e-06, "loss": 43.8541, "step": 2645 }, { "epoch": 63.0, "grad_norm": 20.51211929321289, "learning_rate": 9.633928571428573e-06, "loss": 39.6487, "step": 2646 }, { "epoch": 63.02388059701492, "grad_norm": 24.02116584777832, "learning_rate": 9.630952380952382e-06, "loss": 44.5685, "step": 2647 }, { "epoch": 63.04776119402985, "grad_norm": 30.00434112548828, "learning_rate": 9.627976190476191e-06, "loss": 43.2549, "step": 2648 }, { "epoch": 63.071641791044776, "grad_norm": 23.16147804260254, "learning_rate": 9.625e-06, "loss": 44.1254, "step": 2649 }, { "epoch": 63.0955223880597, "grad_norm": 30.86275291442871, "learning_rate": 9.622023809523811e-06, "loss": 43.4804, "step": 2650 }, { "epoch": 63.11940298507463, "grad_norm": 27.942575454711914, "learning_rate": 9.61904761904762e-06, "loss": 44.4437, "step": 2651 }, { "epoch": 63.14328358208955, "grad_norm": 22.330169677734375, "learning_rate": 9.616071428571429e-06, "loss": 44.1067, "step": 2652 }, { "epoch": 63.167164179104475, "grad_norm": 27.878795623779297, "learning_rate": 9.61309523809524e-06, "loss": 42.5768, "step": 2653 }, { "epoch": 63.191044776119405, "grad_norm": 23.200098037719727, "learning_rate": 9.610119047619049e-06, "loss": 43.5906, "step": 2654 }, { "epoch": 63.21492537313433, "grad_norm": 23.872238159179688, "learning_rate": 9.607142857142858e-06, "loss": 43.8177, "step": 2655 }, { "epoch": 63.23880597014925, "grad_norm": 31.89397430419922, "learning_rate": 9.604166666666669e-06, "loss": 43.5719, "step": 2656 }, { "epoch": 63.26268656716418, "grad_norm": 24.745256423950195, "learning_rate": 9.601190476190478e-06, "loss": 43.1085, "step": 2657 }, { "epoch": 63.286567164179104, "grad_norm": 31.859682083129883, "learning_rate": 9.598214285714287e-06, "loss": 42.8871, "step": 2658 }, { "epoch": 63.31044776119403, "grad_norm": 25.792551040649414, "learning_rate": 9.595238095238096e-06, "loss": 42.7027, "step": 2659 }, { "epoch": 63.33432835820896, "grad_norm": 25.225967407226562, "learning_rate": 9.592261904761906e-06, "loss": 43.0075, "step": 2660 }, { "epoch": 63.35820895522388, "grad_norm": 27.146207809448242, "learning_rate": 9.589285714285716e-06, "loss": 44.3992, "step": 2661 }, { "epoch": 63.3820895522388, "grad_norm": 16.27069664001465, "learning_rate": 9.586309523809525e-06, "loss": 44.1708, "step": 2662 }, { "epoch": 63.40597014925373, "grad_norm": 34.79555892944336, "learning_rate": 9.583333333333335e-06, "loss": 44.4863, "step": 2663 }, { "epoch": 63.429850746268656, "grad_norm": 23.31925392150879, "learning_rate": 9.580357142857144e-06, "loss": 44.3615, "step": 2664 }, { "epoch": 63.45373134328358, "grad_norm": 28.239566802978516, "learning_rate": 9.577380952380953e-06, "loss": 45.042, "step": 2665 }, { "epoch": 63.47761194029851, "grad_norm": 25.107566833496094, "learning_rate": 9.574404761904762e-06, "loss": 44.4372, "step": 2666 }, { "epoch": 63.50149253731343, "grad_norm": 20.601322174072266, "learning_rate": 9.571428571428573e-06, "loss": 43.5807, "step": 2667 }, { "epoch": 63.525373134328355, "grad_norm": 34.94065475463867, "learning_rate": 9.568452380952382e-06, "loss": 42.8904, "step": 2668 }, { "epoch": 63.549253731343285, "grad_norm": 26.336591720581055, "learning_rate": 9.565476190476191e-06, "loss": 44.1117, "step": 2669 }, { "epoch": 63.57313432835821, "grad_norm": 35.81476974487305, "learning_rate": 9.562500000000002e-06, "loss": 44.847, "step": 2670 }, { "epoch": 63.59701492537313, "grad_norm": 21.011463165283203, "learning_rate": 9.559523809523811e-06, "loss": 43.9457, "step": 2671 }, { "epoch": 63.62089552238806, "grad_norm": 36.32665252685547, "learning_rate": 9.55654761904762e-06, "loss": 43.4279, "step": 2672 }, { "epoch": 63.644776119402984, "grad_norm": 21.384214401245117, "learning_rate": 9.55357142857143e-06, "loss": 45.3947, "step": 2673 }, { "epoch": 63.668656716417914, "grad_norm": 38.893680572509766, "learning_rate": 9.55059523809524e-06, "loss": 43.9397, "step": 2674 }, { "epoch": 63.69253731343284, "grad_norm": 26.085948944091797, "learning_rate": 9.547619047619049e-06, "loss": 43.118, "step": 2675 }, { "epoch": 63.71641791044776, "grad_norm": 35.67828369140625, "learning_rate": 9.544642857142858e-06, "loss": 44.8236, "step": 2676 }, { "epoch": 63.74029850746269, "grad_norm": 25.065685272216797, "learning_rate": 9.541666666666669e-06, "loss": 43.8344, "step": 2677 }, { "epoch": 63.76417910447761, "grad_norm": 26.32991600036621, "learning_rate": 9.538690476190478e-06, "loss": 45.5309, "step": 2678 }, { "epoch": 63.788059701492536, "grad_norm": 26.833250045776367, "learning_rate": 9.535714285714287e-06, "loss": 43.7626, "step": 2679 }, { "epoch": 63.811940298507466, "grad_norm": 23.64604949951172, "learning_rate": 9.532738095238096e-06, "loss": 43.7545, "step": 2680 }, { "epoch": 63.83582089552239, "grad_norm": 24.57122230529785, "learning_rate": 9.529761904761905e-06, "loss": 43.5666, "step": 2681 }, { "epoch": 63.85970149253731, "grad_norm": 21.429603576660156, "learning_rate": 9.526785714285714e-06, "loss": 44.8421, "step": 2682 }, { "epoch": 63.88358208955224, "grad_norm": NaN, "learning_rate": 9.523809523809525e-06, "loss": 77.6969, "step": 2683 }, { "epoch": 63.907462686567165, "grad_norm": 25.843442916870117, "learning_rate": 9.523809523809525e-06, "loss": 42.4353, "step": 2684 }, { "epoch": 63.93134328358209, "grad_norm": 26.74856185913086, "learning_rate": 9.520833333333334e-06, "loss": 45.0478, "step": 2685 }, { "epoch": 63.95522388059702, "grad_norm": 22.9956111907959, "learning_rate": 9.517857142857143e-06, "loss": 44.3383, "step": 2686 }, { "epoch": 63.97910447761194, "grad_norm": 17.62372589111328, "learning_rate": 9.514880952380952e-06, "loss": 43.2765, "step": 2687 }, { "epoch": 64.0, "grad_norm": 21.372318267822266, "learning_rate": 9.511904761904763e-06, "loss": 37.8049, "step": 2688 }, { "epoch": 64.02388059701492, "grad_norm": 23.412595748901367, "learning_rate": 9.508928571428572e-06, "loss": 45.446, "step": 2689 }, { "epoch": 64.04776119402985, "grad_norm": 21.825000762939453, "learning_rate": 9.50595238095238e-06, "loss": 42.871, "step": 2690 }, { "epoch": 64.07164179104478, "grad_norm": 18.50835418701172, "learning_rate": 9.502976190476191e-06, "loss": 43.1485, "step": 2691 }, { "epoch": 64.0955223880597, "grad_norm": 22.428272247314453, "learning_rate": 9.5e-06, "loss": 42.7172, "step": 2692 }, { "epoch": 64.11940298507463, "grad_norm": 19.58050537109375, "learning_rate": 9.49702380952381e-06, "loss": 43.4599, "step": 2693 }, { "epoch": 64.14328358208955, "grad_norm": 21.66231346130371, "learning_rate": 9.494047619047619e-06, "loss": 43.601, "step": 2694 }, { "epoch": 64.16716417910447, "grad_norm": 29.5888729095459, "learning_rate": 9.49107142857143e-06, "loss": 44.8395, "step": 2695 }, { "epoch": 64.1910447761194, "grad_norm": 16.3875675201416, "learning_rate": 9.488095238095238e-06, "loss": 43.8201, "step": 2696 }, { "epoch": 64.21492537313434, "grad_norm": 28.326553344726562, "learning_rate": 9.485119047619047e-06, "loss": 45.1189, "step": 2697 }, { "epoch": 64.23880597014926, "grad_norm": 20.549386978149414, "learning_rate": 9.482142857142858e-06, "loss": 44.0127, "step": 2698 }, { "epoch": 64.26268656716418, "grad_norm": 25.79012680053711, "learning_rate": 9.479166666666667e-06, "loss": 43.0571, "step": 2699 }, { "epoch": 64.2865671641791, "grad_norm": 31.000024795532227, "learning_rate": 9.476190476190476e-06, "loss": 42.4615, "step": 2700 }, { "epoch": 64.31044776119403, "grad_norm": 19.49623680114746, "learning_rate": 9.473214285714285e-06, "loss": 45.6714, "step": 2701 }, { "epoch": 64.33432835820895, "grad_norm": 23.13125991821289, "learning_rate": 9.470238095238096e-06, "loss": 44.1373, "step": 2702 }, { "epoch": 64.35820895522389, "grad_norm": 32.59320068359375, "learning_rate": 9.467261904761905e-06, "loss": 42.5976, "step": 2703 }, { "epoch": 64.38208955223881, "grad_norm": 19.396995544433594, "learning_rate": 9.464285714285714e-06, "loss": 43.9782, "step": 2704 }, { "epoch": 64.40597014925373, "grad_norm": 28.275136947631836, "learning_rate": 9.461309523809525e-06, "loss": 44.3116, "step": 2705 }, { "epoch": 64.42985074626866, "grad_norm": 25.157663345336914, "learning_rate": 9.458333333333334e-06, "loss": 45.07, "step": 2706 }, { "epoch": 64.45373134328358, "grad_norm": 22.684513092041016, "learning_rate": 9.455357142857143e-06, "loss": 44.1489, "step": 2707 }, { "epoch": 64.4776119402985, "grad_norm": 19.41883659362793, "learning_rate": 9.452380952380952e-06, "loss": 43.5031, "step": 2708 }, { "epoch": 64.50149253731344, "grad_norm": 27.202308654785156, "learning_rate": 9.449404761904763e-06, "loss": 44.4811, "step": 2709 }, { "epoch": 64.52537313432836, "grad_norm": 17.535995483398438, "learning_rate": 9.446428571428572e-06, "loss": 43.7648, "step": 2710 }, { "epoch": 64.54925373134328, "grad_norm": 21.47702980041504, "learning_rate": 9.443452380952381e-06, "loss": 42.7421, "step": 2711 }, { "epoch": 64.57313432835821, "grad_norm": 20.23499870300293, "learning_rate": 9.440476190476192e-06, "loss": 43.8339, "step": 2712 }, { "epoch": 64.59701492537313, "grad_norm": 19.41843032836914, "learning_rate": 9.4375e-06, "loss": 44.182, "step": 2713 }, { "epoch": 64.62089552238805, "grad_norm": 22.892518997192383, "learning_rate": 9.43452380952381e-06, "loss": 42.7459, "step": 2714 }, { "epoch": 64.64477611940299, "grad_norm": 25.601083755493164, "learning_rate": 9.431547619047619e-06, "loss": 44.316, "step": 2715 }, { "epoch": 64.66865671641791, "grad_norm": 20.23451042175293, "learning_rate": 9.42857142857143e-06, "loss": 44.6613, "step": 2716 }, { "epoch": 64.69253731343284, "grad_norm": 16.326499938964844, "learning_rate": 9.425595238095239e-06, "loss": 43.9092, "step": 2717 }, { "epoch": 64.71641791044776, "grad_norm": 29.170324325561523, "learning_rate": 9.422619047619048e-06, "loss": 42.957, "step": 2718 }, { "epoch": 64.74029850746268, "grad_norm": 24.257295608520508, "learning_rate": 9.419642857142858e-06, "loss": 44.119, "step": 2719 }, { "epoch": 64.7641791044776, "grad_norm": 21.303083419799805, "learning_rate": 9.416666666666667e-06, "loss": 43.4882, "step": 2720 }, { "epoch": 64.78805970149254, "grad_norm": 20.77082633972168, "learning_rate": 9.413690476190476e-06, "loss": 43.9079, "step": 2721 }, { "epoch": 64.81194029850747, "grad_norm": 24.470279693603516, "learning_rate": 9.410714285714286e-06, "loss": 45.0313, "step": 2722 }, { "epoch": 64.83582089552239, "grad_norm": 22.445308685302734, "learning_rate": 9.407738095238096e-06, "loss": 43.0798, "step": 2723 }, { "epoch": 64.85970149253731, "grad_norm": 27.02490234375, "learning_rate": 9.404761904761905e-06, "loss": 43.1318, "step": 2724 }, { "epoch": 64.88358208955223, "grad_norm": 22.678592681884766, "learning_rate": 9.401785714285714e-06, "loss": 44.1473, "step": 2725 }, { "epoch": 64.90746268656716, "grad_norm": 26.62460708618164, "learning_rate": 9.398809523809525e-06, "loss": 43.2439, "step": 2726 }, { "epoch": 64.9313432835821, "grad_norm": 24.331209182739258, "learning_rate": 9.395833333333334e-06, "loss": 43.517, "step": 2727 }, { "epoch": 64.95522388059702, "grad_norm": 20.00579071044922, "learning_rate": 9.392857142857143e-06, "loss": 44.7679, "step": 2728 }, { "epoch": 64.97910447761194, "grad_norm": 28.423246383666992, "learning_rate": 9.389880952380954e-06, "loss": 43.5171, "step": 2729 }, { "epoch": 65.0, "grad_norm": 22.855792999267578, "learning_rate": 9.386904761904763e-06, "loss": 38.3847, "step": 2730 }, { "epoch": 65.02388059701492, "grad_norm": 22.053749084472656, "learning_rate": 9.383928571428572e-06, "loss": 44.0032, "step": 2731 }, { "epoch": 65.04776119402985, "grad_norm": 24.45530891418457, "learning_rate": 9.380952380952381e-06, "loss": 43.4024, "step": 2732 }, { "epoch": 65.07164179104478, "grad_norm": 24.508438110351562, "learning_rate": 9.377976190476192e-06, "loss": 43.4435, "step": 2733 }, { "epoch": 65.0955223880597, "grad_norm": 22.03391456604004, "learning_rate": 9.375000000000001e-06, "loss": 43.413, "step": 2734 }, { "epoch": 65.11940298507463, "grad_norm": 23.95793342590332, "learning_rate": 9.37202380952381e-06, "loss": 44.1908, "step": 2735 }, { "epoch": 65.14328358208955, "grad_norm": 23.00299072265625, "learning_rate": 9.36904761904762e-06, "loss": 43.9526, "step": 2736 }, { "epoch": 65.16716417910447, "grad_norm": 21.019451141357422, "learning_rate": 9.36607142857143e-06, "loss": 44.114, "step": 2737 }, { "epoch": 65.1910447761194, "grad_norm": 21.974138259887695, "learning_rate": 9.363095238095239e-06, "loss": 43.8516, "step": 2738 }, { "epoch": 65.21492537313434, "grad_norm": 17.860519409179688, "learning_rate": 9.360119047619048e-06, "loss": 44.2829, "step": 2739 }, { "epoch": 65.23880597014926, "grad_norm": 18.76349639892578, "learning_rate": 9.357142857142859e-06, "loss": 42.7555, "step": 2740 }, { "epoch": 65.26268656716418, "grad_norm": 22.4278621673584, "learning_rate": 9.354166666666668e-06, "loss": 43.807, "step": 2741 }, { "epoch": 65.2865671641791, "grad_norm": 17.867431640625, "learning_rate": 9.351190476190477e-06, "loss": 43.474, "step": 2742 }, { "epoch": 65.31044776119403, "grad_norm": 16.060117721557617, "learning_rate": 9.348214285714287e-06, "loss": 44.2362, "step": 2743 }, { "epoch": 65.33432835820895, "grad_norm": 27.756179809570312, "learning_rate": 9.345238095238096e-06, "loss": 43.9566, "step": 2744 }, { "epoch": 65.35820895522389, "grad_norm": 18.937381744384766, "learning_rate": 9.342261904761905e-06, "loss": 42.2936, "step": 2745 }, { "epoch": 65.38208955223881, "grad_norm": 23.91965675354004, "learning_rate": 9.339285714285715e-06, "loss": 43.1194, "step": 2746 }, { "epoch": 65.40597014925373, "grad_norm": 28.585317611694336, "learning_rate": 9.336309523809525e-06, "loss": 43.7419, "step": 2747 }, { "epoch": 65.42985074626866, "grad_norm": 18.788578033447266, "learning_rate": 9.333333333333334e-06, "loss": 43.7811, "step": 2748 }, { "epoch": 65.45373134328358, "grad_norm": 24.84532928466797, "learning_rate": 9.330357142857143e-06, "loss": 44.4898, "step": 2749 }, { "epoch": 65.4776119402985, "grad_norm": 20.880659103393555, "learning_rate": 9.327380952380954e-06, "loss": 44.5627, "step": 2750 }, { "epoch": 65.50149253731344, "grad_norm": 18.502254486083984, "learning_rate": 9.324404761904763e-06, "loss": 43.621, "step": 2751 }, { "epoch": 65.52537313432836, "grad_norm": 23.150991439819336, "learning_rate": 9.321428571428572e-06, "loss": 43.9683, "step": 2752 }, { "epoch": 65.54925373134328, "grad_norm": 20.03653907775879, "learning_rate": 9.318452380952381e-06, "loss": 42.4545, "step": 2753 }, { "epoch": 65.57313432835821, "grad_norm": 24.8642635345459, "learning_rate": 9.315476190476192e-06, "loss": 43.1368, "step": 2754 }, { "epoch": 65.59701492537313, "grad_norm": 19.812273025512695, "learning_rate": 9.312500000000001e-06, "loss": 44.5991, "step": 2755 }, { "epoch": 65.62089552238805, "grad_norm": 20.746320724487305, "learning_rate": 9.30952380952381e-06, "loss": 42.3573, "step": 2756 }, { "epoch": 65.64477611940299, "grad_norm": 28.684810638427734, "learning_rate": 9.30654761904762e-06, "loss": 43.798, "step": 2757 }, { "epoch": 65.66865671641791, "grad_norm": 17.441326141357422, "learning_rate": 9.30357142857143e-06, "loss": 44.0526, "step": 2758 }, { "epoch": 65.69253731343284, "grad_norm": 27.091472625732422, "learning_rate": 9.300595238095239e-06, "loss": 43.5748, "step": 2759 }, { "epoch": 65.71641791044776, "grad_norm": 23.270544052124023, "learning_rate": 9.297619047619048e-06, "loss": 44.0027, "step": 2760 }, { "epoch": 65.74029850746268, "grad_norm": 28.322011947631836, "learning_rate": 9.294642857142859e-06, "loss": 44.4845, "step": 2761 }, { "epoch": 65.7641791044776, "grad_norm": 22.097503662109375, "learning_rate": 9.291666666666668e-06, "loss": 42.6655, "step": 2762 }, { "epoch": 65.78805970149254, "grad_norm": 28.492340087890625, "learning_rate": 9.288690476190477e-06, "loss": 44.669, "step": 2763 }, { "epoch": 65.81194029850747, "grad_norm": 18.208921432495117, "learning_rate": 9.285714285714288e-06, "loss": 44.1797, "step": 2764 }, { "epoch": 65.83582089552239, "grad_norm": 32.15492248535156, "learning_rate": 9.282738095238097e-06, "loss": 43.1981, "step": 2765 }, { "epoch": 65.85970149253731, "grad_norm": 22.335176467895508, "learning_rate": 9.279761904761906e-06, "loss": 43.0925, "step": 2766 }, { "epoch": 65.88358208955223, "grad_norm": 26.412460327148438, "learning_rate": 9.276785714285715e-06, "loss": 43.3175, "step": 2767 }, { "epoch": 65.90746268656716, "grad_norm": 21.380569458007812, "learning_rate": 9.273809523809525e-06, "loss": 45.7111, "step": 2768 }, { "epoch": 65.9313432835821, "grad_norm": 26.568763732910156, "learning_rate": 9.270833333333334e-06, "loss": 44.4841, "step": 2769 }, { "epoch": 65.95522388059702, "grad_norm": 22.947973251342773, "learning_rate": 9.267857142857144e-06, "loss": 44.0597, "step": 2770 }, { "epoch": 65.97910447761194, "grad_norm": 28.732847213745117, "learning_rate": 9.264880952380954e-06, "loss": 43.9232, "step": 2771 }, { "epoch": 66.0, "grad_norm": 19.51029396057129, "learning_rate": 9.261904761904763e-06, "loss": 38.3696, "step": 2772 }, { "epoch": 66.02388059701492, "grad_norm": 26.772396087646484, "learning_rate": 9.258928571428572e-06, "loss": 44.29, "step": 2773 }, { "epoch": 66.04776119402985, "grad_norm": 28.08762550354004, "learning_rate": 9.255952380952381e-06, "loss": 43.5123, "step": 2774 }, { "epoch": 66.07164179104478, "grad_norm": 23.839458465576172, "learning_rate": 9.252976190476192e-06, "loss": 42.9248, "step": 2775 }, { "epoch": 66.0955223880597, "grad_norm": 34.98361587524414, "learning_rate": 9.250000000000001e-06, "loss": 44.5183, "step": 2776 }, { "epoch": 66.11940298507463, "grad_norm": 18.406028747558594, "learning_rate": 9.24702380952381e-06, "loss": 43.6267, "step": 2777 }, { "epoch": 66.14328358208955, "grad_norm": 24.17736053466797, "learning_rate": 9.244047619047621e-06, "loss": 43.9814, "step": 2778 }, { "epoch": 66.16716417910447, "grad_norm": 25.904033660888672, "learning_rate": 9.24107142857143e-06, "loss": 44.2089, "step": 2779 }, { "epoch": 66.1910447761194, "grad_norm": 18.518312454223633, "learning_rate": 9.238095238095239e-06, "loss": 43.8829, "step": 2780 }, { "epoch": 66.21492537313434, "grad_norm": 13.93060302734375, "learning_rate": 9.235119047619048e-06, "loss": 43.0088, "step": 2781 }, { "epoch": 66.23880597014926, "grad_norm": 18.91266632080078, "learning_rate": 9.232142857142859e-06, "loss": 43.9835, "step": 2782 }, { "epoch": 66.26268656716418, "grad_norm": 22.63692283630371, "learning_rate": 9.229166666666668e-06, "loss": 43.8378, "step": 2783 }, { "epoch": 66.2865671641791, "grad_norm": 19.935054779052734, "learning_rate": 9.226190476190477e-06, "loss": 43.5139, "step": 2784 }, { "epoch": 66.31044776119403, "grad_norm": 20.09627342224121, "learning_rate": 9.223214285714288e-06, "loss": 42.9882, "step": 2785 }, { "epoch": 66.33432835820895, "grad_norm": 16.47371482849121, "learning_rate": 9.220238095238097e-06, "loss": 44.0665, "step": 2786 }, { "epoch": 66.35820895522389, "grad_norm": 25.363866806030273, "learning_rate": 9.217261904761904e-06, "loss": 44.696, "step": 2787 }, { "epoch": 66.38208955223881, "grad_norm": 19.95291519165039, "learning_rate": 9.214285714285715e-06, "loss": 44.1116, "step": 2788 }, { "epoch": 66.40597014925373, "grad_norm": NaN, "learning_rate": 9.211309523809524e-06, "loss": 76.4785, "step": 2789 }, { "epoch": 66.42985074626866, "grad_norm": 19.490074157714844, "learning_rate": 9.211309523809524e-06, "loss": 44.0432, "step": 2790 }, { "epoch": 66.45373134328358, "grad_norm": 17.4990234375, "learning_rate": 9.208333333333333e-06, "loss": 43.2972, "step": 2791 }, { "epoch": 66.4776119402985, "grad_norm": 18.9461727142334, "learning_rate": 9.205357142857144e-06, "loss": 43.6698, "step": 2792 }, { "epoch": 66.50149253731344, "grad_norm": 27.035369873046875, "learning_rate": 9.202380952380953e-06, "loss": 43.0748, "step": 2793 }, { "epoch": 66.52537313432836, "grad_norm": 18.747451782226562, "learning_rate": 9.199404761904762e-06, "loss": 43.4684, "step": 2794 }, { "epoch": 66.54925373134328, "grad_norm": 22.31947135925293, "learning_rate": 9.196428571428571e-06, "loss": 43.224, "step": 2795 }, { "epoch": 66.57313432835821, "grad_norm": 20.444355010986328, "learning_rate": 9.193452380952382e-06, "loss": 44.7154, "step": 2796 }, { "epoch": 66.59701492537313, "grad_norm": 20.574586868286133, "learning_rate": 9.19047619047619e-06, "loss": 42.8251, "step": 2797 }, { "epoch": 66.62089552238805, "grad_norm": 21.91870880126953, "learning_rate": 9.1875e-06, "loss": 42.1616, "step": 2798 }, { "epoch": 66.64477611940299, "grad_norm": 20.03777503967285, "learning_rate": 9.18452380952381e-06, "loss": 43.9713, "step": 2799 }, { "epoch": 66.66865671641791, "grad_norm": 25.128442764282227, "learning_rate": 9.18154761904762e-06, "loss": 43.1631, "step": 2800 }, { "epoch": 66.69253731343284, "grad_norm": 21.742931365966797, "learning_rate": 9.178571428571429e-06, "loss": 43.8442, "step": 2801 }, { "epoch": 66.71641791044776, "grad_norm": 25.366992950439453, "learning_rate": 9.17559523809524e-06, "loss": 42.6068, "step": 2802 }, { "epoch": 66.74029850746268, "grad_norm": 22.109886169433594, "learning_rate": 9.172619047619048e-06, "loss": 43.0879, "step": 2803 }, { "epoch": 66.7641791044776, "grad_norm": 26.36429786682129, "learning_rate": 9.169642857142857e-06, "loss": 43.9465, "step": 2804 }, { "epoch": 66.78805970149254, "grad_norm": 20.30027198791504, "learning_rate": 9.166666666666666e-06, "loss": 44.1096, "step": 2805 }, { "epoch": 66.81194029850747, "grad_norm": 25.123811721801758, "learning_rate": 9.163690476190477e-06, "loss": 44.2981, "step": 2806 }, { "epoch": 66.83582089552239, "grad_norm": 23.127016067504883, "learning_rate": 9.160714285714286e-06, "loss": 42.5751, "step": 2807 }, { "epoch": 66.85970149253731, "grad_norm": NaN, "learning_rate": 9.157738095238095e-06, "loss": 66.1901, "step": 2808 }, { "epoch": 66.88358208955223, "grad_norm": 26.10099220275879, "learning_rate": 9.157738095238095e-06, "loss": 44.763, "step": 2809 }, { "epoch": 66.90746268656716, "grad_norm": 23.104337692260742, "learning_rate": 9.154761904761906e-06, "loss": 43.0964, "step": 2810 }, { "epoch": 66.9313432835821, "grad_norm": 25.94508934020996, "learning_rate": 9.151785714285715e-06, "loss": 44.2004, "step": 2811 }, { "epoch": 66.95522388059702, "grad_norm": 19.599328994750977, "learning_rate": 9.148809523809524e-06, "loss": 43.9027, "step": 2812 }, { "epoch": 66.97910447761194, "grad_norm": NaN, "learning_rate": 9.145833333333333e-06, "loss": 53.695, "step": 2813 }, { "epoch": 67.0, "grad_norm": 26.630434036254883, "learning_rate": 9.145833333333333e-06, "loss": 39.2172, "step": 2814 }, { "epoch": 67.02388059701492, "grad_norm": 20.954557418823242, "learning_rate": 9.142857142857144e-06, "loss": 45.022, "step": 2815 }, { "epoch": 67.04776119402985, "grad_norm": 34.23554611206055, "learning_rate": 9.139880952380953e-06, "loss": 44.5962, "step": 2816 }, { "epoch": 67.07164179104478, "grad_norm": 23.212488174438477, "learning_rate": 9.136904761904762e-06, "loss": 43.3898, "step": 2817 }, { "epoch": 67.0955223880597, "grad_norm": 28.811594009399414, "learning_rate": 9.133928571428573e-06, "loss": 43.0525, "step": 2818 }, { "epoch": 67.11940298507463, "grad_norm": 25.612319946289062, "learning_rate": 9.130952380952382e-06, "loss": 45.0229, "step": 2819 }, { "epoch": 67.14328358208955, "grad_norm": 19.928409576416016, "learning_rate": 9.12797619047619e-06, "loss": 42.2313, "step": 2820 }, { "epoch": 67.16716417910447, "grad_norm": 21.425382614135742, "learning_rate": 9.125e-06, "loss": 43.8085, "step": 2821 }, { "epoch": 67.1910447761194, "grad_norm": 24.726892471313477, "learning_rate": 9.12202380952381e-06, "loss": 42.5952, "step": 2822 }, { "epoch": 67.21492537313434, "grad_norm": 21.010210037231445, "learning_rate": 9.11904761904762e-06, "loss": 44.5508, "step": 2823 }, { "epoch": 67.23880597014926, "grad_norm": 17.506437301635742, "learning_rate": 9.116071428571429e-06, "loss": 42.7668, "step": 2824 }, { "epoch": 67.26268656716418, "grad_norm": 20.494918823242188, "learning_rate": 9.11309523809524e-06, "loss": 42.8061, "step": 2825 }, { "epoch": 67.2865671641791, "grad_norm": 20.985994338989258, "learning_rate": 9.110119047619049e-06, "loss": 44.8666, "step": 2826 }, { "epoch": 67.31044776119403, "grad_norm": 22.91364097595215, "learning_rate": 9.107142857142858e-06, "loss": 44.1208, "step": 2827 }, { "epoch": 67.33432835820895, "grad_norm": 19.81299591064453, "learning_rate": 9.104166666666667e-06, "loss": 43.939, "step": 2828 }, { "epoch": 67.35820895522389, "grad_norm": 23.234989166259766, "learning_rate": 9.101190476190477e-06, "loss": 42.0411, "step": 2829 }, { "epoch": 67.38208955223881, "grad_norm": 22.17540168762207, "learning_rate": 9.098214285714286e-06, "loss": 43.5693, "step": 2830 }, { "epoch": 67.40597014925373, "grad_norm": 21.292728424072266, "learning_rate": 9.095238095238095e-06, "loss": 44.0742, "step": 2831 }, { "epoch": 67.42985074626866, "grad_norm": 28.952625274658203, "learning_rate": 9.092261904761906e-06, "loss": 42.8393, "step": 2832 }, { "epoch": 67.45373134328358, "grad_norm": 19.387126922607422, "learning_rate": 9.089285714285715e-06, "loss": 41.7117, "step": 2833 }, { "epoch": 67.4776119402985, "grad_norm": 23.430946350097656, "learning_rate": 9.086309523809524e-06, "loss": 42.7006, "step": 2834 }, { "epoch": 67.50149253731344, "grad_norm": 28.108659744262695, "learning_rate": 9.083333333333333e-06, "loss": 45.417, "step": 2835 }, { "epoch": 67.52537313432836, "grad_norm": 22.115737915039062, "learning_rate": 9.080357142857144e-06, "loss": 44.4444, "step": 2836 }, { "epoch": 67.54925373134328, "grad_norm": 29.82137107849121, "learning_rate": 9.077380952380953e-06, "loss": 43.4888, "step": 2837 }, { "epoch": 67.57313432835821, "grad_norm": 25.010299682617188, "learning_rate": 9.074404761904762e-06, "loss": 43.9609, "step": 2838 }, { "epoch": 67.59701492537313, "grad_norm": 21.027952194213867, "learning_rate": 9.071428571428573e-06, "loss": 44.2489, "step": 2839 }, { "epoch": 67.62089552238805, "grad_norm": 27.009239196777344, "learning_rate": 9.068452380952382e-06, "loss": 43.6564, "step": 2840 }, { "epoch": 67.64477611940299, "grad_norm": 19.743545532226562, "learning_rate": 9.065476190476191e-06, "loss": 43.9997, "step": 2841 }, { "epoch": 67.66865671641791, "grad_norm": 28.90030288696289, "learning_rate": 9.0625e-06, "loss": 42.6926, "step": 2842 }, { "epoch": 67.69253731343284, "grad_norm": 25.418079376220703, "learning_rate": 9.05952380952381e-06, "loss": 43.2036, "step": 2843 }, { "epoch": 67.71641791044776, "grad_norm": 18.400348663330078, "learning_rate": 9.05654761904762e-06, "loss": 44.4565, "step": 2844 }, { "epoch": 67.74029850746268, "grad_norm": 26.924072265625, "learning_rate": 9.053571428571429e-06, "loss": 44.4893, "step": 2845 }, { "epoch": 67.7641791044776, "grad_norm": 25.352108001708984, "learning_rate": 9.05059523809524e-06, "loss": 43.0946, "step": 2846 }, { "epoch": 67.78805970149254, "grad_norm": 19.23507308959961, "learning_rate": 9.047619047619049e-06, "loss": 43.0507, "step": 2847 }, { "epoch": 67.81194029850747, "grad_norm": 28.143768310546875, "learning_rate": 9.044642857142858e-06, "loss": 43.2408, "step": 2848 }, { "epoch": 67.83582089552239, "grad_norm": 20.298084259033203, "learning_rate": 9.041666666666667e-06, "loss": 44.288, "step": 2849 }, { "epoch": 67.85970149253731, "grad_norm": 21.65508460998535, "learning_rate": 9.038690476190478e-06, "loss": 43.4648, "step": 2850 }, { "epoch": 67.88358208955223, "grad_norm": 23.92845916748047, "learning_rate": 9.035714285714287e-06, "loss": 42.1811, "step": 2851 }, { "epoch": 67.90746268656716, "grad_norm": 20.545076370239258, "learning_rate": 9.032738095238096e-06, "loss": 42.9022, "step": 2852 }, { "epoch": 67.9313432835821, "grad_norm": 32.36678695678711, "learning_rate": 9.029761904761906e-06, "loss": 43.4491, "step": 2853 }, { "epoch": 67.95522388059702, "grad_norm": 23.14188003540039, "learning_rate": 9.026785714285715e-06, "loss": 44.4911, "step": 2854 }, { "epoch": 67.97910447761194, "grad_norm": 31.488239288330078, "learning_rate": 9.023809523809524e-06, "loss": 44.5784, "step": 2855 }, { "epoch": 68.0, "grad_norm": 24.96849250793457, "learning_rate": 9.020833333333334e-06, "loss": 38.8154, "step": 2856 }, { "epoch": 68.02388059701492, "grad_norm": 26.00245475769043, "learning_rate": 9.017857142857144e-06, "loss": 43.6339, "step": 2857 }, { "epoch": 68.04776119402985, "grad_norm": 33.977596282958984, "learning_rate": 9.014880952380953e-06, "loss": 43.7634, "step": 2858 }, { "epoch": 68.07164179104478, "grad_norm": 21.762340545654297, "learning_rate": 9.011904761904762e-06, "loss": 43.8865, "step": 2859 }, { "epoch": 68.0955223880597, "grad_norm": 38.268455505371094, "learning_rate": 9.008928571428573e-06, "loss": 43.8947, "step": 2860 }, { "epoch": 68.11940298507463, "grad_norm": 26.789215087890625, "learning_rate": 9.005952380952382e-06, "loss": 42.0072, "step": 2861 }, { "epoch": 68.14328358208955, "grad_norm": 44.15632629394531, "learning_rate": 9.002976190476191e-06, "loss": 43.1045, "step": 2862 }, { "epoch": 68.16716417910447, "grad_norm": 36.71260070800781, "learning_rate": 9e-06, "loss": 43.7232, "step": 2863 }, { "epoch": 68.1910447761194, "grad_norm": 38.94734191894531, "learning_rate": 8.997023809523811e-06, "loss": 43.4934, "step": 2864 }, { "epoch": 68.21492537313434, "grad_norm": 38.295501708984375, "learning_rate": 8.99404761904762e-06, "loss": 43.3372, "step": 2865 }, { "epoch": 68.23880597014926, "grad_norm": 30.496740341186523, "learning_rate": 8.991071428571429e-06, "loss": 43.4905, "step": 2866 }, { "epoch": 68.26268656716418, "grad_norm": 30.001113891601562, "learning_rate": 8.98809523809524e-06, "loss": 44.0486, "step": 2867 }, { "epoch": 68.2865671641791, "grad_norm": 39.6522216796875, "learning_rate": 8.985119047619049e-06, "loss": 43.4828, "step": 2868 }, { "epoch": 68.31044776119403, "grad_norm": 31.28143310546875, "learning_rate": 8.982142857142858e-06, "loss": 43.7613, "step": 2869 }, { "epoch": 68.33432835820895, "grad_norm": 31.950016021728516, "learning_rate": 8.979166666666667e-06, "loss": 43.4209, "step": 2870 }, { "epoch": 68.35820895522389, "grad_norm": 30.61543083190918, "learning_rate": 8.976190476190478e-06, "loss": 43.2437, "step": 2871 }, { "epoch": 68.38208955223881, "grad_norm": 39.3588752746582, "learning_rate": 8.973214285714287e-06, "loss": 43.1893, "step": 2872 }, { "epoch": 68.40597014925373, "grad_norm": 29.70042610168457, "learning_rate": 8.970238095238096e-06, "loss": 42.1193, "step": 2873 }, { "epoch": 68.42985074626866, "grad_norm": 40.6136474609375, "learning_rate": 8.967261904761907e-06, "loss": 41.7532, "step": 2874 }, { "epoch": 68.45373134328358, "grad_norm": 36.44509506225586, "learning_rate": 8.964285714285716e-06, "loss": 44.5191, "step": 2875 }, { "epoch": 68.4776119402985, "grad_norm": NaN, "learning_rate": 8.961309523809525e-06, "loss": 70.4286, "step": 2876 }, { "epoch": 68.50149253731344, "grad_norm": 27.74057960510254, "learning_rate": 8.961309523809525e-06, "loss": 43.254, "step": 2877 }, { "epoch": 68.52537313432836, "grad_norm": 29.346860885620117, "learning_rate": 8.958333333333334e-06, "loss": 43.5863, "step": 2878 }, { "epoch": 68.54925373134328, "grad_norm": 36.642398834228516, "learning_rate": 8.955357142857144e-06, "loss": 43.3733, "step": 2879 }, { "epoch": 68.57313432835821, "grad_norm": 33.670162200927734, "learning_rate": 8.952380952380953e-06, "loss": 43.7232, "step": 2880 }, { "epoch": 68.59701492537313, "grad_norm": 33.738712310791016, "learning_rate": 8.949404761904763e-06, "loss": 42.704, "step": 2881 }, { "epoch": 68.62089552238805, "grad_norm": 31.452713012695312, "learning_rate": 8.946428571428573e-06, "loss": 43.8197, "step": 2882 }, { "epoch": 68.64477611940299, "grad_norm": 35.86618423461914, "learning_rate": 8.943452380952382e-06, "loss": 44.9871, "step": 2883 }, { "epoch": 68.66865671641791, "grad_norm": 30.94584083557129, "learning_rate": 8.940476190476191e-06, "loss": 43.8781, "step": 2884 }, { "epoch": 68.69253731343284, "grad_norm": 30.81380271911621, "learning_rate": 8.9375e-06, "loss": 44.2209, "step": 2885 }, { "epoch": 68.71641791044776, "grad_norm": 28.51966094970703, "learning_rate": 8.934523809523811e-06, "loss": 44.16, "step": 2886 }, { "epoch": 68.74029850746268, "grad_norm": NaN, "learning_rate": 8.93154761904762e-06, "loss": 77.0854, "step": 2887 }, { "epoch": 68.7641791044776, "grad_norm": 34.56461715698242, "learning_rate": 8.93154761904762e-06, "loss": 42.7519, "step": 2888 }, { "epoch": 68.78805970149254, "grad_norm": 27.781518936157227, "learning_rate": 8.92857142857143e-06, "loss": 44.0645, "step": 2889 }, { "epoch": 68.81194029850747, "grad_norm": 33.2479133605957, "learning_rate": 8.92559523809524e-06, "loss": 44.109, "step": 2890 }, { "epoch": 68.83582089552239, "grad_norm": 30.329626083374023, "learning_rate": 8.922619047619049e-06, "loss": 42.8678, "step": 2891 }, { "epoch": 68.85970149253731, "grad_norm": 32.120269775390625, "learning_rate": 8.919642857142858e-06, "loss": 44.2325, "step": 2892 }, { "epoch": 68.88358208955223, "grad_norm": 27.283164978027344, "learning_rate": 8.916666666666667e-06, "loss": 43.7788, "step": 2893 }, { "epoch": 68.90746268656716, "grad_norm": 31.86570930480957, "learning_rate": 8.913690476190478e-06, "loss": 44.3469, "step": 2894 }, { "epoch": 68.9313432835821, "grad_norm": 22.55097007751465, "learning_rate": 8.910714285714287e-06, "loss": 43.85, "step": 2895 }, { "epoch": 68.95522388059702, "grad_norm": 34.648773193359375, "learning_rate": 8.907738095238096e-06, "loss": 44.526, "step": 2896 }, { "epoch": 68.97910447761194, "grad_norm": 26.3565731048584, "learning_rate": 8.904761904761905e-06, "loss": 44.2, "step": 2897 }, { "epoch": 69.0, "grad_norm": 34.48598098754883, "learning_rate": 8.901785714285714e-06, "loss": 37.5511, "step": 2898 }, { "epoch": 69.02388059701492, "grad_norm": 36.6775016784668, "learning_rate": 8.898809523809525e-06, "loss": 42.6231, "step": 2899 }, { "epoch": 69.04776119402985, "grad_norm": 33.529296875, "learning_rate": 8.895833333333334e-06, "loss": 43.2504, "step": 2900 }, { "epoch": 69.07164179104478, "grad_norm": 31.762542724609375, "learning_rate": 8.892857142857143e-06, "loss": 43.752, "step": 2901 }, { "epoch": 69.0955223880597, "grad_norm": 28.147245407104492, "learning_rate": 8.889880952380952e-06, "loss": 43.8319, "step": 2902 }, { "epoch": 69.11940298507463, "grad_norm": 27.896669387817383, "learning_rate": 8.886904761904763e-06, "loss": 42.1947, "step": 2903 }, { "epoch": 69.14328358208955, "grad_norm": 28.625850677490234, "learning_rate": 8.883928571428572e-06, "loss": 43.3085, "step": 2904 }, { "epoch": 69.16716417910447, "grad_norm": 28.545974731445312, "learning_rate": 8.88095238095238e-06, "loss": 42.9532, "step": 2905 }, { "epoch": 69.1910447761194, "grad_norm": 34.869781494140625, "learning_rate": 8.877976190476192e-06, "loss": 44.2308, "step": 2906 }, { "epoch": 69.21492537313434, "grad_norm": 30.4566650390625, "learning_rate": 8.875e-06, "loss": 43.3219, "step": 2907 }, { "epoch": 69.23880597014926, "grad_norm": 29.15296745300293, "learning_rate": 8.87202380952381e-06, "loss": 44.0736, "step": 2908 }, { "epoch": 69.26268656716418, "grad_norm": 29.97230339050293, "learning_rate": 8.869047619047619e-06, "loss": 43.3511, "step": 2909 }, { "epoch": 69.2865671641791, "grad_norm": 30.087413787841797, "learning_rate": 8.86607142857143e-06, "loss": 43.8738, "step": 2910 }, { "epoch": 69.31044776119403, "grad_norm": 23.660356521606445, "learning_rate": 8.863095238095238e-06, "loss": 43.5448, "step": 2911 }, { "epoch": 69.33432835820895, "grad_norm": 28.7690372467041, "learning_rate": 8.860119047619048e-06, "loss": 42.9617, "step": 2912 }, { "epoch": 69.35820895522389, "grad_norm": 25.60896110534668, "learning_rate": 8.857142857142858e-06, "loss": 45.3785, "step": 2913 }, { "epoch": 69.38208955223881, "grad_norm": 31.27063751220703, "learning_rate": 8.854166666666667e-06, "loss": 43.376, "step": 2914 }, { "epoch": 69.40597014925373, "grad_norm": 18.817829132080078, "learning_rate": 8.851190476190476e-06, "loss": 44.0999, "step": 2915 }, { "epoch": 69.42985074626866, "grad_norm": 36.125919342041016, "learning_rate": 8.848214285714285e-06, "loss": 43.8083, "step": 2916 }, { "epoch": 69.45373134328358, "grad_norm": 25.558866500854492, "learning_rate": 8.845238095238096e-06, "loss": 45.1369, "step": 2917 }, { "epoch": 69.4776119402985, "grad_norm": 38.35983657836914, "learning_rate": 8.842261904761905e-06, "loss": 43.587, "step": 2918 }, { "epoch": 69.50149253731344, "grad_norm": 30.0064754486084, "learning_rate": 8.839285714285714e-06, "loss": 43.1645, "step": 2919 }, { "epoch": 69.52537313432836, "grad_norm": 31.177242279052734, "learning_rate": 8.836309523809525e-06, "loss": 42.2305, "step": 2920 }, { "epoch": 69.54925373134328, "grad_norm": 29.04176139831543, "learning_rate": 8.833333333333334e-06, "loss": 45.1403, "step": 2921 }, { "epoch": 69.57313432835821, "grad_norm": 35.95783233642578, "learning_rate": 8.830357142857143e-06, "loss": 43.4081, "step": 2922 }, { "epoch": 69.59701492537313, "grad_norm": 27.838382720947266, "learning_rate": 8.827380952380952e-06, "loss": 44.7195, "step": 2923 }, { "epoch": 69.62089552238805, "grad_norm": 30.860624313354492, "learning_rate": 8.824404761904763e-06, "loss": 42.7175, "step": 2924 }, { "epoch": 69.64477611940299, "grad_norm": 21.701316833496094, "learning_rate": 8.821428571428572e-06, "loss": 43.0401, "step": 2925 }, { "epoch": 69.66865671641791, "grad_norm": 27.270732879638672, "learning_rate": 8.818452380952381e-06, "loss": 43.686, "step": 2926 }, { "epoch": 69.69253731343284, "grad_norm": 25.814538955688477, "learning_rate": 8.815476190476192e-06, "loss": 44.3424, "step": 2927 }, { "epoch": 69.71641791044776, "grad_norm": 26.155197143554688, "learning_rate": 8.8125e-06, "loss": 43.6455, "step": 2928 }, { "epoch": 69.74029850746268, "grad_norm": 20.438846588134766, "learning_rate": 8.80952380952381e-06, "loss": 44.3784, "step": 2929 }, { "epoch": 69.7641791044776, "grad_norm": 26.45317268371582, "learning_rate": 8.806547619047619e-06, "loss": 42.6501, "step": 2930 }, { "epoch": 69.78805970149254, "grad_norm": 22.06026840209961, "learning_rate": 8.80357142857143e-06, "loss": 42.4144, "step": 2931 }, { "epoch": 69.81194029850747, "grad_norm": 24.45191764831543, "learning_rate": 8.800595238095239e-06, "loss": 43.7415, "step": 2932 }, { "epoch": 69.83582089552239, "grad_norm": 26.77782440185547, "learning_rate": 8.797619047619048e-06, "loss": 43.2565, "step": 2933 }, { "epoch": 69.85970149253731, "grad_norm": 22.350242614746094, "learning_rate": 8.794642857142858e-06, "loss": 43.5869, "step": 2934 }, { "epoch": 69.88358208955223, "grad_norm": 23.063016891479492, "learning_rate": 8.791666666666667e-06, "loss": 43.3821, "step": 2935 }, { "epoch": 69.90746268656716, "grad_norm": 18.864139556884766, "learning_rate": 8.788690476190477e-06, "loss": 42.8421, "step": 2936 }, { "epoch": 69.9313432835821, "grad_norm": 19.763843536376953, "learning_rate": 8.785714285714286e-06, "loss": 43.3783, "step": 2937 }, { "epoch": 69.95522388059702, "grad_norm": 19.347801208496094, "learning_rate": 8.782738095238096e-06, "loss": 42.8249, "step": 2938 }, { "epoch": 69.97910447761194, "grad_norm": 16.3013858795166, "learning_rate": 8.779761904761905e-06, "loss": 42.9306, "step": 2939 }, { "epoch": 70.0, "grad_norm": 15.056166648864746, "learning_rate": 8.776785714285714e-06, "loss": 38.2541, "step": 2940 }, { "epoch": 70.02388059701492, "grad_norm": 14.691337585449219, "learning_rate": 8.773809523809525e-06, "loss": 42.8378, "step": 2941 }, { "epoch": 70.04776119402985, "grad_norm": 24.51978874206543, "learning_rate": 8.770833333333334e-06, "loss": 43.2568, "step": 2942 }, { "epoch": 70.07164179104478, "grad_norm": 16.552833557128906, "learning_rate": 8.767857142857143e-06, "loss": 44.9251, "step": 2943 }, { "epoch": 70.0955223880597, "grad_norm": 26.28189468383789, "learning_rate": 8.764880952380952e-06, "loss": 43.8141, "step": 2944 }, { "epoch": 70.11940298507463, "grad_norm": 21.29142951965332, "learning_rate": 8.761904761904763e-06, "loss": 43.8724, "step": 2945 }, { "epoch": 70.14328358208955, "grad_norm": 23.159542083740234, "learning_rate": 8.758928571428572e-06, "loss": 44.4016, "step": 2946 }, { "epoch": 70.16716417910447, "grad_norm": 21.08184051513672, "learning_rate": 8.755952380952381e-06, "loss": 44.1115, "step": 2947 }, { "epoch": 70.1910447761194, "grad_norm": 18.838504791259766, "learning_rate": 8.752976190476192e-06, "loss": 43.2276, "step": 2948 }, { "epoch": 70.21492537313434, "grad_norm": 21.613079071044922, "learning_rate": 8.750000000000001e-06, "loss": 42.679, "step": 2949 }, { "epoch": 70.23880597014926, "grad_norm": 21.29805564880371, "learning_rate": 8.74702380952381e-06, "loss": 43.1822, "step": 2950 }, { "epoch": 70.26268656716418, "grad_norm": NaN, "learning_rate": 8.744047619047619e-06, "loss": 43.3802, "step": 2951 }, { "epoch": 70.2865671641791, "grad_norm": 22.393659591674805, "learning_rate": 8.744047619047619e-06, "loss": 43.3569, "step": 2952 }, { "epoch": 70.31044776119403, "grad_norm": 17.94029998779297, "learning_rate": 8.74107142857143e-06, "loss": 42.4398, "step": 2953 }, { "epoch": 70.33432835820895, "grad_norm": 17.575550079345703, "learning_rate": 8.738095238095239e-06, "loss": 42.3488, "step": 2954 }, { "epoch": 70.35820895522389, "grad_norm": 18.268203735351562, "learning_rate": 8.735119047619048e-06, "loss": 42.6199, "step": 2955 }, { "epoch": 70.38208955223881, "grad_norm": 25.415603637695312, "learning_rate": 8.732142857142859e-06, "loss": 43.803, "step": 2956 }, { "epoch": 70.40597014925373, "grad_norm": 23.37176513671875, "learning_rate": 8.729166666666668e-06, "loss": 44.5072, "step": 2957 }, { "epoch": 70.42985074626866, "grad_norm": 24.91670036315918, "learning_rate": 8.726190476190477e-06, "loss": 44.1411, "step": 2958 }, { "epoch": 70.45373134328358, "grad_norm": 20.50780487060547, "learning_rate": 8.723214285714286e-06, "loss": 45.4114, "step": 2959 }, { "epoch": 70.4776119402985, "grad_norm": 21.885364532470703, "learning_rate": 8.720238095238096e-06, "loss": 43.1786, "step": 2960 }, { "epoch": 70.50149253731344, "grad_norm": 18.620540618896484, "learning_rate": 8.717261904761906e-06, "loss": 42.5272, "step": 2961 }, { "epoch": 70.52537313432836, "grad_norm": 27.28016471862793, "learning_rate": 8.714285714285715e-06, "loss": 44.0531, "step": 2962 }, { "epoch": 70.54925373134328, "grad_norm": 22.124799728393555, "learning_rate": 8.711309523809525e-06, "loss": 43.445, "step": 2963 }, { "epoch": 70.57313432835821, "grad_norm": 25.905492782592773, "learning_rate": 8.708333333333334e-06, "loss": 43.619, "step": 2964 }, { "epoch": 70.59701492537313, "grad_norm": 23.890172958374023, "learning_rate": 8.705357142857143e-06, "loss": 43.1365, "step": 2965 }, { "epoch": 70.62089552238805, "grad_norm": 20.158838272094727, "learning_rate": 8.702380952380952e-06, "loss": 41.9394, "step": 2966 }, { "epoch": 70.64477611940299, "grad_norm": 24.878849029541016, "learning_rate": 8.699404761904763e-06, "loss": 43.7568, "step": 2967 }, { "epoch": 70.66865671641791, "grad_norm": 20.08368492126465, "learning_rate": 8.696428571428572e-06, "loss": 43.7444, "step": 2968 }, { "epoch": 70.69253731343284, "grad_norm": 24.59374237060547, "learning_rate": 8.693452380952381e-06, "loss": 43.6659, "step": 2969 }, { "epoch": 70.71641791044776, "grad_norm": 21.96346664428711, "learning_rate": 8.690476190476192e-06, "loss": 42.8718, "step": 2970 }, { "epoch": 70.74029850746268, "grad_norm": 20.61510467529297, "learning_rate": 8.687500000000001e-06, "loss": 43.8264, "step": 2971 }, { "epoch": 70.7641791044776, "grad_norm": 25.367786407470703, "learning_rate": 8.68452380952381e-06, "loss": 42.2802, "step": 2972 }, { "epoch": 70.78805970149254, "grad_norm": 21.911298751831055, "learning_rate": 8.68154761904762e-06, "loss": 44.4695, "step": 2973 }, { "epoch": 70.81194029850747, "grad_norm": 26.7462100982666, "learning_rate": 8.67857142857143e-06, "loss": 43.4564, "step": 2974 }, { "epoch": 70.83582089552239, "grad_norm": 23.370485305786133, "learning_rate": 8.675595238095239e-06, "loss": 45.0502, "step": 2975 }, { "epoch": 70.85970149253731, "grad_norm": 26.052675247192383, "learning_rate": 8.672619047619048e-06, "loss": 42.6782, "step": 2976 }, { "epoch": 70.88358208955223, "grad_norm": 21.637617111206055, "learning_rate": 8.669642857142859e-06, "loss": 44.426, "step": 2977 }, { "epoch": 70.90746268656716, "grad_norm": 26.575313568115234, "learning_rate": 8.666666666666668e-06, "loss": 43.6968, "step": 2978 }, { "epoch": 70.9313432835821, "grad_norm": 23.814599990844727, "learning_rate": 8.663690476190477e-06, "loss": 43.3269, "step": 2979 }, { "epoch": 70.95522388059702, "grad_norm": 21.367717742919922, "learning_rate": 8.660714285714286e-06, "loss": 43.1399, "step": 2980 }, { "epoch": 70.97910447761194, "grad_norm": 19.98285484313965, "learning_rate": 8.657738095238097e-06, "loss": 42.9342, "step": 2981 }, { "epoch": 71.0, "grad_norm": 22.52842140197754, "learning_rate": 8.654761904761906e-06, "loss": 36.6415, "step": 2982 }, { "epoch": 71.02388059701492, "grad_norm": 22.04327392578125, "learning_rate": 8.651785714285715e-06, "loss": 43.0825, "step": 2983 }, { "epoch": 71.04776119402985, "grad_norm": 21.24346351623535, "learning_rate": 8.648809523809526e-06, "loss": 43.511, "step": 2984 }, { "epoch": 71.07164179104478, "grad_norm": 23.4123592376709, "learning_rate": 8.645833333333335e-06, "loss": 43.1464, "step": 2985 }, { "epoch": 71.0955223880597, "grad_norm": 23.918460845947266, "learning_rate": 8.642857142857144e-06, "loss": 44.1223, "step": 2986 }, { "epoch": 71.11940298507463, "grad_norm": 16.164955139160156, "learning_rate": 8.639880952380953e-06, "loss": 43.2759, "step": 2987 }, { "epoch": 71.14328358208955, "grad_norm": 22.15060043334961, "learning_rate": 8.636904761904763e-06, "loss": 43.1227, "step": 2988 }, { "epoch": 71.16716417910447, "grad_norm": 19.598203659057617, "learning_rate": 8.633928571428572e-06, "loss": 41.9802, "step": 2989 }, { "epoch": 71.1910447761194, "grad_norm": 16.25682830810547, "learning_rate": 8.630952380952381e-06, "loss": 42.1285, "step": 2990 }, { "epoch": 71.21492537313434, "grad_norm": 20.54530143737793, "learning_rate": 8.627976190476192e-06, "loss": 43.4601, "step": 2991 }, { "epoch": 71.23880597014926, "grad_norm": 25.911041259765625, "learning_rate": 8.625000000000001e-06, "loss": 42.6006, "step": 2992 }, { "epoch": 71.26268656716418, "grad_norm": 16.15741539001465, "learning_rate": 8.62202380952381e-06, "loss": 42.6621, "step": 2993 }, { "epoch": 71.2865671641791, "grad_norm": 30.334243774414062, "learning_rate": 8.61904761904762e-06, "loss": 43.4206, "step": 2994 }, { "epoch": 71.31044776119403, "grad_norm": 26.023889541625977, "learning_rate": 8.61607142857143e-06, "loss": 43.5777, "step": 2995 }, { "epoch": 71.33432835820895, "grad_norm": 21.3012638092041, "learning_rate": 8.61309523809524e-06, "loss": 42.4823, "step": 2996 }, { "epoch": 71.35820895522389, "grad_norm": 25.109596252441406, "learning_rate": 8.610119047619048e-06, "loss": 44.2666, "step": 2997 }, { "epoch": 71.38208955223881, "grad_norm": 22.26563835144043, "learning_rate": 8.607142857142859e-06, "loss": 40.9261, "step": 2998 }, { "epoch": 71.40597014925373, "grad_norm": 30.94297218322754, "learning_rate": 8.604166666666668e-06, "loss": 43.5651, "step": 2999 }, { "epoch": 71.42985074626866, "grad_norm": 24.670034408569336, "learning_rate": 8.601190476190477e-06, "loss": 43.6695, "step": 3000 }, { "epoch": 71.45373134328358, "grad_norm": 29.290430068969727, "learning_rate": 8.598214285714288e-06, "loss": 43.6725, "step": 3001 }, { "epoch": 71.4776119402985, "grad_norm": 23.058176040649414, "learning_rate": 8.595238095238097e-06, "loss": 43.695, "step": 3002 }, { "epoch": 71.50149253731344, "grad_norm": 21.41179084777832, "learning_rate": 8.592261904761904e-06, "loss": 43.1715, "step": 3003 }, { "epoch": 71.52537313432836, "grad_norm": 22.226594924926758, "learning_rate": 8.589285714285715e-06, "loss": 43.1411, "step": 3004 }, { "epoch": 71.54925373134328, "grad_norm": 19.892719268798828, "learning_rate": 8.586309523809524e-06, "loss": 44.4913, "step": 3005 }, { "epoch": 71.57313432835821, "grad_norm": 18.263708114624023, "learning_rate": 8.583333333333333e-06, "loss": 43.2348, "step": 3006 }, { "epoch": 71.59701492537313, "grad_norm": 22.065439224243164, "learning_rate": 8.580357142857144e-06, "loss": 44.3296, "step": 3007 }, { "epoch": 71.62089552238805, "grad_norm": 19.95087432861328, "learning_rate": 8.577380952380953e-06, "loss": 44.827, "step": 3008 }, { "epoch": 71.64477611940299, "grad_norm": 19.371231079101562, "learning_rate": 8.574404761904762e-06, "loss": 43.9034, "step": 3009 }, { "epoch": 71.66865671641791, "grad_norm": 22.265600204467773, "learning_rate": 8.571428571428571e-06, "loss": 42.659, "step": 3010 }, { "epoch": 71.69253731343284, "grad_norm": 18.449695587158203, "learning_rate": 8.568452380952382e-06, "loss": 43.674, "step": 3011 }, { "epoch": 71.71641791044776, "grad_norm": 25.14525604248047, "learning_rate": 8.56547619047619e-06, "loss": 43.5625, "step": 3012 }, { "epoch": 71.74029850746268, "grad_norm": 23.78099822998047, "learning_rate": 8.5625e-06, "loss": 44.3134, "step": 3013 }, { "epoch": 71.7641791044776, "grad_norm": 18.84084129333496, "learning_rate": 8.55952380952381e-06, "loss": 44.8436, "step": 3014 }, { "epoch": 71.78805970149254, "grad_norm": 28.59735107421875, "learning_rate": 8.55654761904762e-06, "loss": 43.3521, "step": 3015 }, { "epoch": 71.81194029850747, "grad_norm": 22.86484718322754, "learning_rate": 8.553571428571429e-06, "loss": 44.0742, "step": 3016 }, { "epoch": 71.83582089552239, "grad_norm": 17.34327507019043, "learning_rate": 8.550595238095238e-06, "loss": 43.5721, "step": 3017 }, { "epoch": 71.85970149253731, "grad_norm": 32.2520637512207, "learning_rate": 8.547619047619048e-06, "loss": 42.3465, "step": 3018 }, { "epoch": 71.88358208955223, "grad_norm": 23.380569458007812, "learning_rate": 8.544642857142857e-06, "loss": 43.2287, "step": 3019 }, { "epoch": 71.90746268656716, "grad_norm": 31.07112693786621, "learning_rate": 8.541666666666666e-06, "loss": 44.3177, "step": 3020 }, { "epoch": 71.9313432835821, "grad_norm": 24.860567092895508, "learning_rate": 8.538690476190477e-06, "loss": 43.6361, "step": 3021 }, { "epoch": 71.95522388059702, "grad_norm": 22.43517303466797, "learning_rate": 8.535714285714286e-06, "loss": 43.5824, "step": 3022 }, { "epoch": 71.97910447761194, "grad_norm": 27.975297927856445, "learning_rate": 8.532738095238095e-06, "loss": 43.4829, "step": 3023 }, { "epoch": 72.0, "grad_norm": 17.978660583496094, "learning_rate": 8.529761904761904e-06, "loss": 38.115, "step": 3024 }, { "epoch": 72.02388059701492, "grad_norm": 31.69437599182129, "learning_rate": 8.526785714285715e-06, "loss": 43.5237, "step": 3025 }, { "epoch": 72.04776119402985, "grad_norm": 27.577686309814453, "learning_rate": 8.523809523809524e-06, "loss": 43.1406, "step": 3026 }, { "epoch": 72.07164179104478, "grad_norm": 28.320255279541016, "learning_rate": 8.520833333333333e-06, "loss": 44.5784, "step": 3027 }, { "epoch": 72.0955223880597, "grad_norm": 26.59323501586914, "learning_rate": 8.517857142857144e-06, "loss": 42.2067, "step": 3028 }, { "epoch": 72.11940298507463, "grad_norm": 21.94460105895996, "learning_rate": 8.514880952380953e-06, "loss": 43.4262, "step": 3029 }, { "epoch": 72.14328358208955, "grad_norm": 23.56421661376953, "learning_rate": 8.511904761904762e-06, "loss": 41.1196, "step": 3030 }, { "epoch": 72.16716417910447, "grad_norm": 19.21329689025879, "learning_rate": 8.508928571428571e-06, "loss": 42.5441, "step": 3031 }, { "epoch": 72.1910447761194, "grad_norm": 23.377782821655273, "learning_rate": 8.505952380952382e-06, "loss": 43.0296, "step": 3032 }, { "epoch": 72.21492537313434, "grad_norm": 24.402435302734375, "learning_rate": 8.502976190476191e-06, "loss": 44.2474, "step": 3033 }, { "epoch": 72.23880597014926, "grad_norm": 18.61969566345215, "learning_rate": 8.5e-06, "loss": 43.6984, "step": 3034 }, { "epoch": 72.26268656716418, "grad_norm": 30.627338409423828, "learning_rate": 8.49702380952381e-06, "loss": 42.5441, "step": 3035 }, { "epoch": 72.2865671641791, "grad_norm": 26.115427017211914, "learning_rate": 8.49404761904762e-06, "loss": 41.8235, "step": 3036 }, { "epoch": 72.31044776119403, "grad_norm": 24.971904754638672, "learning_rate": 8.491071428571429e-06, "loss": 43.9344, "step": 3037 }, { "epoch": 72.33432835820895, "grad_norm": 26.42667007446289, "learning_rate": 8.488095238095238e-06, "loss": 43.2757, "step": 3038 }, { "epoch": 72.35820895522389, "grad_norm": 23.19200897216797, "learning_rate": 8.485119047619049e-06, "loss": 42.9536, "step": 3039 }, { "epoch": 72.38208955223881, "grad_norm": 31.263626098632812, "learning_rate": 8.482142857142858e-06, "loss": 42.8037, "step": 3040 }, { "epoch": 72.40597014925373, "grad_norm": 25.049270629882812, "learning_rate": 8.479166666666667e-06, "loss": 42.8005, "step": 3041 }, { "epoch": 72.42985074626866, "grad_norm": 20.71118927001953, "learning_rate": 8.476190476190477e-06, "loss": 43.106, "step": 3042 }, { "epoch": 72.45373134328358, "grad_norm": 22.156679153442383, "learning_rate": 8.473214285714286e-06, "loss": 42.6742, "step": 3043 }, { "epoch": 72.4776119402985, "grad_norm": 22.091957092285156, "learning_rate": 8.470238095238095e-06, "loss": 43.6855, "step": 3044 }, { "epoch": 72.50149253731344, "grad_norm": 21.12959861755371, "learning_rate": 8.467261904761905e-06, "loss": 42.9416, "step": 3045 }, { "epoch": 72.52537313432836, "grad_norm": 20.53251075744629, "learning_rate": 8.464285714285715e-06, "loss": 44.3919, "step": 3046 }, { "epoch": 72.54925373134328, "grad_norm": 19.188758850097656, "learning_rate": 8.461309523809524e-06, "loss": 43.3152, "step": 3047 }, { "epoch": 72.57313432835821, "grad_norm": 26.149826049804688, "learning_rate": 8.458333333333333e-06, "loss": 44.7382, "step": 3048 }, { "epoch": 72.59701492537313, "grad_norm": 18.40545082092285, "learning_rate": 8.455357142857144e-06, "loss": 44.0886, "step": 3049 }, { "epoch": 72.62089552238805, "grad_norm": 21.535911560058594, "learning_rate": 8.452380952380953e-06, "loss": 43.2625, "step": 3050 }, { "epoch": 72.64477611940299, "grad_norm": 17.798324584960938, "learning_rate": 8.449404761904762e-06, "loss": 43.5087, "step": 3051 }, { "epoch": 72.66865671641791, "grad_norm": 22.086271286010742, "learning_rate": 8.446428571428571e-06, "loss": 43.7427, "step": 3052 }, { "epoch": 72.69253731343284, "grad_norm": 20.795154571533203, "learning_rate": 8.443452380952382e-06, "loss": 43.6492, "step": 3053 }, { "epoch": 72.71641791044776, "grad_norm": 23.004671096801758, "learning_rate": 8.440476190476191e-06, "loss": 43.2841, "step": 3054 }, { "epoch": 72.74029850746268, "grad_norm": 19.808507919311523, "learning_rate": 8.4375e-06, "loss": 43.2447, "step": 3055 }, { "epoch": 72.7641791044776, "grad_norm": 25.06849479675293, "learning_rate": 8.434523809523811e-06, "loss": 42.7637, "step": 3056 }, { "epoch": 72.78805970149254, "grad_norm": 25.014245986938477, "learning_rate": 8.43154761904762e-06, "loss": 43.4822, "step": 3057 }, { "epoch": 72.81194029850747, "grad_norm": 22.324596405029297, "learning_rate": 8.428571428571429e-06, "loss": 43.3555, "step": 3058 }, { "epoch": 72.83582089552239, "grad_norm": 28.37264060974121, "learning_rate": 8.425595238095238e-06, "loss": 45.1914, "step": 3059 }, { "epoch": 72.85970149253731, "grad_norm": 20.218700408935547, "learning_rate": 8.422619047619049e-06, "loss": 43.7534, "step": 3060 }, { "epoch": 72.88358208955223, "grad_norm": 23.96106719970703, "learning_rate": 8.419642857142858e-06, "loss": 43.3726, "step": 3061 }, { "epoch": 72.90746268656716, "grad_norm": 24.620227813720703, "learning_rate": 8.416666666666667e-06, "loss": 43.2246, "step": 3062 }, { "epoch": 72.9313432835821, "grad_norm": 17.006282806396484, "learning_rate": 8.413690476190478e-06, "loss": 43.0239, "step": 3063 }, { "epoch": 72.95522388059702, "grad_norm": 32.321250915527344, "learning_rate": 8.410714285714287e-06, "loss": 43.8265, "step": 3064 }, { "epoch": 72.97910447761194, "grad_norm": 26.541305541992188, "learning_rate": 8.407738095238096e-06, "loss": 43.715, "step": 3065 }, { "epoch": 73.0, "grad_norm": 20.71360969543457, "learning_rate": 8.404761904761905e-06, "loss": 38.4916, "step": 3066 }, { "epoch": 73.02388059701492, "grad_norm": 25.500295639038086, "learning_rate": 8.401785714285715e-06, "loss": 43.3955, "step": 3067 }, { "epoch": 73.04776119402985, "grad_norm": 26.59987449645996, "learning_rate": 8.398809523809525e-06, "loss": 43.3811, "step": 3068 }, { "epoch": 73.07164179104478, "grad_norm": 22.731945037841797, "learning_rate": 8.395833333333334e-06, "loss": 43.2902, "step": 3069 }, { "epoch": 73.0955223880597, "grad_norm": 20.676626205444336, "learning_rate": 8.392857142857144e-06, "loss": 44.4288, "step": 3070 }, { "epoch": 73.11940298507463, "grad_norm": 24.257009506225586, "learning_rate": 8.389880952380953e-06, "loss": 42.6346, "step": 3071 }, { "epoch": 73.14328358208955, "grad_norm": 20.27753448486328, "learning_rate": 8.386904761904762e-06, "loss": 43.195, "step": 3072 }, { "epoch": 73.16716417910447, "grad_norm": 22.37655258178711, "learning_rate": 8.383928571428573e-06, "loss": 43.7297, "step": 3073 }, { "epoch": 73.1910447761194, "grad_norm": 22.078298568725586, "learning_rate": 8.380952380952382e-06, "loss": 45.0908, "step": 3074 }, { "epoch": 73.21492537313434, "grad_norm": 22.645662307739258, "learning_rate": 8.377976190476191e-06, "loss": 43.4577, "step": 3075 }, { "epoch": 73.23880597014926, "grad_norm": 18.159029006958008, "learning_rate": 8.375e-06, "loss": 42.8618, "step": 3076 }, { "epoch": 73.26268656716418, "grad_norm": 22.44676399230957, "learning_rate": 8.372023809523811e-06, "loss": 41.7892, "step": 3077 }, { "epoch": 73.2865671641791, "grad_norm": 21.480403900146484, "learning_rate": 8.36904761904762e-06, "loss": 44.0939, "step": 3078 }, { "epoch": 73.31044776119403, "grad_norm": 19.49287986755371, "learning_rate": 8.366071428571429e-06, "loss": 44.0851, "step": 3079 }, { "epoch": 73.33432835820895, "grad_norm": 18.453174591064453, "learning_rate": 8.36309523809524e-06, "loss": 42.5673, "step": 3080 }, { "epoch": 73.35820895522389, "grad_norm": NaN, "learning_rate": 8.360119047619049e-06, "loss": 71.053, "step": 3081 }, { "epoch": 73.38208955223881, "grad_norm": 20.119003295898438, "learning_rate": 8.360119047619049e-06, "loss": 42.981, "step": 3082 }, { "epoch": 73.40597014925373, "grad_norm": 18.897857666015625, "learning_rate": 8.357142857142858e-06, "loss": 42.5696, "step": 3083 }, { "epoch": 73.42985074626866, "grad_norm": 26.755035400390625, "learning_rate": 8.354166666666667e-06, "loss": 43.2951, "step": 3084 }, { "epoch": 73.45373134328358, "grad_norm": 19.104629516601562, "learning_rate": 8.351190476190478e-06, "loss": 42.5016, "step": 3085 }, { "epoch": 73.4776119402985, "grad_norm": 25.36631965637207, "learning_rate": 8.348214285714287e-06, "loss": 42.6552, "step": 3086 }, { "epoch": 73.50149253731344, "grad_norm": 27.23288345336914, "learning_rate": 8.345238095238096e-06, "loss": 42.6917, "step": 3087 }, { "epoch": 73.52537313432836, "grad_norm": 16.930316925048828, "learning_rate": 8.342261904761907e-06, "loss": 43.1315, "step": 3088 }, { "epoch": 73.54925373134328, "grad_norm": 26.30918312072754, "learning_rate": 8.339285714285716e-06, "loss": 42.7197, "step": 3089 }, { "epoch": 73.57313432835821, "grad_norm": 24.781511306762695, "learning_rate": 8.336309523809525e-06, "loss": 42.5099, "step": 3090 }, { "epoch": 73.59701492537313, "grad_norm": 19.516469955444336, "learning_rate": 8.333333333333334e-06, "loss": 43.0713, "step": 3091 }, { "epoch": 73.62089552238805, "grad_norm": 22.657184600830078, "learning_rate": 8.330357142857144e-06, "loss": 43.3808, "step": 3092 }, { "epoch": 73.64477611940299, "grad_norm": 18.468502044677734, "learning_rate": 8.327380952380954e-06, "loss": 43.7249, "step": 3093 }, { "epoch": 73.66865671641791, "grad_norm": 17.16704750061035, "learning_rate": 8.324404761904763e-06, "loss": 43.8457, "step": 3094 }, { "epoch": 73.69253731343284, "grad_norm": 21.254226684570312, "learning_rate": 8.321428571428573e-06, "loss": 43.5131, "step": 3095 }, { "epoch": 73.71641791044776, "grad_norm": 24.988006591796875, "learning_rate": 8.318452380952382e-06, "loss": 43.419, "step": 3096 }, { "epoch": 73.74029850746268, "grad_norm": 18.345117568969727, "learning_rate": 8.315476190476191e-06, "loss": 43.89, "step": 3097 }, { "epoch": 73.7641791044776, "grad_norm": 19.947589874267578, "learning_rate": 8.3125e-06, "loss": 41.9095, "step": 3098 }, { "epoch": 73.78805970149254, "grad_norm": 21.689882278442383, "learning_rate": 8.309523809523811e-06, "loss": 43.5629, "step": 3099 }, { "epoch": 73.81194029850747, "grad_norm": 18.021583557128906, "learning_rate": 8.30654761904762e-06, "loss": 45.2045, "step": 3100 }, { "epoch": 73.83582089552239, "grad_norm": 21.016939163208008, "learning_rate": 8.30357142857143e-06, "loss": 42.9508, "step": 3101 }, { "epoch": 73.85970149253731, "grad_norm": 19.921489715576172, "learning_rate": 8.30059523809524e-06, "loss": 45.0384, "step": 3102 }, { "epoch": 73.88358208955223, "grad_norm": 17.989734649658203, "learning_rate": 8.297619047619049e-06, "loss": 43.6752, "step": 3103 }, { "epoch": 73.90746268656716, "grad_norm": 19.126956939697266, "learning_rate": 8.294642857142858e-06, "loss": 42.4258, "step": 3104 }, { "epoch": 73.9313432835821, "grad_norm": 18.107421875, "learning_rate": 8.291666666666667e-06, "loss": 42.0089, "step": 3105 }, { "epoch": 73.95522388059702, "grad_norm": 22.599328994750977, "learning_rate": 8.288690476190478e-06, "loss": 43.1967, "step": 3106 }, { "epoch": 73.97910447761194, "grad_norm": 17.103744506835938, "learning_rate": 8.285714285714287e-06, "loss": 42.932, "step": 3107 }, { "epoch": 74.0, "grad_norm": 16.514545440673828, "learning_rate": 8.282738095238096e-06, "loss": 38.5601, "step": 3108 }, { "epoch": 74.02388059701492, "grad_norm": 19.938108444213867, "learning_rate": 8.279761904761905e-06, "loss": 43.1656, "step": 3109 }, { "epoch": 74.04776119402985, "grad_norm": 23.691556930541992, "learning_rate": 8.276785714285714e-06, "loss": 43.248, "step": 3110 }, { "epoch": 74.07164179104478, "grad_norm": 24.84130859375, "learning_rate": 8.273809523809523e-06, "loss": 43.0973, "step": 3111 }, { "epoch": 74.0955223880597, "grad_norm": 16.541378021240234, "learning_rate": 8.270833333333334e-06, "loss": 43.8453, "step": 3112 }, { "epoch": 74.11940298507463, "grad_norm": 34.161293029785156, "learning_rate": 8.267857142857143e-06, "loss": 41.7, "step": 3113 }, { "epoch": 74.14328358208955, "grad_norm": 26.104328155517578, "learning_rate": 8.264880952380952e-06, "loss": 43.6119, "step": 3114 }, { "epoch": 74.16716417910447, "grad_norm": 26.31689453125, "learning_rate": 8.261904761904763e-06, "loss": 41.5545, "step": 3115 }, { "epoch": 74.1910447761194, "grad_norm": 23.808761596679688, "learning_rate": 8.258928571428572e-06, "loss": 44.5862, "step": 3116 }, { "epoch": 74.21492537313434, "grad_norm": 24.158493041992188, "learning_rate": 8.25595238095238e-06, "loss": 42.9814, "step": 3117 }, { "epoch": 74.23880597014926, "grad_norm": 25.35089874267578, "learning_rate": 8.25297619047619e-06, "loss": 42.4484, "step": 3118 }, { "epoch": 74.26268656716418, "grad_norm": 24.48615264892578, "learning_rate": 8.25e-06, "loss": 42.7431, "step": 3119 }, { "epoch": 74.2865671641791, "grad_norm": 24.813716888427734, "learning_rate": 8.24702380952381e-06, "loss": 43.1515, "step": 3120 }, { "epoch": 74.31044776119403, "grad_norm": 18.43018341064453, "learning_rate": 8.244047619047619e-06, "loss": 43.5142, "step": 3121 }, { "epoch": 74.33432835820895, "grad_norm": 25.593732833862305, "learning_rate": 8.24107142857143e-06, "loss": 44.4342, "step": 3122 }, { "epoch": 74.35820895522389, "grad_norm": 21.224576950073242, "learning_rate": 8.238095238095239e-06, "loss": 44.1973, "step": 3123 }, { "epoch": 74.38208955223881, "grad_norm": 17.604145050048828, "learning_rate": 8.235119047619048e-06, "loss": 43.4662, "step": 3124 }, { "epoch": 74.40597014925373, "grad_norm": 30.535215377807617, "learning_rate": 8.232142857142857e-06, "loss": 42.8872, "step": 3125 }, { "epoch": 74.42985074626866, "grad_norm": 22.767736434936523, "learning_rate": 8.229166666666667e-06, "loss": 43.4187, "step": 3126 }, { "epoch": 74.45373134328358, "grad_norm": 33.97389221191406, "learning_rate": 8.226190476190476e-06, "loss": 43.8719, "step": 3127 }, { "epoch": 74.4776119402985, "grad_norm": 26.33451271057129, "learning_rate": 8.223214285714285e-06, "loss": 43.6458, "step": 3128 }, { "epoch": 74.50149253731344, "grad_norm": 35.393733978271484, "learning_rate": 8.220238095238096e-06, "loss": 44.2996, "step": 3129 }, { "epoch": 74.52537313432836, "grad_norm": 27.903955459594727, "learning_rate": 8.217261904761905e-06, "loss": 43.5218, "step": 3130 }, { "epoch": 74.54925373134328, "grad_norm": 27.946807861328125, "learning_rate": 8.214285714285714e-06, "loss": 43.8993, "step": 3131 }, { "epoch": 74.57313432835821, "grad_norm": 21.519737243652344, "learning_rate": 8.211309523809523e-06, "loss": 42.9369, "step": 3132 }, { "epoch": 74.59701492537313, "grad_norm": 27.311965942382812, "learning_rate": 8.208333333333334e-06, "loss": 43.4193, "step": 3133 }, { "epoch": 74.62089552238805, "grad_norm": 23.38337516784668, "learning_rate": 8.205357142857143e-06, "loss": 43.5582, "step": 3134 }, { "epoch": 74.64477611940299, "grad_norm": 19.415571212768555, "learning_rate": 8.202380952380952e-06, "loss": 42.8066, "step": 3135 }, { "epoch": 74.66865671641791, "grad_norm": 25.44513511657715, "learning_rate": 8.199404761904763e-06, "loss": 42.8859, "step": 3136 }, { "epoch": 74.69253731343284, "grad_norm": 23.1788330078125, "learning_rate": 8.196428571428572e-06, "loss": 42.1339, "step": 3137 }, { "epoch": 74.71641791044776, "grad_norm": 14.436179161071777, "learning_rate": 8.193452380952381e-06, "loss": 42.6687, "step": 3138 }, { "epoch": 74.74029850746268, "grad_norm": 30.928714752197266, "learning_rate": 8.190476190476192e-06, "loss": 44.5744, "step": 3139 }, { "epoch": 74.7641791044776, "grad_norm": 23.915878295898438, "learning_rate": 8.1875e-06, "loss": 44.3435, "step": 3140 }, { "epoch": 74.78805970149254, "grad_norm": 27.95979881286621, "learning_rate": 8.18452380952381e-06, "loss": 42.4667, "step": 3141 }, { "epoch": 74.81194029850747, "grad_norm": 22.4390811920166, "learning_rate": 8.181547619047619e-06, "loss": 42.6036, "step": 3142 }, { "epoch": 74.83582089552239, "grad_norm": 22.94829750061035, "learning_rate": 8.17857142857143e-06, "loss": 42.4304, "step": 3143 }, { "epoch": 74.85970149253731, "grad_norm": 20.711339950561523, "learning_rate": 8.175595238095239e-06, "loss": 43.1806, "step": 3144 }, { "epoch": 74.88358208955223, "grad_norm": 21.30629539489746, "learning_rate": 8.172619047619048e-06, "loss": 42.7325, "step": 3145 }, { "epoch": 74.90746268656716, "grad_norm": 20.381263732910156, "learning_rate": 8.169642857142858e-06, "loss": 43.0491, "step": 3146 }, { "epoch": 74.9313432835821, "grad_norm": 21.54926300048828, "learning_rate": 8.166666666666668e-06, "loss": 44.8298, "step": 3147 }, { "epoch": 74.95522388059702, "grad_norm": 15.518889427185059, "learning_rate": 8.163690476190477e-06, "loss": 42.6821, "step": 3148 }, { "epoch": 74.97910447761194, "grad_norm": 24.487192153930664, "learning_rate": 8.160714285714286e-06, "loss": 43.0891, "step": 3149 }, { "epoch": 75.0, "grad_norm": 15.607013702392578, "learning_rate": 8.157738095238096e-06, "loss": 37.212, "step": 3150 }, { "epoch": 75.02388059701492, "grad_norm": 31.642353057861328, "learning_rate": 8.154761904761905e-06, "loss": 43.9061, "step": 3151 }, { "epoch": 75.04776119402985, "grad_norm": 23.92624855041504, "learning_rate": 8.151785714285714e-06, "loss": 44.0244, "step": 3152 }, { "epoch": 75.07164179104478, "grad_norm": 23.756420135498047, "learning_rate": 8.148809523809525e-06, "loss": 44.7597, "step": 3153 }, { "epoch": 75.0955223880597, "grad_norm": 26.027414321899414, "learning_rate": 8.145833333333334e-06, "loss": 42.3933, "step": 3154 }, { "epoch": 75.11940298507463, "grad_norm": 18.252239227294922, "learning_rate": 8.142857142857143e-06, "loss": 43.1075, "step": 3155 }, { "epoch": 75.14328358208955, "grad_norm": 25.58303451538086, "learning_rate": 8.139880952380952e-06, "loss": 43.3715, "step": 3156 }, { "epoch": 75.16716417910447, "grad_norm": 24.198566436767578, "learning_rate": 8.136904761904763e-06, "loss": 42.042, "step": 3157 }, { "epoch": 75.1910447761194, "grad_norm": 21.632183074951172, "learning_rate": 8.133928571428572e-06, "loss": 42.4693, "step": 3158 }, { "epoch": 75.21492537313434, "grad_norm": 27.104801177978516, "learning_rate": 8.130952380952381e-06, "loss": 42.597, "step": 3159 }, { "epoch": 75.23880597014926, "grad_norm": 21.614917755126953, "learning_rate": 8.127976190476192e-06, "loss": 42.9729, "step": 3160 }, { "epoch": 75.26268656716418, "grad_norm": 27.62027359008789, "learning_rate": 8.125000000000001e-06, "loss": 43.3302, "step": 3161 }, { "epoch": 75.2865671641791, "grad_norm": 24.087974548339844, "learning_rate": 8.12202380952381e-06, "loss": 44.1364, "step": 3162 }, { "epoch": 75.31044776119403, "grad_norm": 21.590192794799805, "learning_rate": 8.119047619047619e-06, "loss": 42.7373, "step": 3163 }, { "epoch": 75.33432835820895, "grad_norm": 27.612075805664062, "learning_rate": 8.11607142857143e-06, "loss": 43.5758, "step": 3164 }, { "epoch": 75.35820895522389, "grad_norm": 18.209209442138672, "learning_rate": 8.113095238095239e-06, "loss": 43.309, "step": 3165 }, { "epoch": 75.38208955223881, "grad_norm": 28.845134735107422, "learning_rate": 8.110119047619048e-06, "loss": 43.3125, "step": 3166 }, { "epoch": 75.40597014925373, "grad_norm": 20.03913116455078, "learning_rate": 8.107142857142859e-06, "loss": 44.666, "step": 3167 }, { "epoch": 75.42985074626866, "grad_norm": 29.69953155517578, "learning_rate": 8.104166666666668e-06, "loss": 43.3558, "step": 3168 }, { "epoch": 75.45373134328358, "grad_norm": 22.189376831054688, "learning_rate": 8.101190476190477e-06, "loss": 43.6229, "step": 3169 }, { "epoch": 75.4776119402985, "grad_norm": 23.93678092956543, "learning_rate": 8.098214285714286e-06, "loss": 42.9279, "step": 3170 }, { "epoch": 75.50149253731344, "grad_norm": 21.489761352539062, "learning_rate": 8.095238095238097e-06, "loss": 43.4537, "step": 3171 }, { "epoch": 75.52537313432836, "grad_norm": 18.95380210876465, "learning_rate": 8.092261904761906e-06, "loss": 42.9752, "step": 3172 }, { "epoch": 75.54925373134328, "grad_norm": 26.20965576171875, "learning_rate": 8.089285714285715e-06, "loss": 42.7511, "step": 3173 }, { "epoch": 75.57313432835821, "grad_norm": 19.629926681518555, "learning_rate": 8.086309523809525e-06, "loss": 43.7784, "step": 3174 }, { "epoch": 75.59701492537313, "grad_norm": 25.866622924804688, "learning_rate": 8.083333333333334e-06, "loss": 42.7349, "step": 3175 }, { "epoch": 75.62089552238805, "grad_norm": 24.383323669433594, "learning_rate": 8.080357142857143e-06, "loss": 42.5395, "step": 3176 }, { "epoch": 75.64477611940299, "grad_norm": 19.74950408935547, "learning_rate": 8.077380952380953e-06, "loss": 43.1058, "step": 3177 }, { "epoch": 75.66865671641791, "grad_norm": 28.67831039428711, "learning_rate": 8.074404761904763e-06, "loss": 43.5871, "step": 3178 }, { "epoch": 75.69253731343284, "grad_norm": 23.102951049804688, "learning_rate": 8.071428571428572e-06, "loss": 42.76, "step": 3179 }, { "epoch": 75.71641791044776, "grad_norm": 35.02995681762695, "learning_rate": 8.068452380952381e-06, "loss": 43.8252, "step": 3180 }, { "epoch": 75.74029850746268, "grad_norm": 24.358551025390625, "learning_rate": 8.065476190476192e-06, "loss": 43.1074, "step": 3181 }, { "epoch": 75.7641791044776, "grad_norm": 30.14754295349121, "learning_rate": 8.062500000000001e-06, "loss": 43.8415, "step": 3182 }, { "epoch": 75.78805970149254, "grad_norm": 24.45053482055664, "learning_rate": 8.05952380952381e-06, "loss": 43.0215, "step": 3183 }, { "epoch": 75.81194029850747, "grad_norm": 37.40525436401367, "learning_rate": 8.05654761904762e-06, "loss": 42.961, "step": 3184 }, { "epoch": 75.83582089552239, "grad_norm": 24.555240631103516, "learning_rate": 8.05357142857143e-06, "loss": 44.2708, "step": 3185 }, { "epoch": 75.85970149253731, "grad_norm": 37.460670471191406, "learning_rate": 8.050595238095239e-06, "loss": 43.5956, "step": 3186 }, { "epoch": 75.88358208955223, "grad_norm": 32.54770278930664, "learning_rate": 8.047619047619048e-06, "loss": 42.3289, "step": 3187 }, { "epoch": 75.90746268656716, "grad_norm": 38.01876449584961, "learning_rate": 8.044642857142859e-06, "loss": 42.9972, "step": 3188 }, { "epoch": 75.9313432835821, "grad_norm": 30.63246726989746, "learning_rate": 8.041666666666668e-06, "loss": 42.5397, "step": 3189 }, { "epoch": 75.95522388059702, "grad_norm": 27.40627670288086, "learning_rate": 8.038690476190477e-06, "loss": 41.377, "step": 3190 }, { "epoch": 75.97910447761194, "grad_norm": 26.620893478393555, "learning_rate": 8.035714285714286e-06, "loss": 42.9367, "step": 3191 }, { "epoch": 76.0, "grad_norm": 31.36514663696289, "learning_rate": 8.032738095238097e-06, "loss": 37.8523, "step": 3192 }, { "epoch": 76.02388059701492, "grad_norm": 27.55282974243164, "learning_rate": 8.029761904761906e-06, "loss": 43.2614, "step": 3193 }, { "epoch": 76.04776119402985, "grad_norm": 36.373634338378906, "learning_rate": 8.026785714285715e-06, "loss": 42.1966, "step": 3194 }, { "epoch": 76.07164179104478, "grad_norm": 29.89250373840332, "learning_rate": 8.023809523809526e-06, "loss": 43.0278, "step": 3195 }, { "epoch": 76.0955223880597, "grad_norm": 28.84893226623535, "learning_rate": 8.020833333333335e-06, "loss": 44.8331, "step": 3196 }, { "epoch": 76.11940298507463, "grad_norm": 27.258445739746094, "learning_rate": 8.017857142857144e-06, "loss": 42.5176, "step": 3197 }, { "epoch": 76.14328358208955, "grad_norm": 30.8077449798584, "learning_rate": 8.014880952380953e-06, "loss": 43.3045, "step": 3198 }, { "epoch": 76.16716417910447, "grad_norm": 28.528837203979492, "learning_rate": 8.011904761904763e-06, "loss": 43.1302, "step": 3199 }, { "epoch": 76.1910447761194, "grad_norm": 30.751039505004883, "learning_rate": 8.008928571428572e-06, "loss": 43.7287, "step": 3200 }, { "epoch": 76.21492537313434, "grad_norm": 27.781261444091797, "learning_rate": 8.005952380952382e-06, "loss": 43.3939, "step": 3201 }, { "epoch": 76.23880597014926, "grad_norm": 27.45984649658203, "learning_rate": 8.002976190476192e-06, "loss": 43.6411, "step": 3202 }, { "epoch": 76.26268656716418, "grad_norm": 26.628419876098633, "learning_rate": 8.000000000000001e-06, "loss": 42.6454, "step": 3203 }, { "epoch": 76.2865671641791, "grad_norm": 36.02729034423828, "learning_rate": 7.99702380952381e-06, "loss": 43.2459, "step": 3204 }, { "epoch": 76.31044776119403, "grad_norm": 28.480478286743164, "learning_rate": 7.99404761904762e-06, "loss": 42.7675, "step": 3205 }, { "epoch": 76.33432835820895, "grad_norm": 31.36353874206543, "learning_rate": 7.99107142857143e-06, "loss": 42.8571, "step": 3206 }, { "epoch": 76.35820895522389, "grad_norm": 29.178728103637695, "learning_rate": 7.98809523809524e-06, "loss": 42.7477, "step": 3207 }, { "epoch": 76.38208955223881, "grad_norm": 28.539457321166992, "learning_rate": 7.985119047619048e-06, "loss": 44.1444, "step": 3208 }, { "epoch": 76.40597014925373, "grad_norm": 26.178895950317383, "learning_rate": 7.982142857142859e-06, "loss": 42.7187, "step": 3209 }, { "epoch": 76.42985074626866, "grad_norm": 30.825010299682617, "learning_rate": 7.979166666666668e-06, "loss": 43.478, "step": 3210 }, { "epoch": 76.45373134328358, "grad_norm": 27.317245483398438, "learning_rate": 7.976190476190477e-06, "loss": 43.479, "step": 3211 }, { "epoch": 76.4776119402985, "grad_norm": 31.42888641357422, "learning_rate": 7.973214285714286e-06, "loss": 43.7278, "step": 3212 }, { "epoch": 76.50149253731344, "grad_norm": 28.949392318725586, "learning_rate": 7.970238095238097e-06, "loss": 43.6134, "step": 3213 }, { "epoch": 76.52537313432836, "grad_norm": 36.61643981933594, "learning_rate": 7.967261904761904e-06, "loss": 44.1841, "step": 3214 }, { "epoch": 76.54925373134328, "grad_norm": 31.78457260131836, "learning_rate": 7.964285714285715e-06, "loss": 43.9995, "step": 3215 }, { "epoch": 76.57313432835821, "grad_norm": 29.883163452148438, "learning_rate": 7.961309523809524e-06, "loss": 42.596, "step": 3216 }, { "epoch": 76.59701492537313, "grad_norm": 27.458534240722656, "learning_rate": 7.958333333333333e-06, "loss": 43.7156, "step": 3217 }, { "epoch": 76.62089552238805, "grad_norm": 26.423311233520508, "learning_rate": 7.955357142857144e-06, "loss": 42.2925, "step": 3218 }, { "epoch": 76.64477611940299, "grad_norm": 22.850927352905273, "learning_rate": 7.952380952380953e-06, "loss": 43.3146, "step": 3219 }, { "epoch": 76.66865671641791, "grad_norm": 32.23415756225586, "learning_rate": 7.949404761904762e-06, "loss": 43.4622, "step": 3220 }, { "epoch": 76.69253731343284, "grad_norm": 25.596759796142578, "learning_rate": 7.946428571428571e-06, "loss": 42.6238, "step": 3221 }, { "epoch": 76.71641791044776, "grad_norm": 28.371593475341797, "learning_rate": 7.943452380952382e-06, "loss": 41.2267, "step": 3222 }, { "epoch": 76.74029850746268, "grad_norm": 24.369253158569336, "learning_rate": 7.94047619047619e-06, "loss": 43.24, "step": 3223 }, { "epoch": 76.7641791044776, "grad_norm": 34.42658996582031, "learning_rate": 7.9375e-06, "loss": 42.8095, "step": 3224 }, { "epoch": 76.78805970149254, "grad_norm": 26.35492515563965, "learning_rate": 7.93452380952381e-06, "loss": 42.0312, "step": 3225 }, { "epoch": 76.81194029850747, "grad_norm": 33.34773254394531, "learning_rate": 7.93154761904762e-06, "loss": 43.4483, "step": 3226 }, { "epoch": 76.83582089552239, "grad_norm": 31.470170974731445, "learning_rate": 7.928571428571429e-06, "loss": 43.9896, "step": 3227 }, { "epoch": 76.85970149253731, "grad_norm": 28.38050651550293, "learning_rate": 7.925595238095238e-06, "loss": 43.9711, "step": 3228 }, { "epoch": 76.88358208955223, "grad_norm": NaN, "learning_rate": 7.922619047619048e-06, "loss": 75.7577, "step": 3229 }, { "epoch": 76.90746268656716, "grad_norm": 21.927776336669922, "learning_rate": 7.922619047619048e-06, "loss": 42.1852, "step": 3230 }, { "epoch": 76.9313432835821, "grad_norm": 28.636518478393555, "learning_rate": 7.919642857142857e-06, "loss": 43.103, "step": 3231 }, { "epoch": 76.95522388059702, "grad_norm": 25.48936653137207, "learning_rate": 7.916666666666667e-06, "loss": 43.1688, "step": 3232 }, { "epoch": 76.97910447761194, "grad_norm": 29.641143798828125, "learning_rate": 7.913690476190477e-06, "loss": 41.7518, "step": 3233 }, { "epoch": 77.0, "grad_norm": 22.023099899291992, "learning_rate": 7.910714285714286e-06, "loss": 38.1447, "step": 3234 }, { "epoch": 77.02388059701492, "grad_norm": 35.88689041137695, "learning_rate": 7.907738095238095e-06, "loss": 43.1578, "step": 3235 }, { "epoch": 77.04776119402985, "grad_norm": 34.37343978881836, "learning_rate": 7.904761904761904e-06, "loss": 43.0582, "step": 3236 }, { "epoch": 77.07164179104478, "grad_norm": 18.577016830444336, "learning_rate": 7.901785714285715e-06, "loss": 42.1815, "step": 3237 }, { "epoch": 77.0955223880597, "grad_norm": 23.373125076293945, "learning_rate": 7.898809523809524e-06, "loss": 44.491, "step": 3238 }, { "epoch": 77.11940298507463, "grad_norm": 28.848159790039062, "learning_rate": 7.895833333333333e-06, "loss": 43.0114, "step": 3239 }, { "epoch": 77.14328358208955, "grad_norm": 19.10719108581543, "learning_rate": 7.892857142857144e-06, "loss": 42.862, "step": 3240 }, { "epoch": 77.16716417910447, "grad_norm": 34.79095458984375, "learning_rate": 7.889880952380953e-06, "loss": 43.7736, "step": 3241 }, { "epoch": 77.1910447761194, "grad_norm": 28.950021743774414, "learning_rate": 7.886904761904762e-06, "loss": 44.5221, "step": 3242 }, { "epoch": 77.21492537313434, "grad_norm": 29.437536239624023, "learning_rate": 7.883928571428571e-06, "loss": 43.0498, "step": 3243 }, { "epoch": 77.23880597014926, "grad_norm": 26.087984085083008, "learning_rate": 7.880952380952382e-06, "loss": 42.1991, "step": 3244 }, { "epoch": 77.26268656716418, "grad_norm": 30.868637084960938, "learning_rate": 7.877976190476191e-06, "loss": 43.1896, "step": 3245 }, { "epoch": 77.2865671641791, "grad_norm": 26.28648567199707, "learning_rate": 7.875e-06, "loss": 41.9695, "step": 3246 }, { "epoch": 77.31044776119403, "grad_norm": 27.738021850585938, "learning_rate": 7.87202380952381e-06, "loss": 42.9537, "step": 3247 }, { "epoch": 77.33432835820895, "grad_norm": 23.0654296875, "learning_rate": 7.86904761904762e-06, "loss": 43.1053, "step": 3248 }, { "epoch": 77.35820895522389, "grad_norm": 31.976926803588867, "learning_rate": 7.866071428571429e-06, "loss": 42.0648, "step": 3249 }, { "epoch": 77.38208955223881, "grad_norm": 28.690933227539062, "learning_rate": 7.863095238095238e-06, "loss": 43.0786, "step": 3250 }, { "epoch": 77.40597014925373, "grad_norm": 29.870180130004883, "learning_rate": 7.860119047619049e-06, "loss": 44.1362, "step": 3251 }, { "epoch": 77.42985074626866, "grad_norm": 29.524002075195312, "learning_rate": 7.857142857142858e-06, "loss": 42.635, "step": 3252 }, { "epoch": 77.45373134328358, "grad_norm": 24.833131790161133, "learning_rate": 7.854166666666667e-06, "loss": 43.1208, "step": 3253 }, { "epoch": 77.4776119402985, "grad_norm": 24.424755096435547, "learning_rate": 7.851190476190477e-06, "loss": 44.4682, "step": 3254 }, { "epoch": 77.50149253731344, "grad_norm": 30.417823791503906, "learning_rate": 7.848214285714287e-06, "loss": 45.1353, "step": 3255 }, { "epoch": 77.52537313432836, "grad_norm": 23.12209701538086, "learning_rate": 7.845238095238096e-06, "loss": 41.7736, "step": 3256 }, { "epoch": 77.54925373134328, "grad_norm": 30.454221725463867, "learning_rate": 7.842261904761905e-06, "loss": 42.6765, "step": 3257 }, { "epoch": 77.57313432835821, "grad_norm": 30.55715560913086, "learning_rate": 7.839285714285715e-06, "loss": 43.4168, "step": 3258 }, { "epoch": 77.59701492537313, "grad_norm": 26.72547149658203, "learning_rate": 7.836309523809524e-06, "loss": 42.5388, "step": 3259 }, { "epoch": 77.62089552238805, "grad_norm": 25.03418731689453, "learning_rate": 7.833333333333333e-06, "loss": 43.3748, "step": 3260 }, { "epoch": 77.64477611940299, "grad_norm": 28.706029891967773, "learning_rate": 7.830357142857144e-06, "loss": 43.7717, "step": 3261 }, { "epoch": 77.66865671641791, "grad_norm": 30.39940643310547, "learning_rate": 7.827380952380953e-06, "loss": 42.952, "step": 3262 }, { "epoch": 77.69253731343284, "grad_norm": 25.622882843017578, "learning_rate": 7.824404761904762e-06, "loss": 42.7133, "step": 3263 }, { "epoch": 77.71641791044776, "grad_norm": 25.120025634765625, "learning_rate": 7.821428571428571e-06, "loss": 42.2453, "step": 3264 }, { "epoch": 77.74029850746268, "grad_norm": 27.227832794189453, "learning_rate": 7.818452380952382e-06, "loss": 42.4094, "step": 3265 }, { "epoch": 77.7641791044776, "grad_norm": 23.663406372070312, "learning_rate": 7.815476190476191e-06, "loss": 43.7332, "step": 3266 }, { "epoch": 77.78805970149254, "grad_norm": 28.738086700439453, "learning_rate": 7.8125e-06, "loss": 43.7881, "step": 3267 }, { "epoch": 77.81194029850747, "grad_norm": 27.955598831176758, "learning_rate": 7.809523809523811e-06, "loss": 43.4782, "step": 3268 }, { "epoch": 77.83582089552239, "grad_norm": 24.79859161376953, "learning_rate": 7.80654761904762e-06, "loss": 41.0554, "step": 3269 }, { "epoch": 77.85970149253731, "grad_norm": 25.531471252441406, "learning_rate": 7.803571428571429e-06, "loss": 43.0072, "step": 3270 }, { "epoch": 77.88358208955223, "grad_norm": 27.746000289916992, "learning_rate": 7.800595238095238e-06, "loss": 43.8641, "step": 3271 }, { "epoch": 77.90746268656716, "grad_norm": 25.056262969970703, "learning_rate": 7.797619047619049e-06, "loss": 43.1316, "step": 3272 }, { "epoch": 77.9313432835821, "grad_norm": 30.888355255126953, "learning_rate": 7.794642857142858e-06, "loss": 43.482, "step": 3273 }, { "epoch": 77.95522388059702, "grad_norm": 22.501649856567383, "learning_rate": 7.791666666666667e-06, "loss": 43.4869, "step": 3274 }, { "epoch": 77.97910447761194, "grad_norm": 31.175397872924805, "learning_rate": 7.788690476190478e-06, "loss": 43.5349, "step": 3275 }, { "epoch": 78.0, "grad_norm": 20.901432037353516, "learning_rate": 7.785714285714287e-06, "loss": 36.2874, "step": 3276 }, { "epoch": 78.02388059701492, "grad_norm": 30.319852828979492, "learning_rate": 7.782738095238096e-06, "loss": 41.102, "step": 3277 }, { "epoch": 78.04776119402985, "grad_norm": 28.31625747680664, "learning_rate": 7.779761904761905e-06, "loss": 42.4304, "step": 3278 }, { "epoch": 78.07164179104478, "grad_norm": 26.445859909057617, "learning_rate": 7.776785714285716e-06, "loss": 42.8755, "step": 3279 }, { "epoch": 78.0955223880597, "grad_norm": 20.42568588256836, "learning_rate": 7.773809523809525e-06, "loss": 44.5072, "step": 3280 }, { "epoch": 78.11940298507463, "grad_norm": 28.535858154296875, "learning_rate": 7.770833333333334e-06, "loss": 42.325, "step": 3281 }, { "epoch": 78.14328358208955, "grad_norm": 21.800678253173828, "learning_rate": 7.767857142857144e-06, "loss": 44.3283, "step": 3282 }, { "epoch": 78.16716417910447, "grad_norm": 26.74295997619629, "learning_rate": 7.764880952380953e-06, "loss": 44.3208, "step": 3283 }, { "epoch": 78.1910447761194, "grad_norm": 28.9124755859375, "learning_rate": 7.761904761904762e-06, "loss": 42.8112, "step": 3284 }, { "epoch": 78.21492537313434, "grad_norm": 24.948265075683594, "learning_rate": 7.758928571428571e-06, "loss": 42.6617, "step": 3285 }, { "epoch": 78.23880597014926, "grad_norm": 25.038854598999023, "learning_rate": 7.755952380952382e-06, "loss": 42.6089, "step": 3286 }, { "epoch": 78.26268656716418, "grad_norm": 24.622905731201172, "learning_rate": 7.752976190476191e-06, "loss": 42.9201, "step": 3287 }, { "epoch": 78.2865671641791, "grad_norm": 22.999900817871094, "learning_rate": 7.75e-06, "loss": 43.2141, "step": 3288 }, { "epoch": 78.31044776119403, "grad_norm": 22.848161697387695, "learning_rate": 7.747023809523811e-06, "loss": 44.1053, "step": 3289 }, { "epoch": 78.33432835820895, "grad_norm": 16.15705108642578, "learning_rate": 7.74404761904762e-06, "loss": 43.7009, "step": 3290 }, { "epoch": 78.35820895522389, "grad_norm": 29.3355655670166, "learning_rate": 7.74107142857143e-06, "loss": 42.3037, "step": 3291 }, { "epoch": 78.38208955223881, "grad_norm": 19.516281127929688, "learning_rate": 7.738095238095238e-06, "loss": 42.6299, "step": 3292 }, { "epoch": 78.40597014925373, "grad_norm": 34.26980209350586, "learning_rate": 7.735119047619049e-06, "loss": 43.052, "step": 3293 }, { "epoch": 78.42985074626866, "grad_norm": 32.0604133605957, "learning_rate": 7.732142857142858e-06, "loss": 42.4497, "step": 3294 }, { "epoch": 78.45373134328358, "grad_norm": 23.038795471191406, "learning_rate": 7.729166666666667e-06, "loss": 42.5542, "step": 3295 }, { "epoch": 78.4776119402985, "grad_norm": 27.498064041137695, "learning_rate": 7.726190476190478e-06, "loss": 41.834, "step": 3296 }, { "epoch": 78.50149253731344, "grad_norm": 25.38565444946289, "learning_rate": 7.723214285714287e-06, "loss": 44.7325, "step": 3297 }, { "epoch": 78.52537313432836, "grad_norm": 21.209095001220703, "learning_rate": 7.720238095238096e-06, "loss": 44.6015, "step": 3298 }, { "epoch": 78.54925373134328, "grad_norm": 27.321908950805664, "learning_rate": 7.717261904761905e-06, "loss": 43.3014, "step": 3299 }, { "epoch": 78.57313432835821, "grad_norm": 20.742706298828125, "learning_rate": 7.714285714285716e-06, "loss": 44.1572, "step": 3300 }, { "epoch": 78.59701492537313, "grad_norm": 28.640583038330078, "learning_rate": 7.711309523809525e-06, "loss": 42.1555, "step": 3301 }, { "epoch": 78.62089552238805, "grad_norm": 30.252870559692383, "learning_rate": 7.708333333333334e-06, "loss": 43.4469, "step": 3302 }, { "epoch": 78.64477611940299, "grad_norm": 23.6368350982666, "learning_rate": 7.705357142857145e-06, "loss": 44.3375, "step": 3303 }, { "epoch": 78.66865671641791, "grad_norm": 22.434412002563477, "learning_rate": 7.702380952380954e-06, "loss": 42.8106, "step": 3304 }, { "epoch": 78.69253731343284, "grad_norm": 28.329635620117188, "learning_rate": 7.699404761904763e-06, "loss": 42.8968, "step": 3305 }, { "epoch": 78.71641791044776, "grad_norm": 21.02295684814453, "learning_rate": 7.696428571428572e-06, "loss": 42.1169, "step": 3306 }, { "epoch": 78.74029850746268, "grad_norm": 30.06182861328125, "learning_rate": 7.693452380952382e-06, "loss": 43.0741, "step": 3307 }, { "epoch": 78.7641791044776, "grad_norm": 22.40550994873047, "learning_rate": 7.690476190476191e-06, "loss": 42.8449, "step": 3308 }, { "epoch": 78.78805970149254, "grad_norm": 28.855802536010742, "learning_rate": 7.6875e-06, "loss": 43.0846, "step": 3309 }, { "epoch": 78.81194029850747, "grad_norm": 25.507308959960938, "learning_rate": 7.684523809523811e-06, "loss": 41.954, "step": 3310 }, { "epoch": 78.83582089552239, "grad_norm": 26.092424392700195, "learning_rate": 7.68154761904762e-06, "loss": 42.1684, "step": 3311 }, { "epoch": 78.85970149253731, "grad_norm": 24.099889755249023, "learning_rate": 7.67857142857143e-06, "loss": 43.9231, "step": 3312 }, { "epoch": 78.88358208955223, "grad_norm": 28.72806739807129, "learning_rate": 7.675595238095238e-06, "loss": 42.546, "step": 3313 }, { "epoch": 78.90746268656716, "grad_norm": 26.489227294921875, "learning_rate": 7.672619047619049e-06, "loss": 44.1023, "step": 3314 }, { "epoch": 78.9313432835821, "grad_norm": 29.59152603149414, "learning_rate": 7.669642857142858e-06, "loss": 43.7005, "step": 3315 }, { "epoch": 78.95522388059702, "grad_norm": 23.0878963470459, "learning_rate": 7.666666666666667e-06, "loss": 41.9249, "step": 3316 }, { "epoch": 78.97910447761194, "grad_norm": 29.851896286010742, "learning_rate": 7.663690476190478e-06, "loss": 42.2078, "step": 3317 }, { "epoch": 79.0, "grad_norm": 23.739883422851562, "learning_rate": 7.660714285714287e-06, "loss": 39.1357, "step": 3318 }, { "epoch": 79.02388059701492, "grad_norm": 23.394466400146484, "learning_rate": 7.657738095238096e-06, "loss": 43.7385, "step": 3319 }, { "epoch": 79.04776119402985, "grad_norm": 22.10674285888672, "learning_rate": 7.654761904761905e-06, "loss": 43.253, "step": 3320 }, { "epoch": 79.07164179104478, "grad_norm": 25.71041488647461, "learning_rate": 7.651785714285714e-06, "loss": 43.6012, "step": 3321 }, { "epoch": 79.0955223880597, "grad_norm": 18.054738998413086, "learning_rate": 7.648809523809523e-06, "loss": 42.5356, "step": 3322 }, { "epoch": 79.11940298507463, "grad_norm": 25.66161346435547, "learning_rate": 7.645833333333334e-06, "loss": 43.796, "step": 3323 }, { "epoch": 79.14328358208955, "grad_norm": 15.92872142791748, "learning_rate": 7.642857142857143e-06, "loss": 43.4924, "step": 3324 }, { "epoch": 79.16716417910447, "grad_norm": 26.33378791809082, "learning_rate": 7.639880952380952e-06, "loss": 41.9388, "step": 3325 }, { "epoch": 79.1910447761194, "grad_norm": 18.938690185546875, "learning_rate": 7.636904761904763e-06, "loss": 42.6458, "step": 3326 }, { "epoch": 79.21492537313434, "grad_norm": 21.968505859375, "learning_rate": 7.633928571428572e-06, "loss": 43.2856, "step": 3327 }, { "epoch": 79.23880597014926, "grad_norm": 21.652313232421875, "learning_rate": 7.630952380952381e-06, "loss": 43.1669, "step": 3328 }, { "epoch": 79.26268656716418, "grad_norm": 16.064531326293945, "learning_rate": 7.627976190476191e-06, "loss": 43.09, "step": 3329 }, { "epoch": 79.2865671641791, "grad_norm": 21.19333839416504, "learning_rate": 7.625e-06, "loss": 42.7371, "step": 3330 }, { "epoch": 79.31044776119403, "grad_norm": 19.381980895996094, "learning_rate": 7.62202380952381e-06, "loss": 43.129, "step": 3331 }, { "epoch": 79.33432835820895, "grad_norm": 17.10456085205078, "learning_rate": 7.61904761904762e-06, "loss": 42.795, "step": 3332 }, { "epoch": 79.35820895522389, "grad_norm": 18.57830810546875, "learning_rate": 7.616071428571429e-06, "loss": 43.1172, "step": 3333 }, { "epoch": 79.38208955223881, "grad_norm": 16.343597412109375, "learning_rate": 7.6130952380952386e-06, "loss": 44.1413, "step": 3334 }, { "epoch": 79.40597014925373, "grad_norm": 18.999656677246094, "learning_rate": 7.610119047619048e-06, "loss": 43.154, "step": 3335 }, { "epoch": 79.42985074626866, "grad_norm": 18.70110321044922, "learning_rate": 7.6071428571428575e-06, "loss": 43.2832, "step": 3336 }, { "epoch": 79.45373134328358, "grad_norm": 17.107995986938477, "learning_rate": 7.6041666666666666e-06, "loss": 42.6499, "step": 3337 }, { "epoch": 79.4776119402985, "grad_norm": 20.98540496826172, "learning_rate": 7.6011904761904765e-06, "loss": 42.6728, "step": 3338 }, { "epoch": 79.50149253731344, "grad_norm": 18.264223098754883, "learning_rate": 7.598214285714286e-06, "loss": 42.1924, "step": 3339 }, { "epoch": 79.52537313432836, "grad_norm": 22.478178024291992, "learning_rate": 7.595238095238095e-06, "loss": 43.1835, "step": 3340 }, { "epoch": 79.54925373134328, "grad_norm": 21.464313507080078, "learning_rate": 7.592261904761905e-06, "loss": 42.8992, "step": 3341 }, { "epoch": 79.57313432835821, "grad_norm": 23.627376556396484, "learning_rate": 7.589285714285714e-06, "loss": 43.3444, "step": 3342 }, { "epoch": 79.59701492537313, "grad_norm": 20.699804306030273, "learning_rate": 7.586309523809524e-06, "loss": 43.5294, "step": 3343 }, { "epoch": 79.62089552238805, "grad_norm": 27.1911678314209, "learning_rate": 7.583333333333333e-06, "loss": 42.3842, "step": 3344 }, { "epoch": 79.64477611940299, "grad_norm": 22.591445922851562, "learning_rate": 7.580357142857143e-06, "loss": 43.2132, "step": 3345 }, { "epoch": 79.66865671641791, "grad_norm": 23.79202651977539, "learning_rate": 7.577380952380953e-06, "loss": 42.7603, "step": 3346 }, { "epoch": 79.69253731343284, "grad_norm": 21.520214080810547, "learning_rate": 7.574404761904762e-06, "loss": 42.9868, "step": 3347 }, { "epoch": 79.71641791044776, "grad_norm": 21.92240333557129, "learning_rate": 7.571428571428572e-06, "loss": 42.5148, "step": 3348 }, { "epoch": 79.74029850746268, "grad_norm": 21.808698654174805, "learning_rate": 7.568452380952381e-06, "loss": 42.2734, "step": 3349 }, { "epoch": 79.7641791044776, "grad_norm": 21.703947067260742, "learning_rate": 7.565476190476191e-06, "loss": 43.9589, "step": 3350 }, { "epoch": 79.78805970149254, "grad_norm": 21.56643295288086, "learning_rate": 7.5625e-06, "loss": 42.249, "step": 3351 }, { "epoch": 79.81194029850747, "grad_norm": 20.325498580932617, "learning_rate": 7.55952380952381e-06, "loss": 42.5246, "step": 3352 }, { "epoch": 79.83582089552239, "grad_norm": 20.19651985168457, "learning_rate": 7.55654761904762e-06, "loss": 43.1353, "step": 3353 }, { "epoch": 79.85970149253731, "grad_norm": 15.062832832336426, "learning_rate": 7.553571428571429e-06, "loss": 42.7335, "step": 3354 }, { "epoch": 79.88358208955223, "grad_norm": 21.990650177001953, "learning_rate": 7.550595238095239e-06, "loss": 44.103, "step": 3355 }, { "epoch": 79.90746268656716, "grad_norm": 17.816457748413086, "learning_rate": 7.547619047619048e-06, "loss": 43.592, "step": 3356 }, { "epoch": 79.9313432835821, "grad_norm": 21.62665557861328, "learning_rate": 7.544642857142858e-06, "loss": 44.0372, "step": 3357 }, { "epoch": 79.95522388059702, "grad_norm": 20.444469451904297, "learning_rate": 7.541666666666667e-06, "loss": 42.7547, "step": 3358 }, { "epoch": 79.97910447761194, "grad_norm": 15.230064392089844, "learning_rate": 7.538690476190477e-06, "loss": 42.4287, "step": 3359 }, { "epoch": 80.0, "grad_norm": 18.977619171142578, "learning_rate": 7.5357142857142865e-06, "loss": 36.8674, "step": 3360 }, { "epoch": 80.0, "step": 3360, "total_flos": 1.6516474192825325e+17, "train_loss": 10.921977708453223, "train_runtime": 25778.6818, "train_samples_per_second": 16.609, "train_steps_per_second": 0.13 }, { "epoch": 80.02388059701492, "grad_norm": 20.951553344726562, "learning_rate": 1e-05, "loss": 42.8953, "step": 3361 }, { "epoch": 80.04776119402985, "grad_norm": Infinity, "learning_rate": 9.997354497354498e-06, "loss": 49.2702, "step": 3362 }, { "epoch": 80.07164179104478, "grad_norm": 272.02093505859375, "learning_rate": 9.997354497354498e-06, "loss": 48.7639, "step": 3363 }, { "epoch": 80.0955223880597, "grad_norm": 136.40426635742188, "learning_rate": 9.994708994708996e-06, "loss": 48.2845, "step": 3364 }, { "epoch": 80.11940298507463, "grad_norm": 69.2103500366211, "learning_rate": 9.992063492063493e-06, "loss": 45.905, "step": 3365 }, { "epoch": 80.14328358208955, "grad_norm": 42.27269744873047, "learning_rate": 9.989417989417989e-06, "loss": 44.495, "step": 3366 }, { "epoch": 80.16716417910447, "grad_norm": 78.32905578613281, "learning_rate": 9.986772486772488e-06, "loss": 43.787, "step": 3367 }, { "epoch": 80.1910447761194, "grad_norm": 53.60576248168945, "learning_rate": 9.984126984126986e-06, "loss": 44.9412, "step": 3368 }, { "epoch": 80.21492537313434, "grad_norm": 43.58672332763672, "learning_rate": 9.981481481481482e-06, "loss": 43.5559, "step": 3369 }, { "epoch": 80.23880597014926, "grad_norm": 52.74037170410156, "learning_rate": 9.97883597883598e-06, "loss": 43.7715, "step": 3370 }, { "epoch": 80.26268656716418, "grad_norm": 36.5859260559082, "learning_rate": 9.976190476190477e-06, "loss": 44.8368, "step": 3371 }, { "epoch": 80.2865671641791, "grad_norm": 41.1060676574707, "learning_rate": 9.973544973544974e-06, "loss": 44.2442, "step": 3372 }, { "epoch": 80.31044776119403, "grad_norm": 29.22023582458496, "learning_rate": 9.970899470899472e-06, "loss": 44.9361, "step": 3373 }, { "epoch": 80.33432835820895, "grad_norm": 23.876710891723633, "learning_rate": 9.968253968253969e-06, "loss": 43.0819, "step": 3374 }, { "epoch": 80.35820895522389, "grad_norm": 29.575992584228516, "learning_rate": 9.965608465608467e-06, "loss": 43.4547, "step": 3375 }, { "epoch": 80.38208955223881, "grad_norm": 30.555126190185547, "learning_rate": 9.962962962962964e-06, "loss": 42.7816, "step": 3376 }, { "epoch": 80.40597014925373, "grad_norm": 22.153589248657227, "learning_rate": 9.960317460317462e-06, "loss": 43.225, "step": 3377 }, { "epoch": 80.42985074626866, "grad_norm": 22.4864501953125, "learning_rate": 9.957671957671959e-06, "loss": 44.3476, "step": 3378 }, { "epoch": 80.45373134328358, "grad_norm": 28.664342880249023, "learning_rate": 9.955026455026457e-06, "loss": 43.8263, "step": 3379 }, { "epoch": 80.4776119402985, "grad_norm": 20.183809280395508, "learning_rate": 9.952380952380954e-06, "loss": 43.0054, "step": 3380 }, { "epoch": 80.50149253731344, "grad_norm": 20.122495651245117, "learning_rate": 9.94973544973545e-06, "loss": 42.8467, "step": 3381 }, { "epoch": 80.52537313432836, "grad_norm": 18.21672821044922, "learning_rate": 9.947089947089947e-06, "loss": 43.1002, "step": 3382 }, { "epoch": 80.54925373134328, "grad_norm": 19.279260635375977, "learning_rate": 9.944444444444445e-06, "loss": 43.057, "step": 3383 }, { "epoch": 80.57313432835821, "grad_norm": 16.66730308532715, "learning_rate": 9.941798941798942e-06, "loss": 41.9396, "step": 3384 }, { "epoch": 80.59701492537313, "grad_norm": 23.94289779663086, "learning_rate": 9.93915343915344e-06, "loss": 41.9997, "step": 3385 }, { "epoch": 80.62089552238805, "grad_norm": 19.543209075927734, "learning_rate": 9.936507936507937e-06, "loss": 43.4446, "step": 3386 }, { "epoch": 80.64477611940299, "grad_norm": 16.7114315032959, "learning_rate": 9.933862433862435e-06, "loss": 42.8548, "step": 3387 }, { "epoch": 80.66865671641791, "grad_norm": 14.687740325927734, "learning_rate": 9.931216931216932e-06, "loss": 43.4851, "step": 3388 }, { "epoch": 80.69253731343284, "grad_norm": 20.930234909057617, "learning_rate": 9.92857142857143e-06, "loss": 43.216, "step": 3389 }, { "epoch": 80.71641791044776, "grad_norm": 18.500185012817383, "learning_rate": 9.925925925925927e-06, "loss": 43.32, "step": 3390 }, { "epoch": 80.74029850746268, "grad_norm": 17.255064010620117, "learning_rate": 9.923280423280423e-06, "loss": 41.8527, "step": 3391 }, { "epoch": 80.7641791044776, "grad_norm": 23.286033630371094, "learning_rate": 9.920634920634922e-06, "loss": 42.4732, "step": 3392 }, { "epoch": 80.78805970149254, "grad_norm": 21.66954803466797, "learning_rate": 9.917989417989418e-06, "loss": 43.0689, "step": 3393 }, { "epoch": 80.81194029850747, "grad_norm": 15.510072708129883, "learning_rate": 9.915343915343916e-06, "loss": 42.6028, "step": 3394 }, { "epoch": 80.83582089552239, "grad_norm": 17.338539123535156, "learning_rate": 9.912698412698413e-06, "loss": 43.066, "step": 3395 }, { "epoch": 80.85970149253731, "grad_norm": 28.546316146850586, "learning_rate": 9.91005291005291e-06, "loss": 42.7705, "step": 3396 }, { "epoch": 80.88358208955223, "grad_norm": 21.883974075317383, "learning_rate": 9.907407407407408e-06, "loss": 42.3245, "step": 3397 }, { "epoch": 80.90746268656716, "grad_norm": 23.212677001953125, "learning_rate": 9.904761904761906e-06, "loss": 43.1431, "step": 3398 }, { "epoch": 80.9313432835821, "grad_norm": 19.58159828186035, "learning_rate": 9.902116402116403e-06, "loss": 43.5287, "step": 3399 }, { "epoch": 80.95522388059702, "grad_norm": 26.139862060546875, "learning_rate": 9.8994708994709e-06, "loss": 42.9908, "step": 3400 }, { "epoch": 80.97910447761194, "grad_norm": 16.672977447509766, "learning_rate": 9.896825396825398e-06, "loss": 42.1315, "step": 3401 }, { "epoch": 81.0, "grad_norm": 24.852455139160156, "learning_rate": 9.894179894179896e-06, "loss": 36.8278, "step": 3402 }, { "epoch": 81.02388059701492, "grad_norm": 22.26006317138672, "learning_rate": 9.891534391534391e-06, "loss": 42.4729, "step": 3403 }, { "epoch": 81.04776119402985, "grad_norm": 16.017719268798828, "learning_rate": 9.88888888888889e-06, "loss": 42.9225, "step": 3404 }, { "epoch": 81.07164179104478, "grad_norm": 28.550519943237305, "learning_rate": 9.886243386243386e-06, "loss": 42.6745, "step": 3405 }, { "epoch": 81.0955223880597, "grad_norm": 23.507572174072266, "learning_rate": 9.883597883597884e-06, "loss": 42.0028, "step": 3406 }, { "epoch": 81.11940298507463, "grad_norm": 21.06671905517578, "learning_rate": 9.880952380952381e-06, "loss": 43.0596, "step": 3407 }, { "epoch": 81.14328358208955, "grad_norm": 30.52378273010254, "learning_rate": 9.878306878306879e-06, "loss": 42.6651, "step": 3408 }, { "epoch": 81.16716417910447, "grad_norm": 20.8646183013916, "learning_rate": 9.875661375661376e-06, "loss": 42.5492, "step": 3409 }, { "epoch": 81.1910447761194, "grad_norm": 24.76753044128418, "learning_rate": 9.873015873015874e-06, "loss": 44.1658, "step": 3410 }, { "epoch": 81.21492537313434, "grad_norm": 24.59670066833496, "learning_rate": 9.870370370370371e-06, "loss": 41.993, "step": 3411 }, { "epoch": 81.23880597014926, "grad_norm": 18.1619815826416, "learning_rate": 9.867724867724869e-06, "loss": 41.729, "step": 3412 }, { "epoch": 81.26268656716418, "grad_norm": 25.726171493530273, "learning_rate": 9.865079365079366e-06, "loss": 43.4774, "step": 3413 }, { "epoch": 81.2865671641791, "grad_norm": 19.582408905029297, "learning_rate": 9.862433862433864e-06, "loss": 44.2081, "step": 3414 }, { "epoch": 81.31044776119403, "grad_norm": 19.20425033569336, "learning_rate": 9.85978835978836e-06, "loss": 45.2273, "step": 3415 }, { "epoch": 81.33432835820895, "grad_norm": 24.18745994567871, "learning_rate": 9.857142857142859e-06, "loss": 43.2535, "step": 3416 }, { "epoch": 81.35820895522389, "grad_norm": 20.09618377685547, "learning_rate": 9.854497354497355e-06, "loss": 42.837, "step": 3417 }, { "epoch": 81.38208955223881, "grad_norm": 18.357542037963867, "learning_rate": 9.851851851851852e-06, "loss": 42.3722, "step": 3418 }, { "epoch": 81.40597014925373, "grad_norm": 21.53424644470215, "learning_rate": 9.849206349206351e-06, "loss": 42.6014, "step": 3419 }, { "epoch": 81.42985074626866, "grad_norm": 23.138153076171875, "learning_rate": 9.846560846560847e-06, "loss": 43.1802, "step": 3420 }, { "epoch": 81.45373134328358, "grad_norm": NaN, "learning_rate": 9.843915343915345e-06, "loss": 60.8525, "step": 3421 }, { "epoch": 81.4776119402985, "grad_norm": 16.697940826416016, "learning_rate": 9.843915343915345e-06, "loss": 42.6524, "step": 3422 }, { "epoch": 81.50149253731344, "grad_norm": 21.829591751098633, "learning_rate": 9.841269841269842e-06, "loss": 42.8111, "step": 3423 }, { "epoch": 81.52537313432836, "grad_norm": 24.891218185424805, "learning_rate": 9.83862433862434e-06, "loss": 43.6078, "step": 3424 }, { "epoch": 81.54925373134328, "grad_norm": 21.53104019165039, "learning_rate": 9.835978835978837e-06, "loss": 42.8522, "step": 3425 }, { "epoch": 81.57313432835821, "grad_norm": 24.85852813720703, "learning_rate": 9.833333333333333e-06, "loss": 42.5736, "step": 3426 }, { "epoch": 81.59701492537313, "grad_norm": 25.954561233520508, "learning_rate": 9.830687830687832e-06, "loss": 42.513, "step": 3427 }, { "epoch": 81.62089552238805, "grad_norm": 18.79954719543457, "learning_rate": 9.828042328042328e-06, "loss": 42.4569, "step": 3428 }, { "epoch": 81.64477611940299, "grad_norm": 21.777231216430664, "learning_rate": 9.825396825396825e-06, "loss": 41.9235, "step": 3429 }, { "epoch": 81.66865671641791, "grad_norm": 20.84613037109375, "learning_rate": 9.822751322751325e-06, "loss": 43.7221, "step": 3430 }, { "epoch": 81.69253731343284, "grad_norm": 25.095165252685547, "learning_rate": 9.82010582010582e-06, "loss": 43.7676, "step": 3431 }, { "epoch": 81.71641791044776, "grad_norm": 20.732393264770508, "learning_rate": 9.817460317460318e-06, "loss": 42.3845, "step": 3432 }, { "epoch": 81.74029850746268, "grad_norm": NaN, "learning_rate": 9.814814814814815e-06, "loss": 42.1237, "step": 3433 }, { "epoch": 81.7641791044776, "grad_norm": 33.96809768676758, "learning_rate": 9.814814814814815e-06, "loss": 43.6781, "step": 3434 }, { "epoch": 81.78805970149254, "grad_norm": 20.83742904663086, "learning_rate": 9.812169312169313e-06, "loss": 43.1676, "step": 3435 }, { "epoch": 81.81194029850747, "grad_norm": 37.817081451416016, "learning_rate": 9.80952380952381e-06, "loss": 42.5989, "step": 3436 }, { "epoch": 81.83582089552239, "grad_norm": 26.07498550415039, "learning_rate": 9.806878306878308e-06, "loss": 43.613, "step": 3437 }, { "epoch": 81.85970149253731, "grad_norm": 32.35169982910156, "learning_rate": 9.804232804232805e-06, "loss": 43.4166, "step": 3438 }, { "epoch": 81.88358208955223, "grad_norm": 23.49301528930664, "learning_rate": 9.801587301587301e-06, "loss": 40.9932, "step": 3439 }, { "epoch": 81.90746268656716, "grad_norm": 28.475976943969727, "learning_rate": 9.7989417989418e-06, "loss": 44.0779, "step": 3440 }, { "epoch": 81.9313432835821, "grad_norm": 20.77143669128418, "learning_rate": 9.796296296296298e-06, "loss": 43.0358, "step": 3441 }, { "epoch": 81.95522388059702, "grad_norm": 27.558744430541992, "learning_rate": 9.793650793650794e-06, "loss": 42.6501, "step": 3442 }, { "epoch": 81.97910447761194, "grad_norm": 17.57852554321289, "learning_rate": 9.791005291005293e-06, "loss": 43.0594, "step": 3443 }, { "epoch": 82.0, "grad_norm": 30.414134979248047, "learning_rate": 9.788359788359789e-06, "loss": 37.7772, "step": 3444 }, { "epoch": 82.02388059701492, "grad_norm": 29.184572219848633, "learning_rate": 9.785714285714286e-06, "loss": 43.0878, "step": 3445 }, { "epoch": 82.04776119402985, "grad_norm": 24.36541748046875, "learning_rate": 9.783068783068784e-06, "loss": 43.1851, "step": 3446 }, { "epoch": 82.07164179104478, "grad_norm": 24.232807159423828, "learning_rate": 9.780423280423281e-06, "loss": 43.4104, "step": 3447 }, { "epoch": 82.0955223880597, "grad_norm": 29.002002716064453, "learning_rate": 9.777777777777779e-06, "loss": 44.6274, "step": 3448 }, { "epoch": 82.11940298507463, "grad_norm": 22.997961044311523, "learning_rate": 9.775132275132276e-06, "loss": 43.2128, "step": 3449 }, { "epoch": 82.14328358208955, "grad_norm": 26.34942626953125, "learning_rate": 9.772486772486774e-06, "loss": 42.6116, "step": 3450 }, { "epoch": 82.16716417910447, "grad_norm": 19.555774688720703, "learning_rate": 9.769841269841271e-06, "loss": 42.9207, "step": 3451 }, { "epoch": 82.1910447761194, "grad_norm": 25.108083724975586, "learning_rate": 9.767195767195769e-06, "loss": 41.7188, "step": 3452 }, { "epoch": 82.21492537313434, "grad_norm": 20.387653350830078, "learning_rate": 9.764550264550266e-06, "loss": 42.8712, "step": 3453 }, { "epoch": 82.23880597014926, "grad_norm": 24.493921279907227, "learning_rate": 9.761904761904762e-06, "loss": 43.6475, "step": 3454 }, { "epoch": 82.26268656716418, "grad_norm": 23.366165161132812, "learning_rate": 9.759259259259261e-06, "loss": 42.5025, "step": 3455 }, { "epoch": 82.2865671641791, "grad_norm": 25.831466674804688, "learning_rate": 9.756613756613757e-06, "loss": 44.1183, "step": 3456 }, { "epoch": 82.31044776119403, "grad_norm": 20.5382137298584, "learning_rate": 9.753968253968254e-06, "loss": 42.0874, "step": 3457 }, { "epoch": 82.33432835820895, "grad_norm": 23.923063278198242, "learning_rate": 9.751322751322752e-06, "loss": 44.2198, "step": 3458 }, { "epoch": 82.35820895522389, "grad_norm": 21.77039909362793, "learning_rate": 9.74867724867725e-06, "loss": 42.8486, "step": 3459 }, { "epoch": 82.38208955223881, "grad_norm": 19.86173439025879, "learning_rate": 9.746031746031747e-06, "loss": 43.104, "step": 3460 }, { "epoch": 82.40597014925373, "grad_norm": 20.714754104614258, "learning_rate": 9.743386243386244e-06, "loss": 41.789, "step": 3461 }, { "epoch": 82.42985074626866, "grad_norm": 24.748607635498047, "learning_rate": 9.740740740740742e-06, "loss": 41.7835, "step": 3462 }, { "epoch": 82.45373134328358, "grad_norm": 19.247220993041992, "learning_rate": 9.73809523809524e-06, "loss": 42.3253, "step": 3463 }, { "epoch": 82.4776119402985, "grad_norm": 21.964488983154297, "learning_rate": 9.735449735449735e-06, "loss": 40.6579, "step": 3464 }, { "epoch": 82.50149253731344, "grad_norm": 19.75965118408203, "learning_rate": 9.732804232804234e-06, "loss": 42.2777, "step": 3465 }, { "epoch": 82.52537313432836, "grad_norm": 19.871715545654297, "learning_rate": 9.73015873015873e-06, "loss": 41.8654, "step": 3466 }, { "epoch": 82.54925373134328, "grad_norm": 17.353679656982422, "learning_rate": 9.727513227513228e-06, "loss": 43.1572, "step": 3467 }, { "epoch": 82.57313432835821, "grad_norm": 22.952226638793945, "learning_rate": 9.724867724867725e-06, "loss": 42.2348, "step": 3468 }, { "epoch": 82.59701492537313, "grad_norm": 19.62160873413086, "learning_rate": 9.722222222222223e-06, "loss": 43.7133, "step": 3469 }, { "epoch": 82.62089552238805, "grad_norm": NaN, "learning_rate": 9.71957671957672e-06, "loss": 44.3913, "step": 3470 }, { "epoch": 82.64477611940299, "grad_norm": 22.301387786865234, "learning_rate": 9.71957671957672e-06, "loss": 42.7776, "step": 3471 }, { "epoch": 82.66865671641791, "grad_norm": 23.42523956298828, "learning_rate": 9.716931216931218e-06, "loss": 43.9875, "step": 3472 }, { "epoch": 82.69253731343284, "grad_norm": 19.187870025634766, "learning_rate": 9.714285714285715e-06, "loss": 43.6333, "step": 3473 }, { "epoch": 82.71641791044776, "grad_norm": 17.408340454101562, "learning_rate": 9.711640211640213e-06, "loss": 42.3257, "step": 3474 }, { "epoch": 82.74029850746268, "grad_norm": 17.102418899536133, "learning_rate": 9.70899470899471e-06, "loss": 41.7486, "step": 3475 }, { "epoch": 82.7641791044776, "grad_norm": 17.715524673461914, "learning_rate": 9.706349206349208e-06, "loss": 43.9781, "step": 3476 }, { "epoch": 82.78805970149254, "grad_norm": 22.915067672729492, "learning_rate": 9.703703703703703e-06, "loss": 43.0049, "step": 3477 }, { "epoch": 82.81194029850747, "grad_norm": 18.104154586791992, "learning_rate": 9.701058201058203e-06, "loss": 43.0062, "step": 3478 }, { "epoch": 82.83582089552239, "grad_norm": 14.81946086883545, "learning_rate": 9.698412698412698e-06, "loss": 42.0968, "step": 3479 }, { "epoch": 82.85970149253731, "grad_norm": 19.58578872680664, "learning_rate": 9.695767195767196e-06, "loss": 43.6563, "step": 3480 }, { "epoch": 82.88358208955223, "grad_norm": 17.979524612426758, "learning_rate": 9.693121693121693e-06, "loss": 41.9954, "step": 3481 }, { "epoch": 82.90746268656716, "grad_norm": 17.92389488220215, "learning_rate": 9.690476190476191e-06, "loss": 42.0242, "step": 3482 }, { "epoch": 82.9313432835821, "grad_norm": 22.026195526123047, "learning_rate": 9.687830687830688e-06, "loss": 43.2985, "step": 3483 }, { "epoch": 82.95522388059702, "grad_norm": 15.080731391906738, "learning_rate": 9.685185185185186e-06, "loss": 42.8814, "step": 3484 }, { "epoch": 82.97910447761194, "grad_norm": 23.170284271240234, "learning_rate": 9.682539682539683e-06, "loss": 42.4875, "step": 3485 }, { "epoch": 83.0, "grad_norm": 15.19926929473877, "learning_rate": 9.679894179894181e-06, "loss": 38.3047, "step": 3486 }, { "epoch": 83.02388059701492, "grad_norm": 20.842618942260742, "learning_rate": 9.677248677248678e-06, "loss": 41.9214, "step": 3487 }, { "epoch": 83.04776119402985, "grad_norm": 19.11284637451172, "learning_rate": 9.674603174603176e-06, "loss": 43.2375, "step": 3488 }, { "epoch": 83.07164179104478, "grad_norm": 19.39193344116211, "learning_rate": 9.671957671957672e-06, "loss": 43.5418, "step": 3489 }, { "epoch": 83.0955223880597, "grad_norm": 19.154869079589844, "learning_rate": 9.669312169312171e-06, "loss": 42.4917, "step": 3490 }, { "epoch": 83.11940298507463, "grad_norm": 27.682418823242188, "learning_rate": 9.666666666666667e-06, "loss": 43.22, "step": 3491 }, { "epoch": 83.14328358208955, "grad_norm": 19.741304397583008, "learning_rate": 9.664021164021164e-06, "loss": 42.6503, "step": 3492 }, { "epoch": 83.16716417910447, "grad_norm": 23.25188446044922, "learning_rate": 9.661375661375663e-06, "loss": 42.7449, "step": 3493 }, { "epoch": 83.1910447761194, "grad_norm": 25.500925064086914, "learning_rate": 9.65873015873016e-06, "loss": 43.8239, "step": 3494 }, { "epoch": 83.21492537313434, "grad_norm": 22.653488159179688, "learning_rate": 9.656084656084657e-06, "loss": 42.4962, "step": 3495 }, { "epoch": 83.23880597014926, "grad_norm": 21.660871505737305, "learning_rate": 9.653439153439154e-06, "loss": 44.1403, "step": 3496 }, { "epoch": 83.26268656716418, "grad_norm": 24.922666549682617, "learning_rate": 9.650793650793652e-06, "loss": 42.4295, "step": 3497 }, { "epoch": 83.2865671641791, "grad_norm": 20.24859619140625, "learning_rate": 9.64814814814815e-06, "loss": 41.7125, "step": 3498 }, { "epoch": 83.31044776119403, "grad_norm": 16.770278930664062, "learning_rate": 9.645502645502647e-06, "loss": 43.0386, "step": 3499 }, { "epoch": 83.33432835820895, "grad_norm": 20.553585052490234, "learning_rate": 9.642857142857144e-06, "loss": 43.2005, "step": 3500 }, { "epoch": 83.35820895522389, "grad_norm": 22.309749603271484, "learning_rate": 9.64021164021164e-06, "loss": 43.9736, "step": 3501 }, { "epoch": 83.38208955223881, "grad_norm": 16.99924659729004, "learning_rate": 9.63756613756614e-06, "loss": 42.9804, "step": 3502 }, { "epoch": 83.40597014925373, "grad_norm": 17.541120529174805, "learning_rate": 9.634920634920637e-06, "loss": 41.9332, "step": 3503 }, { "epoch": 83.42985074626866, "grad_norm": 19.222923278808594, "learning_rate": 9.632275132275132e-06, "loss": 43.163, "step": 3504 }, { "epoch": 83.45373134328358, "grad_norm": 23.178749084472656, "learning_rate": 9.62962962962963e-06, "loss": 41.4791, "step": 3505 }, { "epoch": 83.4776119402985, "grad_norm": 24.103410720825195, "learning_rate": 9.626984126984127e-06, "loss": 43.5107, "step": 3506 }, { "epoch": 83.50149253731344, "grad_norm": 16.439075469970703, "learning_rate": 9.624338624338625e-06, "loss": 43.6286, "step": 3507 }, { "epoch": 83.52537313432836, "grad_norm": 29.148473739624023, "learning_rate": 9.621693121693122e-06, "loss": 44.0076, "step": 3508 }, { "epoch": 83.54925373134328, "grad_norm": 23.33673667907715, "learning_rate": 9.61904761904762e-06, "loss": 42.0299, "step": 3509 }, { "epoch": 83.57313432835821, "grad_norm": 20.69951820373535, "learning_rate": 9.616402116402117e-06, "loss": 41.9305, "step": 3510 }, { "epoch": 83.59701492537313, "grad_norm": 28.55817413330078, "learning_rate": 9.613756613756613e-06, "loss": 42.112, "step": 3511 }, { "epoch": 83.62089552238805, "grad_norm": 20.63089942932129, "learning_rate": 9.611111111111112e-06, "loss": 42.5737, "step": 3512 }, { "epoch": 83.64477611940299, "grad_norm": 18.186328887939453, "learning_rate": 9.60846560846561e-06, "loss": 42.6654, "step": 3513 }, { "epoch": 83.66865671641791, "grad_norm": 30.312583923339844, "learning_rate": 9.605820105820106e-06, "loss": 41.6198, "step": 3514 }, { "epoch": 83.69253731343284, "grad_norm": 22.397600173950195, "learning_rate": 9.603174603174605e-06, "loss": 43.7027, "step": 3515 }, { "epoch": 83.71641791044776, "grad_norm": 22.637603759765625, "learning_rate": 9.6005291005291e-06, "loss": 43.3998, "step": 3516 }, { "epoch": 83.74029850746268, "grad_norm": 24.366125106811523, "learning_rate": 9.597883597883598e-06, "loss": 42.6906, "step": 3517 }, { "epoch": 83.7641791044776, "grad_norm": 21.425613403320312, "learning_rate": 9.595238095238096e-06, "loss": 42.7173, "step": 3518 }, { "epoch": 83.78805970149254, "grad_norm": 18.075485229492188, "learning_rate": 9.592592592592593e-06, "loss": 42.9601, "step": 3519 }, { "epoch": 83.81194029850747, "grad_norm": 19.24690818786621, "learning_rate": 9.58994708994709e-06, "loss": 41.9579, "step": 3520 }, { "epoch": 83.83582089552239, "grad_norm": 21.10234832763672, "learning_rate": 9.587301587301588e-06, "loss": 42.6078, "step": 3521 }, { "epoch": 83.85970149253731, "grad_norm": 21.595741271972656, "learning_rate": 9.584656084656086e-06, "loss": 43.3926, "step": 3522 }, { "epoch": 83.88358208955223, "grad_norm": 14.618033409118652, "learning_rate": 9.582010582010583e-06, "loss": 42.7237, "step": 3523 }, { "epoch": 83.90746268656716, "grad_norm": 18.805774688720703, "learning_rate": 9.57936507936508e-06, "loss": 43.6884, "step": 3524 }, { "epoch": 83.9313432835821, "grad_norm": 17.666229248046875, "learning_rate": 9.576719576719578e-06, "loss": 43.3069, "step": 3525 }, { "epoch": 83.95522388059702, "grad_norm": NaN, "learning_rate": 9.574074074074074e-06, "loss": 47.9701, "step": 3526 }, { "epoch": 83.97910447761194, "grad_norm": 18.41876792907715, "learning_rate": 9.574074074074074e-06, "loss": 42.0814, "step": 3527 }, { "epoch": 84.0, "grad_norm": 14.344976425170898, "learning_rate": 9.571428571428573e-06, "loss": 36.702, "step": 3528 }, { "epoch": 84.02388059701492, "grad_norm": 19.47123146057129, "learning_rate": 9.568783068783069e-06, "loss": 43.0682, "step": 3529 }, { "epoch": 84.04776119402985, "grad_norm": 18.708087921142578, "learning_rate": 9.566137566137567e-06, "loss": 43.4093, "step": 3530 }, { "epoch": 84.07164179104478, "grad_norm": 21.98741340637207, "learning_rate": 9.563492063492064e-06, "loss": 42.619, "step": 3531 }, { "epoch": 84.0955223880597, "grad_norm": 21.4478816986084, "learning_rate": 9.560846560846561e-06, "loss": 43.3145, "step": 3532 }, { "epoch": 84.11940298507463, "grad_norm": 21.093963623046875, "learning_rate": 9.558201058201059e-06, "loss": 43.1162, "step": 3533 }, { "epoch": 84.14328358208955, "grad_norm": 18.37552833557129, "learning_rate": 9.555555555555556e-06, "loss": 42.4734, "step": 3534 }, { "epoch": 84.16716417910447, "grad_norm": 13.956351280212402, "learning_rate": 9.552910052910054e-06, "loss": 42.4351, "step": 3535 }, { "epoch": 84.1910447761194, "grad_norm": 20.104270935058594, "learning_rate": 9.550264550264551e-06, "loss": 43.2507, "step": 3536 }, { "epoch": 84.21492537313434, "grad_norm": 20.69384002685547, "learning_rate": 9.547619047619049e-06, "loss": 42.8764, "step": 3537 }, { "epoch": 84.23880597014926, "grad_norm": 26.53329086303711, "learning_rate": 9.544973544973546e-06, "loss": 42.4139, "step": 3538 }, { "epoch": 84.26268656716418, "grad_norm": 11.859530448913574, "learning_rate": 9.542328042328042e-06, "loss": 42.4525, "step": 3539 }, { "epoch": 84.2865671641791, "grad_norm": 22.784093856811523, "learning_rate": 9.539682539682541e-06, "loss": 42.6754, "step": 3540 }, { "epoch": 84.31044776119403, "grad_norm": 22.02342987060547, "learning_rate": 9.537037037037037e-06, "loss": 42.8119, "step": 3541 }, { "epoch": 84.33432835820895, "grad_norm": 16.383922576904297, "learning_rate": 9.534391534391535e-06, "loss": 41.9982, "step": 3542 }, { "epoch": 84.35820895522389, "grad_norm": 18.745128631591797, "learning_rate": 9.531746031746032e-06, "loss": 43.0496, "step": 3543 }, { "epoch": 84.38208955223881, "grad_norm": 33.664764404296875, "learning_rate": 9.52910052910053e-06, "loss": 42.5116, "step": 3544 }, { "epoch": 84.40597014925373, "grad_norm": 18.74268341064453, "learning_rate": 9.526455026455027e-06, "loss": 43.642, "step": 3545 }, { "epoch": 84.42985074626866, "grad_norm": 30.136598587036133, "learning_rate": 9.523809523809525e-06, "loss": 42.8695, "step": 3546 }, { "epoch": 84.45373134328358, "grad_norm": 22.268802642822266, "learning_rate": 9.521164021164022e-06, "loss": 42.6697, "step": 3547 }, { "epoch": 84.4776119402985, "grad_norm": 22.149734497070312, "learning_rate": 9.51851851851852e-06, "loss": 43.0171, "step": 3548 }, { "epoch": 84.50149253731344, "grad_norm": 33.4512825012207, "learning_rate": 9.515873015873016e-06, "loss": 43.2588, "step": 3549 }, { "epoch": 84.52537313432836, "grad_norm": 22.5905704498291, "learning_rate": 9.513227513227515e-06, "loss": 43.2581, "step": 3550 }, { "epoch": 84.54925373134328, "grad_norm": 38.85606384277344, "learning_rate": 9.51058201058201e-06, "loss": 42.2418, "step": 3551 }, { "epoch": 84.57313432835821, "grad_norm": 27.77952003479004, "learning_rate": 9.507936507936508e-06, "loss": 43.4077, "step": 3552 }, { "epoch": 84.59701492537313, "grad_norm": 46.536651611328125, "learning_rate": 9.505291005291006e-06, "loss": 42.1365, "step": 3553 }, { "epoch": 84.62089552238805, "grad_norm": 32.448482513427734, "learning_rate": 9.502645502645503e-06, "loss": 43.362, "step": 3554 }, { "epoch": 84.64477611940299, "grad_norm": 43.40568161010742, "learning_rate": 9.5e-06, "loss": 42.4134, "step": 3555 }, { "epoch": 84.66865671641791, "grad_norm": 44.625125885009766, "learning_rate": 9.497354497354498e-06, "loss": 42.3841, "step": 3556 }, { "epoch": 84.69253731343284, "grad_norm": 30.825876235961914, "learning_rate": 9.494708994708996e-06, "loss": 43.0973, "step": 3557 }, { "epoch": 84.71641791044776, "grad_norm": 32.886775970458984, "learning_rate": 9.492063492063493e-06, "loss": 42.6478, "step": 3558 }, { "epoch": 84.74029850746268, "grad_norm": 35.800621032714844, "learning_rate": 9.48941798941799e-06, "loss": 42.1319, "step": 3559 }, { "epoch": 84.7641791044776, "grad_norm": 27.23737907409668, "learning_rate": 9.486772486772488e-06, "loss": 42.0883, "step": 3560 }, { "epoch": 84.78805970149254, "grad_norm": 40.162166595458984, "learning_rate": 9.484126984126984e-06, "loss": 42.5786, "step": 3561 }, { "epoch": 84.81194029850747, "grad_norm": 32.1665153503418, "learning_rate": 9.481481481481483e-06, "loss": 41.7711, "step": 3562 }, { "epoch": 84.83582089552239, "grad_norm": 34.32803726196289, "learning_rate": 9.478835978835979e-06, "loss": 43.3193, "step": 3563 }, { "epoch": 84.85970149253731, "grad_norm": 34.155452728271484, "learning_rate": 9.476190476190476e-06, "loss": 43.2305, "step": 3564 }, { "epoch": 84.88358208955223, "grad_norm": 31.642534255981445, "learning_rate": 9.473544973544975e-06, "loss": 44.1911, "step": 3565 }, { "epoch": 84.90746268656716, "grad_norm": 33.413291931152344, "learning_rate": 9.470899470899471e-06, "loss": 41.0447, "step": 3566 }, { "epoch": 84.9313432835821, "grad_norm": 35.05025100708008, "learning_rate": 9.468253968253969e-06, "loss": 43.3446, "step": 3567 }, { "epoch": 84.95522388059702, "grad_norm": 28.946184158325195, "learning_rate": 9.465608465608466e-06, "loss": 42.4865, "step": 3568 }, { "epoch": 84.97910447761194, "grad_norm": 38.28304672241211, "learning_rate": 9.462962962962964e-06, "loss": 42.6287, "step": 3569 }, { "epoch": 85.0, "grad_norm": 27.42157745361328, "learning_rate": 9.460317460317461e-06, "loss": 37.3853, "step": 3570 }, { "epoch": 85.02388059701492, "grad_norm": 40.84228515625, "learning_rate": 9.457671957671959e-06, "loss": 43.8201, "step": 3571 }, { "epoch": 85.04776119402985, "grad_norm": 36.39906692504883, "learning_rate": 9.455026455026456e-06, "loss": 41.5254, "step": 3572 }, { "epoch": 85.07164179104478, "grad_norm": 29.795923233032227, "learning_rate": 9.452380952380952e-06, "loss": 42.4395, "step": 3573 }, { "epoch": 85.0955223880597, "grad_norm": 25.486753463745117, "learning_rate": 9.449735449735451e-06, "loss": 42.8189, "step": 3574 }, { "epoch": 85.11940298507463, "grad_norm": 37.790260314941406, "learning_rate": 9.447089947089949e-06, "loss": 42.8718, "step": 3575 }, { "epoch": 85.14328358208955, "grad_norm": 29.528602600097656, "learning_rate": 9.444444444444445e-06, "loss": 43.3714, "step": 3576 }, { "epoch": 85.16716417910447, "grad_norm": 37.71443176269531, "learning_rate": 9.441798941798944e-06, "loss": 42.4381, "step": 3577 }, { "epoch": 85.1910447761194, "grad_norm": 36.625, "learning_rate": 9.43915343915344e-06, "loss": 41.7229, "step": 3578 }, { "epoch": 85.21492537313434, "grad_norm": 30.335342407226562, "learning_rate": 9.436507936507937e-06, "loss": 41.8887, "step": 3579 }, { "epoch": 85.23880597014926, "grad_norm": 24.375329971313477, "learning_rate": 9.433862433862435e-06, "loss": 42.6894, "step": 3580 }, { "epoch": 85.26268656716418, "grad_norm": 34.27681350708008, "learning_rate": 9.431216931216932e-06, "loss": 42.4825, "step": 3581 }, { "epoch": 85.2865671641791, "grad_norm": 27.515474319458008, "learning_rate": 9.42857142857143e-06, "loss": 41.3011, "step": 3582 }, { "epoch": 85.31044776119403, "grad_norm": 39.355350494384766, "learning_rate": 9.425925925925925e-06, "loss": 42.1456, "step": 3583 }, { "epoch": 85.33432835820895, "grad_norm": 34.957523345947266, "learning_rate": 9.423280423280425e-06, "loss": 42.9466, "step": 3584 }, { "epoch": 85.35820895522389, "grad_norm": 30.264474868774414, "learning_rate": 9.420634920634922e-06, "loss": 42.5819, "step": 3585 }, { "epoch": 85.38208955223881, "grad_norm": 27.88845443725586, "learning_rate": 9.417989417989418e-06, "loss": 41.4037, "step": 3586 }, { "epoch": 85.40597014925373, "grad_norm": 30.240957260131836, "learning_rate": 9.415343915343917e-06, "loss": 44.7681, "step": 3587 }, { "epoch": 85.42985074626866, "grad_norm": 23.867399215698242, "learning_rate": 9.412698412698413e-06, "loss": 41.2699, "step": 3588 }, { "epoch": 85.45373134328358, "grad_norm": 39.2992057800293, "learning_rate": 9.41005291005291e-06, "loss": 42.2639, "step": 3589 }, { "epoch": 85.4776119402985, "grad_norm": 32.746673583984375, "learning_rate": 9.407407407407408e-06, "loss": 43.3612, "step": 3590 }, { "epoch": 85.50149253731344, "grad_norm": 33.791748046875, "learning_rate": 9.404761904761905e-06, "loss": 43.1554, "step": 3591 }, { "epoch": 85.52537313432836, "grad_norm": 35.11564254760742, "learning_rate": 9.402116402116403e-06, "loss": 43.4265, "step": 3592 }, { "epoch": 85.54925373134328, "grad_norm": 27.411352157592773, "learning_rate": 9.3994708994709e-06, "loss": 42.7959, "step": 3593 }, { "epoch": 85.57313432835821, "grad_norm": 27.369596481323242, "learning_rate": 9.396825396825398e-06, "loss": 44.1557, "step": 3594 }, { "epoch": 85.59701492537313, "grad_norm": 30.399707794189453, "learning_rate": 9.394179894179895e-06, "loss": 42.5034, "step": 3595 }, { "epoch": 85.62089552238805, "grad_norm": 24.180538177490234, "learning_rate": 9.391534391534393e-06, "loss": 42.256, "step": 3596 }, { "epoch": 85.64477611940299, "grad_norm": 35.2861328125, "learning_rate": 9.38888888888889e-06, "loss": 43.6244, "step": 3597 }, { "epoch": 85.66865671641791, "grad_norm": 28.855852127075195, "learning_rate": 9.386243386243386e-06, "loss": 43.046, "step": 3598 }, { "epoch": 85.69253731343284, "grad_norm": 33.648170471191406, "learning_rate": 9.383597883597885e-06, "loss": 42.0113, "step": 3599 }, { "epoch": 85.71641791044776, "grad_norm": 30.42345428466797, "learning_rate": 9.380952380952381e-06, "loss": 42.4, "step": 3600 }, { "epoch": 85.74029850746268, "grad_norm": 34.80357360839844, "learning_rate": 9.378306878306879e-06, "loss": 41.5381, "step": 3601 }, { "epoch": 85.7641791044776, "grad_norm": 29.07464027404785, "learning_rate": 9.375661375661376e-06, "loss": 43.5597, "step": 3602 }, { "epoch": 85.78805970149254, "grad_norm": 35.02674865722656, "learning_rate": 9.373015873015874e-06, "loss": 42.0479, "step": 3603 }, { "epoch": 85.81194029850747, "grad_norm": 29.9696044921875, "learning_rate": 9.370370370370371e-06, "loss": 42.6829, "step": 3604 }, { "epoch": 85.83582089552239, "grad_norm": 31.754671096801758, "learning_rate": 9.367724867724869e-06, "loss": 42.824, "step": 3605 }, { "epoch": 85.85970149253731, "grad_norm": 30.765913009643555, "learning_rate": 9.365079365079366e-06, "loss": 42.8912, "step": 3606 }, { "epoch": 85.88358208955223, "grad_norm": 33.485015869140625, "learning_rate": 9.362433862433864e-06, "loss": 42.7802, "step": 3607 }, { "epoch": 85.90746268656716, "grad_norm": 27.535614013671875, "learning_rate": 9.359788359788361e-06, "loss": 43.0182, "step": 3608 }, { "epoch": 85.9313432835821, "grad_norm": 28.8901309967041, "learning_rate": 9.357142857142859e-06, "loss": 43.1223, "step": 3609 }, { "epoch": 85.95522388059702, "grad_norm": 27.991809844970703, "learning_rate": 9.354497354497354e-06, "loss": 43.4701, "step": 3610 }, { "epoch": 85.97910447761194, "grad_norm": 33.3857536315918, "learning_rate": 9.351851851851854e-06, "loss": 43.115, "step": 3611 }, { "epoch": 86.0, "grad_norm": 25.59542465209961, "learning_rate": 9.34920634920635e-06, "loss": 37.2275, "step": 3612 }, { "epoch": 86.02388059701492, "grad_norm": 32.959251403808594, "learning_rate": 9.346560846560847e-06, "loss": 41.951, "step": 3613 }, { "epoch": 86.04776119402985, "grad_norm": 26.431245803833008, "learning_rate": 9.343915343915344e-06, "loss": 40.553, "step": 3614 }, { "epoch": 86.07164179104478, "grad_norm": 31.518957138061523, "learning_rate": 9.341269841269842e-06, "loss": 42.7644, "step": 3615 }, { "epoch": 86.0955223880597, "grad_norm": 30.063220977783203, "learning_rate": 9.33862433862434e-06, "loss": 41.5891, "step": 3616 }, { "epoch": 86.11940298507463, "grad_norm": 32.649227142333984, "learning_rate": 9.335978835978837e-06, "loss": 43.2927, "step": 3617 }, { "epoch": 86.14328358208955, "grad_norm": 28.61098861694336, "learning_rate": 9.333333333333334e-06, "loss": 42.4467, "step": 3618 }, { "epoch": 86.16716417910447, "grad_norm": 30.715784072875977, "learning_rate": 9.330687830687832e-06, "loss": 41.0933, "step": 3619 }, { "epoch": 86.1910447761194, "grad_norm": 27.99184799194336, "learning_rate": 9.32804232804233e-06, "loss": 42.218, "step": 3620 }, { "epoch": 86.21492537313434, "grad_norm": 32.13215255737305, "learning_rate": 9.325396825396827e-06, "loss": 42.4495, "step": 3621 }, { "epoch": 86.23880597014926, "grad_norm": 28.051559448242188, "learning_rate": 9.322751322751323e-06, "loss": 43.2451, "step": 3622 }, { "epoch": 86.26268656716418, "grad_norm": 33.464115142822266, "learning_rate": 9.32010582010582e-06, "loss": 43.6584, "step": 3623 }, { "epoch": 86.2865671641791, "grad_norm": 28.151817321777344, "learning_rate": 9.317460317460318e-06, "loss": 42.722, "step": 3624 }, { "epoch": 86.31044776119403, "grad_norm": NaN, "learning_rate": 9.314814814814815e-06, "loss": 37.1565, "step": 3625 }, { "epoch": 86.33432835820895, "grad_norm": 29.83131217956543, "learning_rate": 9.314814814814815e-06, "loss": 43.1632, "step": 3626 }, { "epoch": 86.35820895522389, "grad_norm": 24.83383560180664, "learning_rate": 9.312169312169313e-06, "loss": 43.5542, "step": 3627 }, { "epoch": 86.38208955223881, "grad_norm": 33.05693817138672, "learning_rate": 9.30952380952381e-06, "loss": 42.7432, "step": 3628 }, { "epoch": 86.40597014925373, "grad_norm": 24.293209075927734, "learning_rate": 9.306878306878308e-06, "loss": 42.9506, "step": 3629 }, { "epoch": 86.42985074626866, "grad_norm": 33.47346496582031, "learning_rate": 9.304232804232805e-06, "loss": 42.2723, "step": 3630 }, { "epoch": 86.45373134328358, "grad_norm": 28.47313117980957, "learning_rate": 9.301587301587303e-06, "loss": 43.7464, "step": 3631 }, { "epoch": 86.4776119402985, "grad_norm": 32.237510681152344, "learning_rate": 9.2989417989418e-06, "loss": 42.8704, "step": 3632 }, { "epoch": 86.50149253731344, "grad_norm": 25.842601776123047, "learning_rate": 9.296296296296296e-06, "loss": 41.6084, "step": 3633 }, { "epoch": 86.52537313432836, "grad_norm": 27.513307571411133, "learning_rate": 9.293650793650795e-06, "loss": 42.7922, "step": 3634 }, { "epoch": 86.54925373134328, "grad_norm": 25.676212310791016, "learning_rate": 9.291005291005291e-06, "loss": 42.0415, "step": 3635 }, { "epoch": 86.57313432835821, "grad_norm": 29.911081314086914, "learning_rate": 9.288359788359788e-06, "loss": 43.0526, "step": 3636 }, { "epoch": 86.59701492537313, "grad_norm": 21.788707733154297, "learning_rate": 9.285714285714288e-06, "loss": 42.2228, "step": 3637 }, { "epoch": 86.62089552238805, "grad_norm": 34.92530822753906, "learning_rate": 9.283068783068783e-06, "loss": 42.6756, "step": 3638 }, { "epoch": 86.64477611940299, "grad_norm": 31.41309928894043, "learning_rate": 9.280423280423281e-06, "loss": 43.3258, "step": 3639 }, { "epoch": 86.66865671641791, "grad_norm": 27.432342529296875, "learning_rate": 9.277777777777778e-06, "loss": 43.0612, "step": 3640 }, { "epoch": 86.69253731343284, "grad_norm": 25.92644691467285, "learning_rate": 9.275132275132276e-06, "loss": 41.7141, "step": 3641 }, { "epoch": 86.71641791044776, "grad_norm": 27.26793098449707, "learning_rate": 9.272486772486773e-06, "loss": 42.8127, "step": 3642 }, { "epoch": 86.74029850746268, "grad_norm": 22.45132827758789, "learning_rate": 9.26984126984127e-06, "loss": 43.4623, "step": 3643 }, { "epoch": 86.7641791044776, "grad_norm": 29.31770896911621, "learning_rate": 9.267195767195768e-06, "loss": 43.428, "step": 3644 }, { "epoch": 86.78805970149254, "grad_norm": 26.000110626220703, "learning_rate": 9.264550264550264e-06, "loss": 43.7773, "step": 3645 }, { "epoch": 86.81194029850747, "grad_norm": 30.51299476623535, "learning_rate": 9.261904761904763e-06, "loss": 43.2915, "step": 3646 }, { "epoch": 86.83582089552239, "grad_norm": 25.712812423706055, "learning_rate": 9.25925925925926e-06, "loss": 42.6203, "step": 3647 }, { "epoch": 86.85970149253731, "grad_norm": 32.85362243652344, "learning_rate": 9.256613756613757e-06, "loss": 42.1768, "step": 3648 }, { "epoch": 86.88358208955223, "grad_norm": 30.07919692993164, "learning_rate": 9.253968253968256e-06, "loss": 42.4139, "step": 3649 }, { "epoch": 86.90746268656716, "grad_norm": 31.38039779663086, "learning_rate": 9.251322751322752e-06, "loss": 42.23, "step": 3650 }, { "epoch": 86.9313432835821, "grad_norm": 26.601993560791016, "learning_rate": 9.248677248677249e-06, "loss": 42.2522, "step": 3651 }, { "epoch": 86.95522388059702, "grad_norm": 31.616823196411133, "learning_rate": 9.246031746031747e-06, "loss": 43.3183, "step": 3652 }, { "epoch": 86.97910447761194, "grad_norm": 25.606231689453125, "learning_rate": 9.243386243386244e-06, "loss": 42.7862, "step": 3653 }, { "epoch": 87.0, "grad_norm": 22.20980453491211, "learning_rate": 9.240740740740742e-06, "loss": 37.7077, "step": 3654 }, { "epoch": 87.02388059701492, "grad_norm": 24.519224166870117, "learning_rate": 9.238095238095239e-06, "loss": 42.4255, "step": 3655 }, { "epoch": 87.04776119402985, "grad_norm": 27.409582138061523, "learning_rate": 9.235449735449737e-06, "loss": 42.0198, "step": 3656 }, { "epoch": 87.07164179104478, "grad_norm": 20.307886123657227, "learning_rate": 9.232804232804234e-06, "loss": 41.6037, "step": 3657 }, { "epoch": 87.0955223880597, "grad_norm": 24.046375274658203, "learning_rate": 9.230158730158732e-06, "loss": 43.9297, "step": 3658 }, { "epoch": 87.11940298507463, "grad_norm": 23.58251953125, "learning_rate": 9.227513227513229e-06, "loss": 43.4211, "step": 3659 }, { "epoch": 87.14328358208955, "grad_norm": 20.67659568786621, "learning_rate": 9.224867724867725e-06, "loss": 42.7205, "step": 3660 }, { "epoch": 87.16716417910447, "grad_norm": 18.82547950744629, "learning_rate": 9.222222222222224e-06, "loss": 42.8921, "step": 3661 }, { "epoch": 87.1910447761194, "grad_norm": 21.20027732849121, "learning_rate": 9.21957671957672e-06, "loss": 41.0809, "step": 3662 }, { "epoch": 87.21492537313434, "grad_norm": 20.002410888671875, "learning_rate": 9.216931216931217e-06, "loss": 42.0559, "step": 3663 }, { "epoch": 87.23880597014926, "grad_norm": 16.792434692382812, "learning_rate": 9.214285714285715e-06, "loss": 40.659, "step": 3664 }, { "epoch": 87.26268656716418, "grad_norm": 20.209190368652344, "learning_rate": 9.211640211640212e-06, "loss": 42.387, "step": 3665 }, { "epoch": 87.2865671641791, "grad_norm": 17.87749481201172, "learning_rate": 9.20899470899471e-06, "loss": 41.6863, "step": 3666 }, { "epoch": 87.31044776119403, "grad_norm": 16.422809600830078, "learning_rate": 9.206349206349207e-06, "loss": 43.5165, "step": 3667 }, { "epoch": 87.33432835820895, "grad_norm": 17.762025833129883, "learning_rate": 9.203703703703705e-06, "loss": 41.3489, "step": 3668 }, { "epoch": 87.35820895522389, "grad_norm": 18.185434341430664, "learning_rate": 9.201058201058202e-06, "loss": 42.9896, "step": 3669 }, { "epoch": 87.38208955223881, "grad_norm": 15.573823928833008, "learning_rate": 9.198412698412698e-06, "loss": 42.5428, "step": 3670 }, { "epoch": 87.40597014925373, "grad_norm": 21.007041931152344, "learning_rate": 9.195767195767197e-06, "loss": 41.6825, "step": 3671 }, { "epoch": 87.42985074626866, "grad_norm": 21.610292434692383, "learning_rate": 9.193121693121693e-06, "loss": 42.8643, "step": 3672 }, { "epoch": 87.45373134328358, "grad_norm": 16.124156951904297, "learning_rate": 9.19047619047619e-06, "loss": 42.5377, "step": 3673 }, { "epoch": 87.4776119402985, "grad_norm": 22.14504051208496, "learning_rate": 9.187830687830688e-06, "loss": 42.878, "step": 3674 }, { "epoch": 87.50149253731344, "grad_norm": 17.33942222595215, "learning_rate": 9.185185185185186e-06, "loss": 44.3817, "step": 3675 }, { "epoch": 87.52537313432836, "grad_norm": 21.361644744873047, "learning_rate": 9.182539682539683e-06, "loss": 42.913, "step": 3676 }, { "epoch": 87.54925373134328, "grad_norm": 18.6135196685791, "learning_rate": 9.17989417989418e-06, "loss": 42.8328, "step": 3677 }, { "epoch": 87.57313432835821, "grad_norm": 23.618101119995117, "learning_rate": 9.177248677248678e-06, "loss": 42.4581, "step": 3678 }, { "epoch": 87.59701492537313, "grad_norm": 18.788637161254883, "learning_rate": 9.174603174603176e-06, "loss": 43.5344, "step": 3679 }, { "epoch": 87.62089552238805, "grad_norm": 17.69763946533203, "learning_rate": 9.171957671957673e-06, "loss": 42.8437, "step": 3680 }, { "epoch": 87.64477611940299, "grad_norm": 19.06989097595215, "learning_rate": 9.16931216931217e-06, "loss": 42.3788, "step": 3681 }, { "epoch": 87.66865671641791, "grad_norm": 18.462968826293945, "learning_rate": 9.166666666666666e-06, "loss": 42.759, "step": 3682 }, { "epoch": 87.69253731343284, "grad_norm": 21.524621963500977, "learning_rate": 9.164021164021166e-06, "loss": 43.1027, "step": 3683 }, { "epoch": 87.71641791044776, "grad_norm": 18.747453689575195, "learning_rate": 9.161375661375661e-06, "loss": 43.0803, "step": 3684 }, { "epoch": 87.74029850746268, "grad_norm": 21.170255661010742, "learning_rate": 9.158730158730159e-06, "loss": 42.641, "step": 3685 }, { "epoch": 87.7641791044776, "grad_norm": 19.89739990234375, "learning_rate": 9.156084656084656e-06, "loss": 42.5469, "step": 3686 }, { "epoch": 87.78805970149254, "grad_norm": 22.9807071685791, "learning_rate": 9.153439153439154e-06, "loss": 42.5137, "step": 3687 }, { "epoch": 87.81194029850747, "grad_norm": 19.036230087280273, "learning_rate": 9.150793650793651e-06, "loss": 42.8328, "step": 3688 }, { "epoch": 87.83582089552239, "grad_norm": 23.97933006286621, "learning_rate": 9.148148148148149e-06, "loss": 42.9784, "step": 3689 }, { "epoch": 87.85970149253731, "grad_norm": 18.06254768371582, "learning_rate": 9.145502645502646e-06, "loss": 41.7068, "step": 3690 }, { "epoch": 87.88358208955223, "grad_norm": 19.88326072692871, "learning_rate": 9.142857142857144e-06, "loss": 43.8, "step": 3691 }, { "epoch": 87.90746268656716, "grad_norm": 20.145050048828125, "learning_rate": 9.140211640211641e-06, "loss": 43.2459, "step": 3692 }, { "epoch": 87.9313432835821, "grad_norm": 16.824399948120117, "learning_rate": 9.137566137566139e-06, "loss": 42.4406, "step": 3693 }, { "epoch": 87.95522388059702, "grad_norm": 20.99275779724121, "learning_rate": 9.134920634920635e-06, "loss": 42.2506, "step": 3694 }, { "epoch": 87.97910447761194, "grad_norm": 23.64455223083496, "learning_rate": 9.132275132275134e-06, "loss": 43.1451, "step": 3695 }, { "epoch": 88.0, "grad_norm": 17.736629486083984, "learning_rate": 9.12962962962963e-06, "loss": 36.9082, "step": 3696 }, { "epoch": 88.02388059701492, "grad_norm": 20.58110809326172, "learning_rate": 9.126984126984127e-06, "loss": 41.6838, "step": 3697 }, { "epoch": 88.04776119402985, "grad_norm": 21.2742977142334, "learning_rate": 9.124338624338626e-06, "loss": 43.8259, "step": 3698 }, { "epoch": 88.07164179104478, "grad_norm": 18.40839958190918, "learning_rate": 9.121693121693122e-06, "loss": 41.6561, "step": 3699 }, { "epoch": 88.0955223880597, "grad_norm": 25.24982261657715, "learning_rate": 9.11904761904762e-06, "loss": 43.4407, "step": 3700 }, { "epoch": 88.11940298507463, "grad_norm": 16.522397994995117, "learning_rate": 9.116402116402117e-06, "loss": 42.3175, "step": 3701 }, { "epoch": 88.14328358208955, "grad_norm": 23.80354881286621, "learning_rate": 9.113756613756615e-06, "loss": 41.8656, "step": 3702 }, { "epoch": 88.16716417910447, "grad_norm": 17.915058135986328, "learning_rate": 9.111111111111112e-06, "loss": 43.4793, "step": 3703 }, { "epoch": 88.1910447761194, "grad_norm": 24.271337509155273, "learning_rate": 9.108465608465608e-06, "loss": 42.3917, "step": 3704 }, { "epoch": 88.21492537313434, "grad_norm": 21.696147918701172, "learning_rate": 9.105820105820107e-06, "loss": 42.3141, "step": 3705 }, { "epoch": 88.23880597014926, "grad_norm": 23.576507568359375, "learning_rate": 9.103174603174603e-06, "loss": 42.9454, "step": 3706 }, { "epoch": 88.26268656716418, "grad_norm": 25.030128479003906, "learning_rate": 9.1005291005291e-06, "loss": 42.8441, "step": 3707 }, { "epoch": 88.2865671641791, "grad_norm": 21.148405075073242, "learning_rate": 9.0978835978836e-06, "loss": 43.0314, "step": 3708 }, { "epoch": 88.31044776119403, "grad_norm": 25.8000431060791, "learning_rate": 9.095238095238095e-06, "loss": 42.5864, "step": 3709 }, { "epoch": 88.33432835820895, "grad_norm": 15.713743209838867, "learning_rate": 9.092592592592593e-06, "loss": 42.8121, "step": 3710 }, { "epoch": 88.35820895522389, "grad_norm": 23.208627700805664, "learning_rate": 9.08994708994709e-06, "loss": 42.9846, "step": 3711 }, { "epoch": 88.38208955223881, "grad_norm": 17.478639602661133, "learning_rate": 9.087301587301588e-06, "loss": 42.2004, "step": 3712 }, { "epoch": 88.40597014925373, "grad_norm": 21.487903594970703, "learning_rate": 9.084656084656085e-06, "loss": 41.7275, "step": 3713 }, { "epoch": 88.42985074626866, "grad_norm": 27.780941009521484, "learning_rate": 9.082010582010583e-06, "loss": 42.1269, "step": 3714 }, { "epoch": 88.45373134328358, "grad_norm": 14.19015884399414, "learning_rate": 9.07936507936508e-06, "loss": 43.512, "step": 3715 }, { "epoch": 88.4776119402985, "grad_norm": 27.63198471069336, "learning_rate": 9.076719576719576e-06, "loss": 42.4196, "step": 3716 }, { "epoch": 88.50149253731344, "grad_norm": 21.5277099609375, "learning_rate": 9.074074074074075e-06, "loss": 41.8393, "step": 3717 }, { "epoch": 88.52537313432836, "grad_norm": 20.19924545288086, "learning_rate": 9.071428571428573e-06, "loss": 41.6486, "step": 3718 }, { "epoch": 88.54925373134328, "grad_norm": 22.75286865234375, "learning_rate": 9.068783068783069e-06, "loss": 43.3116, "step": 3719 }, { "epoch": 88.57313432835821, "grad_norm": 16.763381958007812, "learning_rate": 9.066137566137568e-06, "loss": 43.0704, "step": 3720 }, { "epoch": 88.59701492537313, "grad_norm": 23.842023849487305, "learning_rate": 9.063492063492064e-06, "loss": 43.7468, "step": 3721 }, { "epoch": 88.62089552238805, "grad_norm": 20.88597297668457, "learning_rate": 9.060846560846561e-06, "loss": 42.2398, "step": 3722 }, { "epoch": 88.64477611940299, "grad_norm": 19.333271026611328, "learning_rate": 9.058201058201059e-06, "loss": 41.7667, "step": 3723 }, { "epoch": 88.66865671641791, "grad_norm": 22.313888549804688, "learning_rate": 9.055555555555556e-06, "loss": 42.3198, "step": 3724 }, { "epoch": 88.69253731343284, "grad_norm": 20.26089096069336, "learning_rate": 9.052910052910054e-06, "loss": 42.9191, "step": 3725 }, { "epoch": 88.71641791044776, "grad_norm": 17.900373458862305, "learning_rate": 9.050264550264551e-06, "loss": 41.7498, "step": 3726 }, { "epoch": 88.74029850746268, "grad_norm": 22.735700607299805, "learning_rate": 9.047619047619049e-06, "loss": 41.4744, "step": 3727 }, { "epoch": 88.7641791044776, "grad_norm": 22.933048248291016, "learning_rate": 9.044973544973546e-06, "loss": 43.4595, "step": 3728 }, { "epoch": 88.78805970149254, "grad_norm": 15.648778915405273, "learning_rate": 9.042328042328044e-06, "loss": 43.4811, "step": 3729 }, { "epoch": 88.81194029850747, "grad_norm": 35.44391632080078, "learning_rate": 9.039682539682541e-06, "loss": 42.0879, "step": 3730 }, { "epoch": 88.83582089552239, "grad_norm": 26.575231552124023, "learning_rate": 9.037037037037037e-06, "loss": 41.6883, "step": 3731 }, { "epoch": 88.85970149253731, "grad_norm": 33.38102340698242, "learning_rate": 9.034391534391536e-06, "loss": 43.2903, "step": 3732 }, { "epoch": 88.88358208955223, "grad_norm": 26.297910690307617, "learning_rate": 9.031746031746032e-06, "loss": 42.744, "step": 3733 }, { "epoch": 88.90746268656716, "grad_norm": 25.057889938354492, "learning_rate": 9.02910052910053e-06, "loss": 42.076, "step": 3734 }, { "epoch": 88.9313432835821, "grad_norm": 21.162078857421875, "learning_rate": 9.026455026455027e-06, "loss": 42.397, "step": 3735 }, { "epoch": 88.95522388059702, "grad_norm": 21.846647262573242, "learning_rate": 9.023809523809524e-06, "loss": 42.7379, "step": 3736 }, { "epoch": 88.97910447761194, "grad_norm": 19.74768829345703, "learning_rate": 9.021164021164022e-06, "loss": 42.0906, "step": 3737 }, { "epoch": 89.0, "grad_norm": 18.839765548706055, "learning_rate": 9.01851851851852e-06, "loss": 37.881, "step": 3738 }, { "epoch": 89.02388059701492, "grad_norm": 22.15633201599121, "learning_rate": 9.015873015873017e-06, "loss": 42.5544, "step": 3739 }, { "epoch": 89.04776119402985, "grad_norm": 18.709840774536133, "learning_rate": 9.013227513227514e-06, "loss": 43.2949, "step": 3740 }, { "epoch": 89.07164179104478, "grad_norm": 22.922399520874023, "learning_rate": 9.010582010582012e-06, "loss": 41.9215, "step": 3741 }, { "epoch": 89.0955223880597, "grad_norm": 18.445695877075195, "learning_rate": 9.00793650793651e-06, "loss": 42.994, "step": 3742 }, { "epoch": 89.11940298507463, "grad_norm": 22.694503784179688, "learning_rate": 9.005291005291005e-06, "loss": 42.4024, "step": 3743 }, { "epoch": 89.14328358208955, "grad_norm": 23.259532928466797, "learning_rate": 9.002645502645503e-06, "loss": 41.9366, "step": 3744 }, { "epoch": 89.16716417910447, "grad_norm": 24.131465911865234, "learning_rate": 9e-06, "loss": 42.9172, "step": 3745 }, { "epoch": 89.1910447761194, "grad_norm": 21.01772117614746, "learning_rate": 8.997354497354498e-06, "loss": 42.0505, "step": 3746 }, { "epoch": 89.21492537313434, "grad_norm": 20.675086975097656, "learning_rate": 8.994708994708995e-06, "loss": 42.7076, "step": 3747 }, { "epoch": 89.23880597014926, "grad_norm": 22.289649963378906, "learning_rate": 8.992063492063493e-06, "loss": 42.4533, "step": 3748 }, { "epoch": 89.26268656716418, "grad_norm": 22.76655387878418, "learning_rate": 8.98941798941799e-06, "loss": 42.0269, "step": 3749 }, { "epoch": 89.2865671641791, "grad_norm": 19.732887268066406, "learning_rate": 8.986772486772488e-06, "loss": 44.2783, "step": 3750 }, { "epoch": 89.31044776119403, "grad_norm": 22.45815658569336, "learning_rate": 8.984126984126985e-06, "loss": 40.1901, "step": 3751 }, { "epoch": 89.33432835820895, "grad_norm": 24.511625289916992, "learning_rate": 8.981481481481483e-06, "loss": 43.0842, "step": 3752 }, { "epoch": 89.35820895522389, "grad_norm": 19.739845275878906, "learning_rate": 8.978835978835979e-06, "loss": 43.7219, "step": 3753 }, { "epoch": 89.38208955223881, "grad_norm": 26.18813133239746, "learning_rate": 8.976190476190478e-06, "loss": 43.5427, "step": 3754 }, { "epoch": 89.40597014925373, "grad_norm": 21.95644760131836, "learning_rate": 8.973544973544973e-06, "loss": 42.9161, "step": 3755 }, { "epoch": 89.42985074626866, "grad_norm": 22.270849227905273, "learning_rate": 8.970899470899471e-06, "loss": 42.6121, "step": 3756 }, { "epoch": 89.45373134328358, "grad_norm": 18.48128318786621, "learning_rate": 8.968253968253968e-06, "loss": 42.044, "step": 3757 }, { "epoch": 89.4776119402985, "grad_norm": 22.865985870361328, "learning_rate": 8.965608465608466e-06, "loss": 42.1096, "step": 3758 }, { "epoch": 89.50149253731344, "grad_norm": 19.26102066040039, "learning_rate": 8.962962962962963e-06, "loss": 42.5147, "step": 3759 }, { "epoch": 89.52537313432836, "grad_norm": 27.352407455444336, "learning_rate": 8.960317460317461e-06, "loss": 41.7614, "step": 3760 }, { "epoch": 89.54925373134328, "grad_norm": 21.059770584106445, "learning_rate": 8.957671957671958e-06, "loss": 41.5053, "step": 3761 }, { "epoch": 89.57313432835821, "grad_norm": 23.909198760986328, "learning_rate": 8.955026455026456e-06, "loss": 43.4126, "step": 3762 }, { "epoch": 89.59701492537313, "grad_norm": 28.529970169067383, "learning_rate": 8.952380952380953e-06, "loss": 43.489, "step": 3763 }, { "epoch": 89.62089552238805, "grad_norm": 22.008472442626953, "learning_rate": 8.949735449735451e-06, "loss": 42.5781, "step": 3764 }, { "epoch": 89.64477611940299, "grad_norm": NaN, "learning_rate": 8.947089947089947e-06, "loss": 37.0211, "step": 3765 }, { "epoch": 89.66865671641791, "grad_norm": 29.881391525268555, "learning_rate": 8.947089947089947e-06, "loss": 42.6102, "step": 3766 }, { "epoch": 89.69253731343284, "grad_norm": 24.919992446899414, "learning_rate": 8.944444444444446e-06, "loss": 42.7878, "step": 3767 }, { "epoch": 89.71641791044776, "grad_norm": 29.473249435424805, "learning_rate": 8.941798941798942e-06, "loss": 41.9105, "step": 3768 }, { "epoch": 89.74029850746268, "grad_norm": 20.71428871154785, "learning_rate": 8.93915343915344e-06, "loss": 42.0715, "step": 3769 }, { "epoch": 89.7641791044776, "grad_norm": 29.31629180908203, "learning_rate": 8.936507936507938e-06, "loss": 41.3888, "step": 3770 }, { "epoch": 89.78805970149254, "grad_norm": 22.29326057434082, "learning_rate": 8.933862433862434e-06, "loss": 43.0029, "step": 3771 }, { "epoch": 89.81194029850747, "grad_norm": NaN, "learning_rate": 8.931216931216932e-06, "loss": 49.4483, "step": 3772 }, { "epoch": 89.83582089552239, "grad_norm": 23.31702423095703, "learning_rate": 8.931216931216932e-06, "loss": 42.8926, "step": 3773 }, { "epoch": 89.85970149253731, "grad_norm": 26.894012451171875, "learning_rate": 8.92857142857143e-06, "loss": 41.3476, "step": 3774 }, { "epoch": 89.88358208955223, "grad_norm": 19.226701736450195, "learning_rate": 8.925925925925927e-06, "loss": 42.9396, "step": 3775 }, { "epoch": 89.90746268656716, "grad_norm": 26.918243408203125, "learning_rate": 8.923280423280424e-06, "loss": 41.7109, "step": 3776 }, { "epoch": 89.9313432835821, "grad_norm": 22.435697555541992, "learning_rate": 8.920634920634922e-06, "loss": 42.5026, "step": 3777 }, { "epoch": 89.95522388059702, "grad_norm": 19.455547332763672, "learning_rate": 8.91798941798942e-06, "loss": 42.4964, "step": 3778 }, { "epoch": 89.97910447761194, "grad_norm": 24.792171478271484, "learning_rate": 8.915343915343915e-06, "loss": 41.6366, "step": 3779 }, { "epoch": 90.0, "grad_norm": 14.4516019821167, "learning_rate": 8.912698412698414e-06, "loss": 36.7873, "step": 3780 }, { "epoch": 90.0, "step": 3780, "total_flos": 1.857999472723437e+17, "train_loss": 4.747417533713043, "train_runtime": 12850.2933, "train_samples_per_second": 37.484, "train_steps_per_second": 0.294 }, { "epoch": 90.02388059701492, "grad_norm": 26.03937339782715, "learning_rate": 1e-05, "loss": 42.4337, "step": 3781 }, { "epoch": 90.04776119402985, "grad_norm": Infinity, "learning_rate": 9.997835497835499e-06, "loss": 51.1491, "step": 3782 }, { "epoch": 90.07164179104478, "grad_norm": Infinity, "learning_rate": 9.997835497835499e-06, "loss": 53.2051, "step": 3783 }, { "epoch": 90.0955223880597, "grad_norm": 446.1357421875, "learning_rate": 9.997835497835499e-06, "loss": 51.5745, "step": 3784 }, { "epoch": 90.11940298507463, "grad_norm": 229.35903930664062, "learning_rate": 9.995670995670996e-06, "loss": 49.5899, "step": 3785 }, { "epoch": 90.14328358208955, "grad_norm": 109.18777465820312, "learning_rate": 9.993506493506494e-06, "loss": 45.889, "step": 3786 }, { "epoch": 90.16716417910447, "grad_norm": 79.82958221435547, "learning_rate": 9.991341991341992e-06, "loss": 44.3638, "step": 3787 }, { "epoch": 90.1910447761194, "grad_norm": 69.46668243408203, "learning_rate": 9.98917748917749e-06, "loss": 43.6641, "step": 3788 }, { "epoch": 90.21492537313434, "grad_norm": 56.4055290222168, "learning_rate": 9.987012987012988e-06, "loss": 45.0336, "step": 3789 }, { "epoch": 90.23880597014926, "grad_norm": 53.48906326293945, "learning_rate": 9.984848484848485e-06, "loss": 42.9807, "step": 3790 }, { "epoch": 90.26268656716418, "grad_norm": 38.25556564331055, "learning_rate": 9.982683982683983e-06, "loss": 44.1306, "step": 3791 }, { "epoch": 90.2865671641791, "grad_norm": 41.42750549316406, "learning_rate": 9.980519480519481e-06, "loss": 42.1205, "step": 3792 }, { "epoch": 90.31044776119403, "grad_norm": 34.52850341796875, "learning_rate": 9.978354978354979e-06, "loss": 43.3744, "step": 3793 }, { "epoch": 90.33432835820895, "grad_norm": 28.61484146118164, "learning_rate": 9.976190476190477e-06, "loss": 43.487, "step": 3794 }, { "epoch": 90.35820895522389, "grad_norm": 27.961273193359375, "learning_rate": 9.974025974025974e-06, "loss": 43.9663, "step": 3795 }, { "epoch": 90.38208955223881, "grad_norm": 27.92458152770996, "learning_rate": 9.971861471861472e-06, "loss": 43.2716, "step": 3796 }, { "epoch": 90.40597014925373, "grad_norm": 21.93165397644043, "learning_rate": 9.96969696969697e-06, "loss": 43.3704, "step": 3797 }, { "epoch": 90.42985074626866, "grad_norm": 27.053754806518555, "learning_rate": 9.967532467532468e-06, "loss": 42.7038, "step": 3798 }, { "epoch": 90.45373134328358, "grad_norm": 31.030607223510742, "learning_rate": 9.965367965367966e-06, "loss": 43.1343, "step": 3799 }, { "epoch": 90.4776119402985, "grad_norm": 24.048316955566406, "learning_rate": 9.963203463203463e-06, "loss": 42.1113, "step": 3800 }, { "epoch": 90.50149253731344, "grad_norm": 17.98249053955078, "learning_rate": 9.961038961038963e-06, "loss": 42.6117, "step": 3801 }, { "epoch": 90.52537313432836, "grad_norm": 20.080669403076172, "learning_rate": 9.95887445887446e-06, "loss": 42.4281, "step": 3802 }, { "epoch": 90.54925373134328, "grad_norm": 19.842525482177734, "learning_rate": 9.956709956709958e-06, "loss": 40.8022, "step": 3803 }, { "epoch": 90.57313432835821, "grad_norm": 20.453306198120117, "learning_rate": 9.954545454545456e-06, "loss": 42.8288, "step": 3804 }, { "epoch": 90.59701492537313, "grad_norm": 19.955123901367188, "learning_rate": 9.952380952380954e-06, "loss": 40.2546, "step": 3805 }, { "epoch": 90.62089552238805, "grad_norm": 17.246713638305664, "learning_rate": 9.950216450216452e-06, "loss": 42.0433, "step": 3806 }, { "epoch": 90.64477611940299, "grad_norm": 20.76253890991211, "learning_rate": 9.94805194805195e-06, "loss": 42.7741, "step": 3807 }, { "epoch": 90.66865671641791, "grad_norm": 21.001201629638672, "learning_rate": 9.945887445887446e-06, "loss": 43.6741, "step": 3808 }, { "epoch": 90.69253731343284, "grad_norm": 20.765684127807617, "learning_rate": 9.943722943722944e-06, "loss": 41.8182, "step": 3809 }, { "epoch": 90.71641791044776, "grad_norm": 16.794981002807617, "learning_rate": 9.941558441558441e-06, "loss": 42.6478, "step": 3810 }, { "epoch": 90.74029850746268, "grad_norm": 23.377695083618164, "learning_rate": 9.939393939393939e-06, "loss": 42.0878, "step": 3811 }, { "epoch": 90.7641791044776, "grad_norm": 23.543071746826172, "learning_rate": 9.937229437229437e-06, "loss": 42.4977, "step": 3812 }, { "epoch": 90.78805970149254, "grad_norm": 18.546525955200195, "learning_rate": 9.935064935064936e-06, "loss": 42.4457, "step": 3813 }, { "epoch": 90.81194029850747, "grad_norm": 25.244186401367188, "learning_rate": 9.932900432900434e-06, "loss": 42.4906, "step": 3814 }, { "epoch": 90.83582089552239, "grad_norm": 21.267963409423828, "learning_rate": 9.930735930735932e-06, "loss": 41.7433, "step": 3815 }, { "epoch": 90.85970149253731, "grad_norm": 19.291160583496094, "learning_rate": 9.92857142857143e-06, "loss": 41.7054, "step": 3816 }, { "epoch": 90.88358208955223, "grad_norm": 21.301227569580078, "learning_rate": 9.926406926406928e-06, "loss": 42.5566, "step": 3817 }, { "epoch": 90.90746268656716, "grad_norm": 19.511821746826172, "learning_rate": 9.924242424242425e-06, "loss": 41.5064, "step": 3818 }, { "epoch": 90.9313432835821, "grad_norm": 18.419504165649414, "learning_rate": 9.922077922077923e-06, "loss": 41.4675, "step": 3819 }, { "epoch": 90.95522388059702, "grad_norm": 19.577409744262695, "learning_rate": 9.919913419913421e-06, "loss": 43.4705, "step": 3820 }, { "epoch": 90.97910447761194, "grad_norm": 23.015262603759766, "learning_rate": 9.917748917748919e-06, "loss": 42.0356, "step": 3821 }, { "epoch": 91.0, "grad_norm": 17.785385131835938, "learning_rate": 9.915584415584417e-06, "loss": 37.6509, "step": 3822 }, { "epoch": 91.02388059701492, "grad_norm": 16.111051559448242, "learning_rate": 9.913419913419914e-06, "loss": 41.7977, "step": 3823 }, { "epoch": 91.04776119402985, "grad_norm": 22.09601593017578, "learning_rate": 9.911255411255412e-06, "loss": 42.5569, "step": 3824 }, { "epoch": 91.07164179104478, "grad_norm": 18.80573081970215, "learning_rate": 9.90909090909091e-06, "loss": 41.773, "step": 3825 }, { "epoch": 91.0955223880597, "grad_norm": 14.442939758300781, "learning_rate": 9.906926406926408e-06, "loss": 42.0426, "step": 3826 }, { "epoch": 91.11940298507463, "grad_norm": 21.839468002319336, "learning_rate": 9.904761904761906e-06, "loss": 41.9993, "step": 3827 }, { "epoch": 91.14328358208955, "grad_norm": 17.792217254638672, "learning_rate": 9.902597402597403e-06, "loss": 42.1515, "step": 3828 }, { "epoch": 91.16716417910447, "grad_norm": 15.722336769104004, "learning_rate": 9.900432900432901e-06, "loss": 42.2694, "step": 3829 }, { "epoch": 91.1910447761194, "grad_norm": 20.94297218322754, "learning_rate": 9.898268398268399e-06, "loss": 42.7043, "step": 3830 }, { "epoch": 91.21492537313434, "grad_norm": 16.2196044921875, "learning_rate": 9.896103896103897e-06, "loss": 42.4405, "step": 3831 }, { "epoch": 91.23880597014926, "grad_norm": 20.381193161010742, "learning_rate": 9.893939393939395e-06, "loss": 43.424, "step": 3832 }, { "epoch": 91.26268656716418, "grad_norm": 14.948447227478027, "learning_rate": 9.891774891774892e-06, "loss": 42.7289, "step": 3833 }, { "epoch": 91.2865671641791, "grad_norm": 17.548126220703125, "learning_rate": 9.88961038961039e-06, "loss": 41.9656, "step": 3834 }, { "epoch": 91.31044776119403, "grad_norm": 20.301937103271484, "learning_rate": 9.887445887445888e-06, "loss": 42.9516, "step": 3835 }, { "epoch": 91.33432835820895, "grad_norm": 18.3472900390625, "learning_rate": 9.885281385281386e-06, "loss": 42.281, "step": 3836 }, { "epoch": 91.35820895522389, "grad_norm": 15.503434181213379, "learning_rate": 9.883116883116885e-06, "loss": 42.502, "step": 3837 }, { "epoch": 91.38208955223881, "grad_norm": 21.448226928710938, "learning_rate": 9.880952380952381e-06, "loss": 43.0384, "step": 3838 }, { "epoch": 91.40597014925373, "grad_norm": 16.685815811157227, "learning_rate": 9.87878787878788e-06, "loss": 41.798, "step": 3839 }, { "epoch": 91.42985074626866, "grad_norm": 18.722484588623047, "learning_rate": 9.876623376623377e-06, "loss": 43.4082, "step": 3840 }, { "epoch": 91.45373134328358, "grad_norm": 19.54647445678711, "learning_rate": 9.874458874458875e-06, "loss": 42.2679, "step": 3841 }, { "epoch": 91.4776119402985, "grad_norm": 18.793495178222656, "learning_rate": 9.872294372294373e-06, "loss": 42.2962, "step": 3842 }, { "epoch": 91.50149253731344, "grad_norm": 16.687400817871094, "learning_rate": 9.87012987012987e-06, "loss": 44.2949, "step": 3843 }, { "epoch": 91.52537313432836, "grad_norm": 16.13211441040039, "learning_rate": 9.867965367965368e-06, "loss": 42.602, "step": 3844 }, { "epoch": 91.54925373134328, "grad_norm": 16.72748565673828, "learning_rate": 9.865800865800866e-06, "loss": 42.3636, "step": 3845 }, { "epoch": 91.57313432835821, "grad_norm": 22.206905364990234, "learning_rate": 9.863636363636364e-06, "loss": 43.1925, "step": 3846 }, { "epoch": 91.59701492537313, "grad_norm": 19.21588134765625, "learning_rate": 9.861471861471862e-06, "loss": 43.1342, "step": 3847 }, { "epoch": 91.62089552238805, "grad_norm": 19.708059310913086, "learning_rate": 9.85930735930736e-06, "loss": 42.7964, "step": 3848 }, { "epoch": 91.64477611940299, "grad_norm": 22.789594650268555, "learning_rate": 9.857142857142859e-06, "loss": 42.7767, "step": 3849 }, { "epoch": 91.66865671641791, "grad_norm": 17.048229217529297, "learning_rate": 9.854978354978357e-06, "loss": 42.6642, "step": 3850 }, { "epoch": 91.69253731343284, "grad_norm": 21.39427375793457, "learning_rate": 9.852813852813854e-06, "loss": 42.8962, "step": 3851 }, { "epoch": 91.71641791044776, "grad_norm": 25.67850112915039, "learning_rate": 9.850649350649352e-06, "loss": 42.4072, "step": 3852 }, { "epoch": 91.74029850746268, "grad_norm": 20.17367935180664, "learning_rate": 9.84848484848485e-06, "loss": 42.3302, "step": 3853 }, { "epoch": 91.7641791044776, "grad_norm": 16.018030166625977, "learning_rate": 9.846320346320348e-06, "loss": 42.6877, "step": 3854 }, { "epoch": 91.78805970149254, "grad_norm": 18.5965576171875, "learning_rate": 9.844155844155846e-06, "loss": 41.4104, "step": 3855 }, { "epoch": 91.81194029850747, "grad_norm": 17.651378631591797, "learning_rate": 9.841991341991343e-06, "loss": 42.1591, "step": 3856 }, { "epoch": 91.83582089552239, "grad_norm": 15.912792205810547, "learning_rate": 9.839826839826841e-06, "loss": 41.0675, "step": 3857 }, { "epoch": 91.85970149253731, "grad_norm": 20.338071823120117, "learning_rate": 9.837662337662337e-06, "loss": 43.0971, "step": 3858 }, { "epoch": 91.88358208955223, "grad_norm": 19.422807693481445, "learning_rate": 9.835497835497835e-06, "loss": 41.022, "step": 3859 }, { "epoch": 91.90746268656716, "grad_norm": 18.216012954711914, "learning_rate": 9.833333333333333e-06, "loss": 42.0068, "step": 3860 }, { "epoch": 91.9313432835821, "grad_norm": 17.68181610107422, "learning_rate": 9.831168831168832e-06, "loss": 42.778, "step": 3861 }, { "epoch": 91.95522388059702, "grad_norm": 20.660480499267578, "learning_rate": 9.82900432900433e-06, "loss": 42.8923, "step": 3862 }, { "epoch": 91.97910447761194, "grad_norm": 22.78632926940918, "learning_rate": 9.826839826839828e-06, "loss": 41.5412, "step": 3863 }, { "epoch": 92.0, "grad_norm": 17.660106658935547, "learning_rate": 9.824675324675326e-06, "loss": 36.8816, "step": 3864 }, { "epoch": 92.02388059701492, "grad_norm": 19.257198333740234, "learning_rate": 9.822510822510824e-06, "loss": 41.3789, "step": 3865 }, { "epoch": 92.04776119402985, "grad_norm": 17.690038681030273, "learning_rate": 9.820346320346321e-06, "loss": 41.8596, "step": 3866 }, { "epoch": 92.07164179104478, "grad_norm": 25.88194465637207, "learning_rate": 9.81818181818182e-06, "loss": 42.1967, "step": 3867 }, { "epoch": 92.0955223880597, "grad_norm": 18.971637725830078, "learning_rate": 9.816017316017317e-06, "loss": 41.5025, "step": 3868 }, { "epoch": 92.11940298507463, "grad_norm": 18.14025115966797, "learning_rate": 9.813852813852815e-06, "loss": 42.7121, "step": 3869 }, { "epoch": 92.14328358208955, "grad_norm": 24.20391845703125, "learning_rate": 9.811688311688313e-06, "loss": 42.9952, "step": 3870 }, { "epoch": 92.16716417910447, "grad_norm": 18.484018325805664, "learning_rate": 9.80952380952381e-06, "loss": 44.174, "step": 3871 }, { "epoch": 92.1910447761194, "grad_norm": 24.238615036010742, "learning_rate": 9.807359307359308e-06, "loss": 42.933, "step": 3872 }, { "epoch": 92.21492537313434, "grad_norm": 21.95537757873535, "learning_rate": 9.805194805194806e-06, "loss": 42.5797, "step": 3873 }, { "epoch": 92.23880597014926, "grad_norm": 16.300167083740234, "learning_rate": 9.803030303030304e-06, "loss": 41.8871, "step": 3874 }, { "epoch": 92.26268656716418, "grad_norm": 31.398351669311523, "learning_rate": 9.800865800865802e-06, "loss": 42.8308, "step": 3875 }, { "epoch": 92.2865671641791, "grad_norm": 21.76424789428711, "learning_rate": 9.7987012987013e-06, "loss": 42.1119, "step": 3876 }, { "epoch": 92.31044776119403, "grad_norm": 26.037975311279297, "learning_rate": 9.796536796536797e-06, "loss": 42.0092, "step": 3877 }, { "epoch": 92.33432835820895, "grad_norm": 26.393800735473633, "learning_rate": 9.794372294372295e-06, "loss": 43.9124, "step": 3878 }, { "epoch": 92.35820895522389, "grad_norm": 21.763713836669922, "learning_rate": 9.792207792207793e-06, "loss": 42.6169, "step": 3879 }, { "epoch": 92.38208955223881, "grad_norm": 28.867443084716797, "learning_rate": 9.79004329004329e-06, "loss": 43.093, "step": 3880 }, { "epoch": 92.40597014925373, "grad_norm": 20.59787940979004, "learning_rate": 9.787878787878788e-06, "loss": 43.4976, "step": 3881 }, { "epoch": 92.42985074626866, "grad_norm": 32.58126449584961, "learning_rate": 9.785714285714286e-06, "loss": 42.2799, "step": 3882 }, { "epoch": 92.45373134328358, "grad_norm": 18.00343132019043, "learning_rate": 9.783549783549784e-06, "loss": 42.9497, "step": 3883 }, { "epoch": 92.4776119402985, "grad_norm": 31.740930557250977, "learning_rate": 9.781385281385282e-06, "loss": 42.7341, "step": 3884 }, { "epoch": 92.50149253731344, "grad_norm": 24.078405380249023, "learning_rate": 9.779220779220781e-06, "loss": 43.1077, "step": 3885 }, { "epoch": 92.52537313432836, "grad_norm": 21.194313049316406, "learning_rate": 9.777056277056279e-06, "loss": 41.9059, "step": 3886 }, { "epoch": 92.54925373134328, "grad_norm": 30.298595428466797, "learning_rate": 9.774891774891775e-06, "loss": 41.5753, "step": 3887 }, { "epoch": 92.57313432835821, "grad_norm": 21.55902099609375, "learning_rate": 9.772727272727273e-06, "loss": 41.659, "step": 3888 }, { "epoch": 92.59701492537313, "grad_norm": 27.879924774169922, "learning_rate": 9.77056277056277e-06, "loss": 42.4026, "step": 3889 }, { "epoch": 92.62089552238805, "grad_norm": 20.100893020629883, "learning_rate": 9.768398268398269e-06, "loss": 42.3196, "step": 3890 }, { "epoch": 92.64477611940299, "grad_norm": 24.352115631103516, "learning_rate": 9.766233766233766e-06, "loss": 42.4063, "step": 3891 }, { "epoch": 92.66865671641791, "grad_norm": 24.65276336669922, "learning_rate": 9.764069264069264e-06, "loss": 41.6774, "step": 3892 }, { "epoch": 92.69253731343284, "grad_norm": 18.95211410522461, "learning_rate": 9.761904761904762e-06, "loss": 40.4774, "step": 3893 }, { "epoch": 92.71641791044776, "grad_norm": 37.48885726928711, "learning_rate": 9.75974025974026e-06, "loss": 42.0188, "step": 3894 }, { "epoch": 92.74029850746268, "grad_norm": 27.999391555786133, "learning_rate": 9.757575757575758e-06, "loss": 41.9417, "step": 3895 }, { "epoch": 92.7641791044776, "grad_norm": 41.38749694824219, "learning_rate": 9.755411255411255e-06, "loss": 42.3823, "step": 3896 }, { "epoch": 92.78805970149254, "grad_norm": 30.16627311706543, "learning_rate": 9.753246753246755e-06, "loss": 42.6722, "step": 3897 }, { "epoch": 92.81194029850747, "grad_norm": 42.71925735473633, "learning_rate": 9.751082251082253e-06, "loss": 42.7932, "step": 3898 }, { "epoch": 92.83582089552239, "grad_norm": 42.11480712890625, "learning_rate": 9.74891774891775e-06, "loss": 42.3812, "step": 3899 }, { "epoch": 92.85970149253731, "grad_norm": 23.51568031311035, "learning_rate": 9.746753246753248e-06, "loss": 42.0872, "step": 3900 }, { "epoch": 92.88358208955223, "grad_norm": 29.64082145690918, "learning_rate": 9.744588744588746e-06, "loss": 42.7743, "step": 3901 }, { "epoch": 92.90746268656716, "grad_norm": 24.687829971313477, "learning_rate": 9.742424242424244e-06, "loss": 42.151, "step": 3902 }, { "epoch": 92.9313432835821, "grad_norm": 23.673076629638672, "learning_rate": 9.740259740259742e-06, "loss": 42.949, "step": 3903 }, { "epoch": 92.95522388059702, "grad_norm": 29.738771438598633, "learning_rate": 9.73809523809524e-06, "loss": 41.3754, "step": 3904 }, { "epoch": 92.97910447761194, "grad_norm": 23.26430320739746, "learning_rate": 9.735930735930737e-06, "loss": 42.2649, "step": 3905 }, { "epoch": 93.0, "grad_norm": 33.02578353881836, "learning_rate": 9.733766233766235e-06, "loss": 36.7133, "step": 3906 }, { "epoch": 93.02388059701492, "grad_norm": 29.762083053588867, "learning_rate": 9.731601731601731e-06, "loss": 42.1617, "step": 3907 }, { "epoch": 93.04776119402985, "grad_norm": 42.29904556274414, "learning_rate": 9.729437229437229e-06, "loss": 41.4727, "step": 3908 }, { "epoch": 93.07164179104478, "grad_norm": 35.2297477722168, "learning_rate": 9.727272727272728e-06, "loss": 41.8486, "step": 3909 }, { "epoch": 93.0955223880597, "grad_norm": 31.90110206604004, "learning_rate": 9.725108225108226e-06, "loss": 41.3951, "step": 3910 }, { "epoch": 93.11940298507463, "grad_norm": 33.118011474609375, "learning_rate": 9.722943722943724e-06, "loss": 42.8038, "step": 3911 }, { "epoch": 93.14328358208955, "grad_norm": 28.162616729736328, "learning_rate": 9.720779220779222e-06, "loss": 42.2424, "step": 3912 }, { "epoch": 93.16716417910447, "grad_norm": 26.799827575683594, "learning_rate": 9.71861471861472e-06, "loss": 41.9939, "step": 3913 }, { "epoch": 93.1910447761194, "grad_norm": 36.02149200439453, "learning_rate": 9.716450216450217e-06, "loss": 43.0555, "step": 3914 }, { "epoch": 93.21492537313434, "grad_norm": 30.073331832885742, "learning_rate": 9.714285714285715e-06, "loss": 40.7799, "step": 3915 }, { "epoch": 93.23880597014926, "grad_norm": 32.572547912597656, "learning_rate": 9.712121212121213e-06, "loss": 42.139, "step": 3916 }, { "epoch": 93.26268656716418, "grad_norm": 30.6304988861084, "learning_rate": 9.70995670995671e-06, "loss": 42.702, "step": 3917 }, { "epoch": 93.2865671641791, "grad_norm": 33.230812072753906, "learning_rate": 9.707792207792209e-06, "loss": 42.4281, "step": 3918 }, { "epoch": 93.31044776119403, "grad_norm": 29.524002075195312, "learning_rate": 9.705627705627706e-06, "loss": 42.5262, "step": 3919 }, { "epoch": 93.33432835820895, "grad_norm": 29.51606559753418, "learning_rate": 9.703463203463204e-06, "loss": 41.8173, "step": 3920 }, { "epoch": 93.35820895522389, "grad_norm": 22.32621192932129, "learning_rate": 9.701298701298702e-06, "loss": 43.059, "step": 3921 }, { "epoch": 93.38208955223881, "grad_norm": 36.80875778198242, "learning_rate": 9.6991341991342e-06, "loss": 41.8935, "step": 3922 }, { "epoch": 93.40597014925373, "grad_norm": 30.580604553222656, "learning_rate": 9.696969696969698e-06, "loss": 43.2128, "step": 3923 }, { "epoch": 93.42985074626866, "grad_norm": 29.170934677124023, "learning_rate": 9.694805194805195e-06, "loss": 41.6993, "step": 3924 }, { "epoch": 93.45373134328358, "grad_norm": 28.69053840637207, "learning_rate": 9.692640692640693e-06, "loss": 43.051, "step": 3925 }, { "epoch": 93.4776119402985, "grad_norm": 29.881338119506836, "learning_rate": 9.690476190476191e-06, "loss": 41.1923, "step": 3926 }, { "epoch": 93.50149253731344, "grad_norm": 25.122774124145508, "learning_rate": 9.688311688311689e-06, "loss": 42.4061, "step": 3927 }, { "epoch": 93.52537313432836, "grad_norm": 34.054847717285156, "learning_rate": 9.686147186147187e-06, "loss": 42.4354, "step": 3928 }, { "epoch": 93.54925373134328, "grad_norm": 29.546493530273438, "learning_rate": 9.683982683982684e-06, "loss": 41.9759, "step": 3929 }, { "epoch": 93.57313432835821, "grad_norm": 32.49911880493164, "learning_rate": 9.681818181818182e-06, "loss": 43.3769, "step": 3930 }, { "epoch": 93.59701492537313, "grad_norm": 28.943012237548828, "learning_rate": 9.67965367965368e-06, "loss": 41.6171, "step": 3931 }, { "epoch": 93.62089552238805, "grad_norm": 32.4178466796875, "learning_rate": 9.67748917748918e-06, "loss": 42.6111, "step": 3932 }, { "epoch": 93.64477611940299, "grad_norm": 30.295703887939453, "learning_rate": 9.675324675324677e-06, "loss": 41.6904, "step": 3933 }, { "epoch": 93.66865671641791, "grad_norm": 31.419668197631836, "learning_rate": 9.673160173160175e-06, "loss": 42.3374, "step": 3934 }, { "epoch": 93.69253731343284, "grad_norm": 29.994272232055664, "learning_rate": 9.670995670995673e-06, "loss": 42.1553, "step": 3935 }, { "epoch": 93.71641791044776, "grad_norm": 30.031116485595703, "learning_rate": 9.66883116883117e-06, "loss": 42.1101, "step": 3936 }, { "epoch": 93.74029850746268, "grad_norm": 28.21011734008789, "learning_rate": 9.666666666666667e-06, "loss": 42.0604, "step": 3937 }, { "epoch": 93.7641791044776, "grad_norm": 32.34469985961914, "learning_rate": 9.664502164502165e-06, "loss": 42.4025, "step": 3938 }, { "epoch": 93.78805970149254, "grad_norm": 25.2736759185791, "learning_rate": 9.662337662337662e-06, "loss": 42.7677, "step": 3939 }, { "epoch": 93.81194029850747, "grad_norm": 35.72128677368164, "learning_rate": 9.66017316017316e-06, "loss": 43.4687, "step": 3940 }, { "epoch": 93.83582089552239, "grad_norm": 30.39203453063965, "learning_rate": 9.658008658008658e-06, "loss": 41.7504, "step": 3941 }, { "epoch": 93.85970149253731, "grad_norm": 26.031253814697266, "learning_rate": 9.655844155844156e-06, "loss": 41.6092, "step": 3942 }, { "epoch": 93.88358208955223, "grad_norm": 23.05304718017578, "learning_rate": 9.653679653679654e-06, "loss": 42.4116, "step": 3943 }, { "epoch": 93.90746268656716, "grad_norm": 27.849210739135742, "learning_rate": 9.651515151515153e-06, "loss": 43.2295, "step": 3944 }, { "epoch": 93.9313432835821, "grad_norm": 25.089933395385742, "learning_rate": 9.64935064935065e-06, "loss": 42.6244, "step": 3945 }, { "epoch": 93.95522388059702, "grad_norm": 32.90645217895508, "learning_rate": 9.647186147186149e-06, "loss": 42.7992, "step": 3946 }, { "epoch": 93.97910447761194, "grad_norm": 28.58262825012207, "learning_rate": 9.645021645021646e-06, "loss": 43.0072, "step": 3947 }, { "epoch": 94.0, "grad_norm": 23.826631546020508, "learning_rate": 9.642857142857144e-06, "loss": 37.1225, "step": 3948 }, { "epoch": 94.02388059701492, "grad_norm": 28.149904251098633, "learning_rate": 9.640692640692642e-06, "loss": 42.374, "step": 3949 }, { "epoch": 94.04776119402985, "grad_norm": 28.40786361694336, "learning_rate": 9.63852813852814e-06, "loss": 41.6844, "step": 3950 }, { "epoch": 94.07164179104478, "grad_norm": 25.789466857910156, "learning_rate": 9.636363636363638e-06, "loss": 41.9359, "step": 3951 }, { "epoch": 94.0955223880597, "grad_norm": 31.53352928161621, "learning_rate": 9.634199134199135e-06, "loss": 41.4059, "step": 3952 }, { "epoch": 94.11940298507463, "grad_norm": 25.65757179260254, "learning_rate": 9.632034632034633e-06, "loss": 42.8445, "step": 3953 }, { "epoch": 94.14328358208955, "grad_norm": 35.67771911621094, "learning_rate": 9.629870129870131e-06, "loss": 43.0635, "step": 3954 }, { "epoch": 94.16716417910447, "grad_norm": 31.19240951538086, "learning_rate": 9.627705627705629e-06, "loss": 42.4725, "step": 3955 }, { "epoch": 94.1910447761194, "grad_norm": 31.1099853515625, "learning_rate": 9.625541125541127e-06, "loss": 42.6572, "step": 3956 }, { "epoch": 94.21492537313434, "grad_norm": 28.18238639831543, "learning_rate": 9.623376623376624e-06, "loss": 40.6298, "step": 3957 }, { "epoch": 94.23880597014926, "grad_norm": 25.916431427001953, "learning_rate": 9.621212121212122e-06, "loss": 42.1036, "step": 3958 }, { "epoch": 94.26268656716418, "grad_norm": 25.19932746887207, "learning_rate": 9.61904761904762e-06, "loss": 42.7877, "step": 3959 }, { "epoch": 94.2865671641791, "grad_norm": 31.23909568786621, "learning_rate": 9.616883116883118e-06, "loss": 42.3302, "step": 3960 }, { "epoch": 94.31044776119403, "grad_norm": 27.547996520996094, "learning_rate": 9.614718614718616e-06, "loss": 42.9115, "step": 3961 }, { "epoch": 94.33432835820895, "grad_norm": 33.331939697265625, "learning_rate": 9.612554112554113e-06, "loss": 42.9594, "step": 3962 }, { "epoch": 94.35820895522389, "grad_norm": 26.780292510986328, "learning_rate": 9.610389610389611e-06, "loss": 43.8544, "step": 3963 }, { "epoch": 94.38208955223881, "grad_norm": 25.683496475219727, "learning_rate": 9.608225108225109e-06, "loss": 41.3053, "step": 3964 }, { "epoch": 94.40597014925373, "grad_norm": 22.268705368041992, "learning_rate": 9.606060606060607e-06, "loss": 41.5663, "step": 3965 }, { "epoch": 94.42985074626866, "grad_norm": 26.915376663208008, "learning_rate": 9.603896103896105e-06, "loss": 42.8438, "step": 3966 }, { "epoch": 94.45373134328358, "grad_norm": 18.383493423461914, "learning_rate": 9.601731601731602e-06, "loss": 42.499, "step": 3967 }, { "epoch": 94.4776119402985, "grad_norm": 36.09028244018555, "learning_rate": 9.5995670995671e-06, "loss": 42.8744, "step": 3968 }, { "epoch": 94.50149253731344, "grad_norm": 27.188034057617188, "learning_rate": 9.597402597402598e-06, "loss": 41.8915, "step": 3969 }, { "epoch": 94.52537313432836, "grad_norm": 30.428661346435547, "learning_rate": 9.595238095238096e-06, "loss": 41.9762, "step": 3970 }, { "epoch": 94.54925373134328, "grad_norm": 25.777450561523438, "learning_rate": 9.593073593073594e-06, "loss": 43.0853, "step": 3971 }, { "epoch": 94.57313432835821, "grad_norm": 28.07237434387207, "learning_rate": 9.590909090909091e-06, "loss": 42.7039, "step": 3972 }, { "epoch": 94.59701492537313, "grad_norm": 22.956628799438477, "learning_rate": 9.588744588744589e-06, "loss": 42.8252, "step": 3973 }, { "epoch": 94.62089552238805, "grad_norm": 30.87279510498047, "learning_rate": 9.586580086580087e-06, "loss": 42.8992, "step": 3974 }, { "epoch": 94.64477611940299, "grad_norm": 24.29635238647461, "learning_rate": 9.584415584415585e-06, "loss": 41.1669, "step": 3975 }, { "epoch": 94.66865671641791, "grad_norm": 31.342975616455078, "learning_rate": 9.582251082251083e-06, "loss": 42.9206, "step": 3976 }, { "epoch": 94.69253731343284, "grad_norm": 32.55195999145508, "learning_rate": 9.58008658008658e-06, "loss": 41.6277, "step": 3977 }, { "epoch": 94.71641791044776, "grad_norm": 29.774578094482422, "learning_rate": 9.577922077922078e-06, "loss": 42.2551, "step": 3978 }, { "epoch": 94.74029850746268, "grad_norm": 25.456302642822266, "learning_rate": 9.575757575757576e-06, "loss": 40.1482, "step": 3979 }, { "epoch": 94.7641791044776, "grad_norm": 25.847124099731445, "learning_rate": 9.573593073593075e-06, "loss": 42.2201, "step": 3980 }, { "epoch": 94.78805970149254, "grad_norm": 27.12795066833496, "learning_rate": 9.571428571428573e-06, "loss": 41.8479, "step": 3981 }, { "epoch": 94.81194029850747, "grad_norm": 24.278888702392578, "learning_rate": 9.569264069264071e-06, "loss": 42.6692, "step": 3982 }, { "epoch": 94.83582089552239, "grad_norm": 22.567380905151367, "learning_rate": 9.567099567099569e-06, "loss": 42.3215, "step": 3983 }, { "epoch": 94.85970149253731, "grad_norm": 23.813114166259766, "learning_rate": 9.564935064935067e-06, "loss": 42.6284, "step": 3984 }, { "epoch": 94.88358208955223, "grad_norm": 19.152956008911133, "learning_rate": 9.562770562770564e-06, "loss": 41.7055, "step": 3985 }, { "epoch": 94.90746268656716, "grad_norm": 25.253353118896484, "learning_rate": 9.56060606060606e-06, "loss": 42.5487, "step": 3986 }, { "epoch": 94.9313432835821, "grad_norm": 21.04471206665039, "learning_rate": 9.558441558441558e-06, "loss": 44.019, "step": 3987 }, { "epoch": 94.95522388059702, "grad_norm": NaN, "learning_rate": 9.556277056277056e-06, "loss": 47.5805, "step": 3988 }, { "epoch": 94.97910447761194, "grad_norm": 20.38011932373047, "learning_rate": 9.556277056277056e-06, "loss": 40.8306, "step": 3989 }, { "epoch": 95.0, "grad_norm": 20.988080978393555, "learning_rate": 9.554112554112554e-06, "loss": 35.8475, "step": 3990 }, { "epoch": 95.02388059701492, "grad_norm": 25.182218551635742, "learning_rate": 9.551948051948052e-06, "loss": 42.7702, "step": 3991 }, { "epoch": 95.04776119402985, "grad_norm": 18.022729873657227, "learning_rate": 9.54978354978355e-06, "loss": 41.3642, "step": 3992 }, { "epoch": 95.07164179104478, "grad_norm": 28.234127044677734, "learning_rate": 9.547619047619049e-06, "loss": 41.819, "step": 3993 }, { "epoch": 95.0955223880597, "grad_norm": 22.71247100830078, "learning_rate": 9.545454545454547e-06, "loss": 43.0423, "step": 3994 }, { "epoch": 95.11940298507463, "grad_norm": 26.776891708374023, "learning_rate": 9.543290043290045e-06, "loss": 42.4988, "step": 3995 }, { "epoch": 95.14328358208955, "grad_norm": 21.445236206054688, "learning_rate": 9.541125541125542e-06, "loss": 41.4199, "step": 3996 }, { "epoch": 95.16716417910447, "grad_norm": 23.514680862426758, "learning_rate": 9.53896103896104e-06, "loss": 42.3048, "step": 3997 }, { "epoch": 95.1910447761194, "grad_norm": 19.648818969726562, "learning_rate": 9.536796536796538e-06, "loss": 41.8681, "step": 3998 }, { "epoch": 95.21492537313434, "grad_norm": 21.146074295043945, "learning_rate": 9.534632034632036e-06, "loss": 42.3901, "step": 3999 }, { "epoch": 95.23880597014926, "grad_norm": 17.257108688354492, "learning_rate": 9.532467532467534e-06, "loss": 42.5485, "step": 4000 }, { "epoch": 95.26268656716418, "grad_norm": 20.980907440185547, "learning_rate": 9.530303030303031e-06, "loss": 42.5298, "step": 4001 }, { "epoch": 95.2865671641791, "grad_norm": 22.18124771118164, "learning_rate": 9.52813852813853e-06, "loss": 42.7032, "step": 4002 }, { "epoch": 95.31044776119403, "grad_norm": 20.432281494140625, "learning_rate": 9.525974025974027e-06, "loss": 43.449, "step": 4003 }, { "epoch": 95.33432835820895, "grad_norm": 19.2701473236084, "learning_rate": 9.523809523809525e-06, "loss": 40.555, "step": 4004 }, { "epoch": 95.35820895522389, "grad_norm": 19.681455612182617, "learning_rate": 9.521645021645023e-06, "loss": 41.2141, "step": 4005 }, { "epoch": 95.38208955223881, "grad_norm": 18.39265251159668, "learning_rate": 9.51948051948052e-06, "loss": 42.6937, "step": 4006 }, { "epoch": 95.40597014925373, "grad_norm": 19.818313598632812, "learning_rate": 9.517316017316018e-06, "loss": 43.3448, "step": 4007 }, { "epoch": 95.42985074626866, "grad_norm": 22.540481567382812, "learning_rate": 9.515151515151516e-06, "loss": 42.9516, "step": 4008 }, { "epoch": 95.45373134328358, "grad_norm": 19.422515869140625, "learning_rate": 9.512987012987014e-06, "loss": 42.7121, "step": 4009 }, { "epoch": 95.4776119402985, "grad_norm": 22.789037704467773, "learning_rate": 9.510822510822512e-06, "loss": 42.5243, "step": 4010 }, { "epoch": 95.50149253731344, "grad_norm": 18.70187759399414, "learning_rate": 9.50865800865801e-06, "loss": 40.3263, "step": 4011 }, { "epoch": 95.52537313432836, "grad_norm": 24.231351852416992, "learning_rate": 9.506493506493507e-06, "loss": 42.1699, "step": 4012 }, { "epoch": 95.54925373134328, "grad_norm": 26.356748580932617, "learning_rate": 9.504329004329005e-06, "loss": 42.181, "step": 4013 }, { "epoch": 95.57313432835821, "grad_norm": 18.702556610107422, "learning_rate": 9.502164502164503e-06, "loss": 42.4881, "step": 4014 }, { "epoch": 95.59701492537313, "grad_norm": 27.878799438476562, "learning_rate": 9.5e-06, "loss": 42.2801, "step": 4015 }, { "epoch": 95.62089552238805, "grad_norm": 20.791034698486328, "learning_rate": 9.497835497835498e-06, "loss": 41.909, "step": 4016 }, { "epoch": 95.64477611940299, "grad_norm": 24.874574661254883, "learning_rate": 9.495670995670996e-06, "loss": 42.2108, "step": 4017 }, { "epoch": 95.66865671641791, "grad_norm": 18.562255859375, "learning_rate": 9.493506493506494e-06, "loss": 41.687, "step": 4018 }, { "epoch": 95.69253731343284, "grad_norm": 27.460060119628906, "learning_rate": 9.491341991341992e-06, "loss": 42.3688, "step": 4019 }, { "epoch": 95.71641791044776, "grad_norm": 21.485797882080078, "learning_rate": 9.48917748917749e-06, "loss": 42.6037, "step": 4020 }, { "epoch": 95.74029850746268, "grad_norm": 29.475221633911133, "learning_rate": 9.487012987012987e-06, "loss": 39.9582, "step": 4021 }, { "epoch": 95.7641791044776, "grad_norm": 24.83645248413086, "learning_rate": 9.484848484848485e-06, "loss": 42.7876, "step": 4022 }, { "epoch": 95.78805970149254, "grad_norm": 29.321386337280273, "learning_rate": 9.482683982683983e-06, "loss": 42.1032, "step": 4023 }, { "epoch": 95.81194029850747, "grad_norm": 26.891469955444336, "learning_rate": 9.48051948051948e-06, "loss": 42.557, "step": 4024 }, { "epoch": 95.83582089552239, "grad_norm": 27.05336570739746, "learning_rate": 9.478354978354978e-06, "loss": 42.9743, "step": 4025 }, { "epoch": 95.85970149253731, "grad_norm": 25.014963150024414, "learning_rate": 9.476190476190476e-06, "loss": 43.1592, "step": 4026 }, { "epoch": 95.88358208955223, "grad_norm": 25.66219711303711, "learning_rate": 9.474025974025974e-06, "loss": 41.8458, "step": 4027 }, { "epoch": 95.90746268656716, "grad_norm": 22.460660934448242, "learning_rate": 9.471861471861472e-06, "loss": 42.1439, "step": 4028 }, { "epoch": 95.9313432835821, "grad_norm": 19.01448631286621, "learning_rate": 9.469696969696971e-06, "loss": 42.2933, "step": 4029 }, { "epoch": 95.95522388059702, "grad_norm": 21.85147476196289, "learning_rate": 9.46753246753247e-06, "loss": 42.1108, "step": 4030 }, { "epoch": 95.97910447761194, "grad_norm": 18.99871826171875, "learning_rate": 9.465367965367967e-06, "loss": 42.5071, "step": 4031 }, { "epoch": 96.0, "grad_norm": 16.825069427490234, "learning_rate": 9.463203463203465e-06, "loss": 37.1366, "step": 4032 }, { "epoch": 96.02388059701492, "grad_norm": 19.010360717773438, "learning_rate": 9.461038961038963e-06, "loss": 42.3766, "step": 4033 }, { "epoch": 96.04776119402985, "grad_norm": 22.50554656982422, "learning_rate": 9.45887445887446e-06, "loss": 42.5748, "step": 4034 }, { "epoch": 96.07164179104478, "grad_norm": 16.554548263549805, "learning_rate": 9.456709956709958e-06, "loss": 41.9278, "step": 4035 }, { "epoch": 96.0955223880597, "grad_norm": 23.447858810424805, "learning_rate": 9.454545454545456e-06, "loss": 42.3679, "step": 4036 }, { "epoch": 96.11940298507463, "grad_norm": 23.394611358642578, "learning_rate": 9.452380952380952e-06, "loss": 42.4519, "step": 4037 }, { "epoch": 96.14328358208955, "grad_norm": 17.726774215698242, "learning_rate": 9.45021645021645e-06, "loss": 41.8001, "step": 4038 }, { "epoch": 96.16716417910447, "grad_norm": 19.8607177734375, "learning_rate": 9.448051948051948e-06, "loss": 42.2731, "step": 4039 }, { "epoch": 96.1910447761194, "grad_norm": 24.878158569335938, "learning_rate": 9.445887445887445e-06, "loss": 42.4626, "step": 4040 }, { "epoch": 96.21492537313434, "grad_norm": 18.564037322998047, "learning_rate": 9.443722943722945e-06, "loss": 42.4094, "step": 4041 }, { "epoch": 96.23880597014926, "grad_norm": 29.672882080078125, "learning_rate": 9.441558441558443e-06, "loss": 41.8399, "step": 4042 }, { "epoch": 96.26268656716418, "grad_norm": 21.15955924987793, "learning_rate": 9.43939393939394e-06, "loss": 41.8022, "step": 4043 }, { "epoch": 96.2865671641791, "grad_norm": 19.90737533569336, "learning_rate": 9.437229437229438e-06, "loss": 41.5356, "step": 4044 }, { "epoch": 96.31044776119403, "grad_norm": 27.035198211669922, "learning_rate": 9.435064935064936e-06, "loss": 42.3891, "step": 4045 }, { "epoch": 96.33432835820895, "grad_norm": 19.44938850402832, "learning_rate": 9.432900432900434e-06, "loss": 41.7612, "step": 4046 }, { "epoch": 96.35820895522389, "grad_norm": 32.34653091430664, "learning_rate": 9.430735930735932e-06, "loss": 42.9741, "step": 4047 }, { "epoch": 96.38208955223881, "grad_norm": 23.551259994506836, "learning_rate": 9.42857142857143e-06, "loss": 41.3423, "step": 4048 }, { "epoch": 96.40597014925373, "grad_norm": 36.44496536254883, "learning_rate": 9.426406926406927e-06, "loss": 42.8346, "step": 4049 }, { "epoch": 96.42985074626866, "grad_norm": 28.864904403686523, "learning_rate": 9.424242424242425e-06, "loss": 41.8315, "step": 4050 }, { "epoch": 96.45373134328358, "grad_norm": 35.26904296875, "learning_rate": 9.422077922077923e-06, "loss": 41.5353, "step": 4051 }, { "epoch": 96.4776119402985, "grad_norm": 32.65912628173828, "learning_rate": 9.41991341991342e-06, "loss": 41.5928, "step": 4052 }, { "epoch": 96.50149253731344, "grad_norm": 31.7542667388916, "learning_rate": 9.417748917748919e-06, "loss": 41.4377, "step": 4053 }, { "epoch": 96.52537313432836, "grad_norm": 31.60584259033203, "learning_rate": 9.415584415584416e-06, "loss": 42.9119, "step": 4054 }, { "epoch": 96.54925373134328, "grad_norm": 31.597043991088867, "learning_rate": 9.413419913419914e-06, "loss": 42.2946, "step": 4055 }, { "epoch": 96.57313432835821, "grad_norm": 25.871496200561523, "learning_rate": 9.411255411255412e-06, "loss": 42.3518, "step": 4056 }, { "epoch": 96.59701492537313, "grad_norm": 38.121971130371094, "learning_rate": 9.40909090909091e-06, "loss": 41.3268, "step": 4057 }, { "epoch": 96.62089552238805, "grad_norm": 31.4708309173584, "learning_rate": 9.406926406926408e-06, "loss": 42.001, "step": 4058 }, { "epoch": 96.64477611940299, "grad_norm": 32.240604400634766, "learning_rate": 9.404761904761905e-06, "loss": 43.7004, "step": 4059 }, { "epoch": 96.66865671641791, "grad_norm": 29.972900390625, "learning_rate": 9.402597402597403e-06, "loss": 40.8066, "step": 4060 }, { "epoch": 96.69253731343284, "grad_norm": 28.71061897277832, "learning_rate": 9.400432900432901e-06, "loss": 43.052, "step": 4061 }, { "epoch": 96.71641791044776, "grad_norm": 23.861024856567383, "learning_rate": 9.398268398268399e-06, "loss": 42.5682, "step": 4062 }, { "epoch": 96.74029850746268, "grad_norm": 34.21725845336914, "learning_rate": 9.396103896103896e-06, "loss": 42.418, "step": 4063 }, { "epoch": 96.7641791044776, "grad_norm": 22.93166732788086, "learning_rate": 9.393939393939396e-06, "loss": 42.3199, "step": 4064 }, { "epoch": 96.78805970149254, "grad_norm": 35.91544723510742, "learning_rate": 9.391774891774894e-06, "loss": 40.5579, "step": 4065 }, { "epoch": 96.81194029850747, "grad_norm": 29.065799713134766, "learning_rate": 9.38961038961039e-06, "loss": 40.6409, "step": 4066 }, { "epoch": 96.83582089552239, "grad_norm": 33.4009895324707, "learning_rate": 9.387445887445888e-06, "loss": 42.934, "step": 4067 }, { "epoch": 96.85970149253731, "grad_norm": 32.16798782348633, "learning_rate": 9.385281385281385e-06, "loss": 42.4209, "step": 4068 }, { "epoch": 96.88358208955223, "grad_norm": 27.158573150634766, "learning_rate": 9.383116883116883e-06, "loss": 42.3285, "step": 4069 }, { "epoch": 96.90746268656716, "grad_norm": 28.05286407470703, "learning_rate": 9.380952380952381e-06, "loss": 43.6253, "step": 4070 }, { "epoch": 96.9313432835821, "grad_norm": 31.17296028137207, "learning_rate": 9.378787878787879e-06, "loss": 43.8199, "step": 4071 }, { "epoch": 96.95522388059702, "grad_norm": 25.182817459106445, "learning_rate": 9.376623376623377e-06, "loss": 41.0505, "step": 4072 }, { "epoch": 96.97910447761194, "grad_norm": 35.5045166015625, "learning_rate": 9.374458874458874e-06, "loss": 42.9265, "step": 4073 }, { "epoch": 97.0, "grad_norm": 23.445880889892578, "learning_rate": 9.372294372294372e-06, "loss": 36.5814, "step": 4074 }, { "epoch": 97.02388059701492, "grad_norm": 28.6851806640625, "learning_rate": 9.37012987012987e-06, "loss": 41.6689, "step": 4075 }, { "epoch": 97.04776119402985, "grad_norm": 22.152568817138672, "learning_rate": 9.36796536796537e-06, "loss": 41.6459, "step": 4076 }, { "epoch": 97.07164179104478, "grad_norm": 35.39872360229492, "learning_rate": 9.365800865800867e-06, "loss": 41.9915, "step": 4077 }, { "epoch": 97.0955223880597, "grad_norm": 27.264184951782227, "learning_rate": 9.363636363636365e-06, "loss": 42.6117, "step": 4078 }, { "epoch": 97.11940298507463, "grad_norm": 36.01545715332031, "learning_rate": 9.361471861471863e-06, "loss": 43.7312, "step": 4079 }, { "epoch": 97.14328358208955, "grad_norm": 34.436134338378906, "learning_rate": 9.35930735930736e-06, "loss": 42.597, "step": 4080 }, { "epoch": 97.16716417910447, "grad_norm": 24.796520233154297, "learning_rate": 9.357142857142859e-06, "loss": 42.432, "step": 4081 }, { "epoch": 97.1910447761194, "grad_norm": 26.330299377441406, "learning_rate": 9.354978354978356e-06, "loss": 42.1124, "step": 4082 }, { "epoch": 97.21492537313434, "grad_norm": 27.518465042114258, "learning_rate": 9.352813852813854e-06, "loss": 41.3868, "step": 4083 }, { "epoch": 97.23880597014926, "grad_norm": 25.9599552154541, "learning_rate": 9.350649350649352e-06, "loss": 40.6964, "step": 4084 }, { "epoch": 97.26268656716418, "grad_norm": 33.074974060058594, "learning_rate": 9.34848484848485e-06, "loss": 42.1326, "step": 4085 }, { "epoch": 97.2865671641791, "grad_norm": 29.895139694213867, "learning_rate": 9.346320346320346e-06, "loss": 42.1873, "step": 4086 }, { "epoch": 97.31044776119403, "grad_norm": 32.33000946044922, "learning_rate": 9.344155844155844e-06, "loss": 42.5366, "step": 4087 }, { "epoch": 97.33432835820895, "grad_norm": 28.283353805541992, "learning_rate": 9.341991341991343e-06, "loss": 41.8857, "step": 4088 }, { "epoch": 97.35820895522389, "grad_norm": 27.200963973999023, "learning_rate": 9.339826839826841e-06, "loss": 41.4329, "step": 4089 }, { "epoch": 97.38208955223881, "grad_norm": 27.918405532836914, "learning_rate": 9.337662337662339e-06, "loss": 41.4236, "step": 4090 }, { "epoch": 97.40597014925373, "grad_norm": 24.885950088500977, "learning_rate": 9.335497835497837e-06, "loss": 41.8926, "step": 4091 }, { "epoch": 97.42985074626866, "grad_norm": 24.703994750976562, "learning_rate": 9.333333333333334e-06, "loss": 42.3685, "step": 4092 }, { "epoch": 97.45373134328358, "grad_norm": 32.68978500366211, "learning_rate": 9.331168831168832e-06, "loss": 41.5668, "step": 4093 }, { "epoch": 97.4776119402985, "grad_norm": 27.5683536529541, "learning_rate": 9.32900432900433e-06, "loss": 42.4125, "step": 4094 }, { "epoch": 97.50149253731344, "grad_norm": 30.541976928710938, "learning_rate": 9.326839826839828e-06, "loss": 40.7424, "step": 4095 }, { "epoch": 97.52537313432836, "grad_norm": 28.704875946044922, "learning_rate": 9.324675324675326e-06, "loss": 42.0617, "step": 4096 }, { "epoch": 97.54925373134328, "grad_norm": 29.45570945739746, "learning_rate": 9.322510822510823e-06, "loss": 42.2572, "step": 4097 }, { "epoch": 97.57313432835821, "grad_norm": 29.299041748046875, "learning_rate": 9.320346320346321e-06, "loss": 42.5461, "step": 4098 }, { "epoch": 97.59701492537313, "grad_norm": 28.30889320373535, "learning_rate": 9.318181818181819e-06, "loss": 41.9226, "step": 4099 }, { "epoch": 97.62089552238805, "grad_norm": 23.587907791137695, "learning_rate": 9.316017316017317e-06, "loss": 42.0195, "step": 4100 }, { "epoch": 97.64477611940299, "grad_norm": 31.324934005737305, "learning_rate": 9.313852813852815e-06, "loss": 41.731, "step": 4101 }, { "epoch": 97.66865671641791, "grad_norm": 25.146387100219727, "learning_rate": 9.311688311688312e-06, "loss": 41.8452, "step": 4102 }, { "epoch": 97.69253731343284, "grad_norm": NaN, "learning_rate": 9.30952380952381e-06, "loss": 73.1578, "step": 4103 }, { "epoch": 97.71641791044776, "grad_norm": 33.619197845458984, "learning_rate": 9.30952380952381e-06, "loss": 42.6151, "step": 4104 }, { "epoch": 97.74029850746268, "grad_norm": 30.636676788330078, "learning_rate": 9.307359307359308e-06, "loss": 43.1022, "step": 4105 }, { "epoch": 97.7641791044776, "grad_norm": 30.259347915649414, "learning_rate": 9.305194805194806e-06, "loss": 42.0399, "step": 4106 }, { "epoch": 97.78805970149254, "grad_norm": 28.927536010742188, "learning_rate": 9.303030303030303e-06, "loss": 42.5658, "step": 4107 }, { "epoch": 97.81194029850747, "grad_norm": 27.93010139465332, "learning_rate": 9.300865800865801e-06, "loss": 41.5662, "step": 4108 }, { "epoch": 97.83582089552239, "grad_norm": 25.34616470336914, "learning_rate": 9.298701298701299e-06, "loss": 43.0076, "step": 4109 }, { "epoch": 97.85970149253731, "grad_norm": 28.407508850097656, "learning_rate": 9.296536796536797e-06, "loss": 43.035, "step": 4110 }, { "epoch": 97.88358208955223, "grad_norm": 22.58799934387207, "learning_rate": 9.294372294372295e-06, "loss": 42.5904, "step": 4111 }, { "epoch": 97.90746268656716, "grad_norm": 30.51255989074707, "learning_rate": 9.292207792207792e-06, "loss": 40.6314, "step": 4112 }, { "epoch": 97.9313432835821, "grad_norm": NaN, "learning_rate": 9.290043290043292e-06, "loss": 47.9418, "step": 4113 }, { "epoch": 97.95522388059702, "grad_norm": 24.9912166595459, "learning_rate": 9.290043290043292e-06, "loss": 42.5057, "step": 4114 }, { "epoch": 97.97910447761194, "grad_norm": 29.492568969726562, "learning_rate": 9.28787878787879e-06, "loss": 42.4723, "step": 4115 }, { "epoch": 98.0, "grad_norm": 22.984312057495117, "learning_rate": 9.285714285714288e-06, "loss": 36.1324, "step": 4116 }, { "epoch": 98.02388059701492, "grad_norm": 26.956518173217773, "learning_rate": 9.283549783549785e-06, "loss": 42.6798, "step": 4117 }, { "epoch": 98.04776119402985, "grad_norm": 23.24462890625, "learning_rate": 9.281385281385281e-06, "loss": 42.5043, "step": 4118 }, { "epoch": 98.07164179104478, "grad_norm": 32.33470153808594, "learning_rate": 9.27922077922078e-06, "loss": 42.0607, "step": 4119 }, { "epoch": 98.0955223880597, "grad_norm": 30.606536865234375, "learning_rate": 9.277056277056277e-06, "loss": 42.3543, "step": 4120 }, { "epoch": 98.11940298507463, "grad_norm": 26.795475006103516, "learning_rate": 9.274891774891775e-06, "loss": 41.33, "step": 4121 }, { "epoch": 98.14328358208955, "grad_norm": 23.049283981323242, "learning_rate": 9.272727272727273e-06, "loss": 41.2262, "step": 4122 }, { "epoch": 98.16716417910447, "grad_norm": 30.961490631103516, "learning_rate": 9.27056277056277e-06, "loss": 42.3126, "step": 4123 }, { "epoch": 98.1910447761194, "grad_norm": 25.457870483398438, "learning_rate": 9.268398268398268e-06, "loss": 43.0498, "step": 4124 }, { "epoch": 98.21492537313434, "grad_norm": 28.787675857543945, "learning_rate": 9.266233766233766e-06, "loss": 41.5441, "step": 4125 }, { "epoch": 98.23880597014926, "grad_norm": 23.33895492553711, "learning_rate": 9.264069264069266e-06, "loss": 41.2298, "step": 4126 }, { "epoch": 98.26268656716418, "grad_norm": 28.43191146850586, "learning_rate": 9.261904761904763e-06, "loss": 43.8188, "step": 4127 }, { "epoch": 98.2865671641791, "grad_norm": 22.150148391723633, "learning_rate": 9.259740259740261e-06, "loss": 41.9418, "step": 4128 }, { "epoch": 98.31044776119403, "grad_norm": 32.84375762939453, "learning_rate": 9.257575757575759e-06, "loss": 42.181, "step": 4129 }, { "epoch": 98.33432835820895, "grad_norm": 27.58066177368164, "learning_rate": 9.255411255411257e-06, "loss": 41.9053, "step": 4130 }, { "epoch": 98.35820895522389, "grad_norm": 26.275638580322266, "learning_rate": 9.253246753246755e-06, "loss": 42.643, "step": 4131 }, { "epoch": 98.38208955223881, "grad_norm": 26.407045364379883, "learning_rate": 9.251082251082252e-06, "loss": 41.2759, "step": 4132 }, { "epoch": 98.40597014925373, "grad_norm": 28.262874603271484, "learning_rate": 9.24891774891775e-06, "loss": 41.2746, "step": 4133 }, { "epoch": 98.42985074626866, "grad_norm": 25.495405197143555, "learning_rate": 9.246753246753248e-06, "loss": 41.17, "step": 4134 }, { "epoch": 98.45373134328358, "grad_norm": 30.302942276000977, "learning_rate": 9.244588744588746e-06, "loss": 40.8692, "step": 4135 }, { "epoch": 98.4776119402985, "grad_norm": 26.874711990356445, "learning_rate": 9.242424242424244e-06, "loss": 42.9695, "step": 4136 }, { "epoch": 98.50149253731344, "grad_norm": 27.96731948852539, "learning_rate": 9.240259740259741e-06, "loss": 41.0995, "step": 4137 }, { "epoch": 98.52537313432836, "grad_norm": 26.49541664123535, "learning_rate": 9.238095238095239e-06, "loss": 42.3258, "step": 4138 }, { "epoch": 98.54925373134328, "grad_norm": 24.790346145629883, "learning_rate": 9.235930735930737e-06, "loss": 42.5989, "step": 4139 }, { "epoch": 98.57313432835821, "grad_norm": 22.83180809020996, "learning_rate": 9.233766233766235e-06, "loss": 41.4101, "step": 4140 }, { "epoch": 98.59701492537313, "grad_norm": 27.18695640563965, "learning_rate": 9.231601731601733e-06, "loss": 42.1914, "step": 4141 }, { "epoch": 98.62089552238805, "grad_norm": 23.35308074951172, "learning_rate": 9.22943722943723e-06, "loss": 42.3357, "step": 4142 }, { "epoch": 98.64477611940299, "grad_norm": 32.9411735534668, "learning_rate": 9.227272727272728e-06, "loss": 42.2151, "step": 4143 }, { "epoch": 98.66865671641791, "grad_norm": 28.968116760253906, "learning_rate": 9.225108225108226e-06, "loss": 42.5766, "step": 4144 }, { "epoch": 98.69253731343284, "grad_norm": 26.254579544067383, "learning_rate": 9.222943722943724e-06, "loss": 42.5968, "step": 4145 }, { "epoch": 98.71641791044776, "grad_norm": 27.665916442871094, "learning_rate": 9.220779220779221e-06, "loss": 41.0831, "step": 4146 }, { "epoch": 98.74029850746268, "grad_norm": 29.594675064086914, "learning_rate": 9.21861471861472e-06, "loss": 42.1963, "step": 4147 }, { "epoch": 98.7641791044776, "grad_norm": 23.506603240966797, "learning_rate": 9.216450216450217e-06, "loss": 41.9209, "step": 4148 }, { "epoch": 98.78805970149254, "grad_norm": 32.939395904541016, "learning_rate": 9.214285714285715e-06, "loss": 42.0637, "step": 4149 }, { "epoch": 98.81194029850747, "grad_norm": 27.35706901550293, "learning_rate": 9.212121212121213e-06, "loss": 42.4936, "step": 4150 }, { "epoch": 98.83582089552239, "grad_norm": 31.6049861907959, "learning_rate": 9.20995670995671e-06, "loss": 43.5351, "step": 4151 }, { "epoch": 98.85970149253731, "grad_norm": 26.57269287109375, "learning_rate": 9.207792207792208e-06, "loss": 42.2598, "step": 4152 }, { "epoch": 98.88358208955223, "grad_norm": 30.60957908630371, "learning_rate": 9.205627705627706e-06, "loss": 42.3751, "step": 4153 }, { "epoch": 98.90746268656716, "grad_norm": 28.574939727783203, "learning_rate": 9.203463203463204e-06, "loss": 41.8665, "step": 4154 }, { "epoch": 98.9313432835821, "grad_norm": 24.66292953491211, "learning_rate": 9.201298701298702e-06, "loss": 42.2066, "step": 4155 }, { "epoch": 98.95522388059702, "grad_norm": 23.727333068847656, "learning_rate": 9.1991341991342e-06, "loss": 41.3947, "step": 4156 }, { "epoch": 98.97910447761194, "grad_norm": 27.1662654876709, "learning_rate": 9.196969696969697e-06, "loss": 42.752, "step": 4157 }, { "epoch": 99.0, "grad_norm": 19.463891983032227, "learning_rate": 9.194805194805195e-06, "loss": 35.6173, "step": 4158 }, { "epoch": 99.02388059701492, "grad_norm": 31.107654571533203, "learning_rate": 9.192640692640693e-06, "loss": 42.7329, "step": 4159 }, { "epoch": 99.04776119402985, "grad_norm": 26.082523345947266, "learning_rate": 9.19047619047619e-06, "loss": 43.3724, "step": 4160 }, { "epoch": 99.07164179104478, "grad_norm": 23.824567794799805, "learning_rate": 9.188311688311688e-06, "loss": 42.6574, "step": 4161 }, { "epoch": 99.0955223880597, "grad_norm": 23.710350036621094, "learning_rate": 9.186147186147188e-06, "loss": 41.6831, "step": 4162 }, { "epoch": 99.11940298507463, "grad_norm": 28.668537139892578, "learning_rate": 9.183982683982686e-06, "loss": 41.099, "step": 4163 }, { "epoch": 99.14328358208955, "grad_norm": 21.060327529907227, "learning_rate": 9.181818181818184e-06, "loss": 43.0679, "step": 4164 }, { "epoch": 99.16716417910447, "grad_norm": 25.86065673828125, "learning_rate": 9.179653679653681e-06, "loss": 42.248, "step": 4165 }, { "epoch": 99.1910447761194, "grad_norm": 20.043672561645508, "learning_rate": 9.177489177489179e-06, "loss": 41.114, "step": 4166 }, { "epoch": 99.21492537313434, "grad_norm": 25.1352481842041, "learning_rate": 9.175324675324675e-06, "loss": 40.9968, "step": 4167 }, { "epoch": 99.23880597014926, "grad_norm": 20.042200088500977, "learning_rate": 9.173160173160173e-06, "loss": 41.9535, "step": 4168 }, { "epoch": 99.26268656716418, "grad_norm": 27.261369705200195, "learning_rate": 9.17099567099567e-06, "loss": 42.6293, "step": 4169 }, { "epoch": 99.2865671641791, "grad_norm": 23.163576126098633, "learning_rate": 9.168831168831169e-06, "loss": 41.9948, "step": 4170 }, { "epoch": 99.31044776119403, "grad_norm": 27.297080993652344, "learning_rate": 9.166666666666666e-06, "loss": 41.4716, "step": 4171 }, { "epoch": 99.33432835820895, "grad_norm": 22.44979476928711, "learning_rate": 9.164502164502164e-06, "loss": 42.406, "step": 4172 }, { "epoch": 99.35820895522389, "grad_norm": 23.482084274291992, "learning_rate": 9.162337662337664e-06, "loss": 41.5008, "step": 4173 }, { "epoch": 99.38208955223881, "grad_norm": 22.505319595336914, "learning_rate": 9.160173160173162e-06, "loss": 40.9368, "step": 4174 }, { "epoch": 99.40597014925373, "grad_norm": 24.250532150268555, "learning_rate": 9.15800865800866e-06, "loss": 40.7122, "step": 4175 }, { "epoch": 99.42985074626866, "grad_norm": 23.2113037109375, "learning_rate": 9.155844155844157e-06, "loss": 41.7559, "step": 4176 }, { "epoch": 99.45373134328358, "grad_norm": 18.7581787109375, "learning_rate": 9.153679653679655e-06, "loss": 41.661, "step": 4177 }, { "epoch": 99.4776119402985, "grad_norm": 17.8604793548584, "learning_rate": 9.151515151515153e-06, "loss": 41.51, "step": 4178 }, { "epoch": 99.50149253731344, "grad_norm": 16.258312225341797, "learning_rate": 9.14935064935065e-06, "loss": 41.2024, "step": 4179 }, { "epoch": 99.52537313432836, "grad_norm": 16.66613006591797, "learning_rate": 9.147186147186148e-06, "loss": 42.5017, "step": 4180 }, { "epoch": 99.54925373134328, "grad_norm": 15.366393089294434, "learning_rate": 9.145021645021646e-06, "loss": 41.6167, "step": 4181 }, { "epoch": 99.57313432835821, "grad_norm": 23.028663635253906, "learning_rate": 9.142857142857144e-06, "loss": 42.308, "step": 4182 }, { "epoch": 99.59701492537313, "grad_norm": 16.91287612915039, "learning_rate": 9.140692640692642e-06, "loss": 43.1037, "step": 4183 }, { "epoch": 99.62089552238805, "grad_norm": 19.781919479370117, "learning_rate": 9.13852813852814e-06, "loss": 42.3187, "step": 4184 }, { "epoch": 99.64477611940299, "grad_norm": 18.985305786132812, "learning_rate": 9.136363636363637e-06, "loss": 41.971, "step": 4185 }, { "epoch": 99.66865671641791, "grad_norm": 17.393688201904297, "learning_rate": 9.134199134199135e-06, "loss": 41.1467, "step": 4186 }, { "epoch": 99.69253731343284, "grad_norm": 19.685924530029297, "learning_rate": 9.132034632034633e-06, "loss": 41.822, "step": 4187 }, { "epoch": 99.71641791044776, "grad_norm": 19.761327743530273, "learning_rate": 9.12987012987013e-06, "loss": 42.1768, "step": 4188 }, { "epoch": 99.74029850746268, "grad_norm": 16.2159423828125, "learning_rate": 9.127705627705628e-06, "loss": 42.9327, "step": 4189 }, { "epoch": 99.7641791044776, "grad_norm": 21.257530212402344, "learning_rate": 9.125541125541126e-06, "loss": 42.556, "step": 4190 }, { "epoch": 99.78805970149254, "grad_norm": NaN, "learning_rate": 9.123376623376624e-06, "loss": 53.9793, "step": 4191 }, { "epoch": 99.81194029850747, "grad_norm": 19.869991302490234, "learning_rate": 9.123376623376624e-06, "loss": 41.4833, "step": 4192 }, { "epoch": 99.83582089552239, "grad_norm": 17.66855239868164, "learning_rate": 9.121212121212122e-06, "loss": 41.6514, "step": 4193 }, { "epoch": 99.85970149253731, "grad_norm": 19.992225646972656, "learning_rate": 9.11904761904762e-06, "loss": 43.4129, "step": 4194 }, { "epoch": 99.88358208955223, "grad_norm": 23.21436882019043, "learning_rate": 9.116883116883117e-06, "loss": 43.2426, "step": 4195 }, { "epoch": 99.90746268656716, "grad_norm": 18.16109848022461, "learning_rate": 9.114718614718615e-06, "loss": 41.9741, "step": 4196 }, { "epoch": 99.9313432835821, "grad_norm": 22.761810302734375, "learning_rate": 9.112554112554113e-06, "loss": 41.4668, "step": 4197 }, { "epoch": 99.95522388059702, "grad_norm": 21.3942928314209, "learning_rate": 9.110389610389611e-06, "loss": 41.6686, "step": 4198 }, { "epoch": 99.97910447761194, "grad_norm": 17.734172821044922, "learning_rate": 9.108225108225109e-06, "loss": 41.746, "step": 4199 }, { "epoch": 100.0, "grad_norm": 22.795557022094727, "learning_rate": 9.106060606060606e-06, "loss": 37.4113, "step": 4200 }, { "epoch": 100.02388059701492, "grad_norm": 18.693927764892578, "learning_rate": 9.103896103896104e-06, "loss": 41.1692, "step": 4201 }, { "epoch": 100.04776119402985, "grad_norm": 15.947311401367188, "learning_rate": 9.101731601731602e-06, "loss": 43.5011, "step": 4202 }, { "epoch": 100.07164179104478, "grad_norm": 24.349090576171875, "learning_rate": 9.0995670995671e-06, "loss": 41.954, "step": 4203 }, { "epoch": 100.0955223880597, "grad_norm": 18.305612564086914, "learning_rate": 9.097402597402598e-06, "loss": 41.7676, "step": 4204 }, { "epoch": 100.11940298507463, "grad_norm": 29.68235206604004, "learning_rate": 9.095238095238095e-06, "loss": 40.8579, "step": 4205 }, { "epoch": 100.14328358208955, "grad_norm": 24.512508392333984, "learning_rate": 9.093073593073593e-06, "loss": 40.7238, "step": 4206 }, { "epoch": 100.16716417910447, "grad_norm": 24.545705795288086, "learning_rate": 9.090909090909091e-06, "loss": 42.7197, "step": 4207 }, { "epoch": 100.1910447761194, "grad_norm": 18.792917251586914, "learning_rate": 9.088744588744589e-06, "loss": 40.8385, "step": 4208 }, { "epoch": 100.21492537313434, "grad_norm": 21.766145706176758, "learning_rate": 9.086580086580087e-06, "loss": 41.3234, "step": 4209 }, { "epoch": 100.23880597014926, "grad_norm": 17.32309341430664, "learning_rate": 9.084415584415586e-06, "loss": 40.6989, "step": 4210 }, { "epoch": 100.26268656716418, "grad_norm": 17.80112648010254, "learning_rate": 9.082251082251084e-06, "loss": 41.0043, "step": 4211 }, { "epoch": 100.2865671641791, "grad_norm": 15.762267112731934, "learning_rate": 9.080086580086582e-06, "loss": 42.5453, "step": 4212 }, { "epoch": 100.31044776119403, "grad_norm": 15.99219036102295, "learning_rate": 9.07792207792208e-06, "loss": 41.9223, "step": 4213 }, { "epoch": 100.33432835820895, "grad_norm": 21.16149139404297, "learning_rate": 9.075757575757577e-06, "loss": 41.7332, "step": 4214 }, { "epoch": 100.35820895522389, "grad_norm": 16.26340675354004, "learning_rate": 9.073593073593075e-06, "loss": 41.9333, "step": 4215 }, { "epoch": 100.38208955223881, "grad_norm": 22.789945602416992, "learning_rate": 9.071428571428573e-06, "loss": 41.5922, "step": 4216 }, { "epoch": 100.40597014925373, "grad_norm": 20.777421951293945, "learning_rate": 9.06926406926407e-06, "loss": 42.4934, "step": 4217 }, { "epoch": 100.42985074626866, "grad_norm": 20.417619705200195, "learning_rate": 9.067099567099567e-06, "loss": 42.0611, "step": 4218 }, { "epoch": 100.45373134328358, "grad_norm": 17.323135375976562, "learning_rate": 9.064935064935065e-06, "loss": 41.4595, "step": 4219 }, { "epoch": 100.4776119402985, "grad_norm": 17.62958335876465, "learning_rate": 9.062770562770562e-06, "loss": 42.1578, "step": 4220 }, { "epoch": 100.50149253731344, "grad_norm": 19.73848533630371, "learning_rate": 9.06060606060606e-06, "loss": 40.6611, "step": 4221 }, { "epoch": 100.52537313432836, "grad_norm": 15.945398330688477, "learning_rate": 9.05844155844156e-06, "loss": 41.9703, "step": 4222 }, { "epoch": 100.54925373134328, "grad_norm": 31.24019432067871, "learning_rate": 9.056277056277057e-06, "loss": 42.1433, "step": 4223 }, { "epoch": 100.57313432835821, "grad_norm": 21.933677673339844, "learning_rate": 9.054112554112555e-06, "loss": 41.873, "step": 4224 }, { "epoch": 100.59701492537313, "grad_norm": 31.41733741760254, "learning_rate": 9.051948051948053e-06, "loss": 42.7139, "step": 4225 }, { "epoch": 100.62089552238805, "grad_norm": 21.998600006103516, "learning_rate": 9.049783549783551e-06, "loss": 42.7483, "step": 4226 }, { "epoch": 100.64477611940299, "grad_norm": 34.37179183959961, "learning_rate": 9.047619047619049e-06, "loss": 41.3319, "step": 4227 }, { "epoch": 100.66865671641791, "grad_norm": 27.14617156982422, "learning_rate": 9.045454545454546e-06, "loss": 42.022, "step": 4228 }, { "epoch": 100.69253731343284, "grad_norm": 37.454708099365234, "learning_rate": 9.043290043290044e-06, "loss": 41.9875, "step": 4229 }, { "epoch": 100.71641791044776, "grad_norm": 32.32929229736328, "learning_rate": 9.041125541125542e-06, "loss": 43.1461, "step": 4230 }, { "epoch": 100.74029850746268, "grad_norm": 33.369842529296875, "learning_rate": 9.03896103896104e-06, "loss": 42.1309, "step": 4231 }, { "epoch": 100.7641791044776, "grad_norm": 26.55228042602539, "learning_rate": 9.036796536796538e-06, "loss": 42.6242, "step": 4232 }, { "epoch": 100.78805970149254, "grad_norm": 30.329452514648438, "learning_rate": 9.034632034632035e-06, "loss": 41.174, "step": 4233 }, { "epoch": 100.81194029850747, "grad_norm": 32.0432014465332, "learning_rate": 9.032467532467533e-06, "loss": 43.1256, "step": 4234 }, { "epoch": 100.83582089552239, "grad_norm": 29.122236251831055, "learning_rate": 9.030303030303031e-06, "loss": 41.3778, "step": 4235 }, { "epoch": 100.85970149253731, "grad_norm": 24.6899471282959, "learning_rate": 9.028138528138529e-06, "loss": 42.2167, "step": 4236 }, { "epoch": 100.88358208955223, "grad_norm": 31.051576614379883, "learning_rate": 9.025974025974027e-06, "loss": 42.5137, "step": 4237 }, { "epoch": 100.90746268656716, "grad_norm": 27.56793785095215, "learning_rate": 9.023809523809524e-06, "loss": 42.2763, "step": 4238 }, { "epoch": 100.9313432835821, "grad_norm": 35.045108795166016, "learning_rate": 9.021645021645022e-06, "loss": 43.3116, "step": 4239 }, { "epoch": 100.95522388059702, "grad_norm": 28.35376739501953, "learning_rate": 9.01948051948052e-06, "loss": 42.4737, "step": 4240 }, { "epoch": 100.97910447761194, "grad_norm": 29.537580490112305, "learning_rate": 9.017316017316018e-06, "loss": 42.2073, "step": 4241 }, { "epoch": 101.0, "grad_norm": 24.736759185791016, "learning_rate": 9.015151515151516e-06, "loss": 37.5375, "step": 4242 }, { "epoch": 101.02388059701492, "grad_norm": 27.93048667907715, "learning_rate": 9.012987012987013e-06, "loss": 42.1642, "step": 4243 }, { "epoch": 101.04776119402985, "grad_norm": 24.460664749145508, "learning_rate": 9.010822510822511e-06, "loss": 42.1769, "step": 4244 }, { "epoch": 101.07164179104478, "grad_norm": 22.52399253845215, "learning_rate": 9.008658008658009e-06, "loss": 41.99, "step": 4245 }, { "epoch": 101.0955223880597, "grad_norm": 19.33254623413086, "learning_rate": 9.006493506493509e-06, "loss": 40.422, "step": 4246 }, { "epoch": 101.11940298507463, "grad_norm": 22.645910263061523, "learning_rate": 9.004329004329005e-06, "loss": 42.8041, "step": 4247 }, { "epoch": 101.14328358208955, "grad_norm": 20.89433479309082, "learning_rate": 9.002164502164502e-06, "loss": 43.0258, "step": 4248 }, { "epoch": 101.16716417910447, "grad_norm": 19.612567901611328, "learning_rate": 9e-06, "loss": 41.4478, "step": 4249 }, { "epoch": 101.1910447761194, "grad_norm": 19.565265655517578, "learning_rate": 8.997835497835498e-06, "loss": 42.6328, "step": 4250 }, { "epoch": 101.21492537313434, "grad_norm": 20.93030548095703, "learning_rate": 8.995670995670996e-06, "loss": 42.7268, "step": 4251 }, { "epoch": 101.23880597014926, "grad_norm": 18.67580795288086, "learning_rate": 8.993506493506494e-06, "loss": 43.3658, "step": 4252 }, { "epoch": 101.26268656716418, "grad_norm": 26.36067008972168, "learning_rate": 8.991341991341991e-06, "loss": 42.2089, "step": 4253 }, { "epoch": 101.2865671641791, "grad_norm": 19.841224670410156, "learning_rate": 8.98917748917749e-06, "loss": 40.842, "step": 4254 }, { "epoch": 101.31044776119403, "grad_norm": 26.14617156982422, "learning_rate": 8.987012987012987e-06, "loss": 43.1673, "step": 4255 }, { "epoch": 101.33432835820895, "grad_norm": 21.286962509155273, "learning_rate": 8.984848484848485e-06, "loss": 42.0463, "step": 4256 }, { "epoch": 101.35820895522389, "grad_norm": 26.335676193237305, "learning_rate": 8.982683982683983e-06, "loss": 41.8856, "step": 4257 }, { "epoch": 101.38208955223881, "grad_norm": 23.881567001342773, "learning_rate": 8.980519480519482e-06, "loss": 41.6253, "step": 4258 }, { "epoch": 101.40597014925373, "grad_norm": 21.65298843383789, "learning_rate": 8.97835497835498e-06, "loss": 42.0994, "step": 4259 }, { "epoch": 101.42985074626866, "grad_norm": 27.039722442626953, "learning_rate": 8.976190476190478e-06, "loss": 41.8836, "step": 4260 }, { "epoch": 101.45373134328358, "grad_norm": 20.1751766204834, "learning_rate": 8.974025974025975e-06, "loss": 41.1007, "step": 4261 }, { "epoch": 101.4776119402985, "grad_norm": 31.58852767944336, "learning_rate": 8.971861471861473e-06, "loss": 41.9793, "step": 4262 }, { "epoch": 101.50149253731344, "grad_norm": 21.907556533813477, "learning_rate": 8.969696969696971e-06, "loss": 41.509, "step": 4263 }, { "epoch": 101.52537313432836, "grad_norm": 32.310272216796875, "learning_rate": 8.967532467532469e-06, "loss": 41.1805, "step": 4264 }, { "epoch": 101.54925373134328, "grad_norm": 25.363170623779297, "learning_rate": 8.965367965367967e-06, "loss": 42.3668, "step": 4265 }, { "epoch": 101.57313432835821, "grad_norm": 29.320520401000977, "learning_rate": 8.963203463203464e-06, "loss": 41.7248, "step": 4266 }, { "epoch": 101.59701492537313, "grad_norm": 24.637983322143555, "learning_rate": 8.96103896103896e-06, "loss": 40.1595, "step": 4267 }, { "epoch": 101.62089552238805, "grad_norm": 32.69458770751953, "learning_rate": 8.958874458874458e-06, "loss": 41.6096, "step": 4268 }, { "epoch": 101.64477611940299, "grad_norm": 24.87364959716797, "learning_rate": 8.956709956709956e-06, "loss": 41.3295, "step": 4269 }, { "epoch": 101.66865671641791, "grad_norm": 31.5223445892334, "learning_rate": 8.954545454545456e-06, "loss": 42.1731, "step": 4270 }, { "epoch": 101.69253731343284, "grad_norm": 29.047664642333984, "learning_rate": 8.952380952380953e-06, "loss": 41.8301, "step": 4271 }, { "epoch": 101.71641791044776, "grad_norm": 31.420434951782227, "learning_rate": 8.950216450216451e-06, "loss": 41.6502, "step": 4272 }, { "epoch": 101.74029850746268, "grad_norm": 28.40896224975586, "learning_rate": 8.948051948051949e-06, "loss": 41.7585, "step": 4273 }, { "epoch": 101.7641791044776, "grad_norm": 32.256263732910156, "learning_rate": 8.945887445887447e-06, "loss": 41.8508, "step": 4274 }, { "epoch": 101.78805970149254, "grad_norm": 30.496904373168945, "learning_rate": 8.943722943722945e-06, "loss": 41.7192, "step": 4275 }, { "epoch": 101.81194029850747, "grad_norm": 31.20074462890625, "learning_rate": 8.941558441558442e-06, "loss": 42.687, "step": 4276 }, { "epoch": 101.83582089552239, "grad_norm": 27.639835357666016, "learning_rate": 8.93939393939394e-06, "loss": 41.7068, "step": 4277 }, { "epoch": 101.85970149253731, "grad_norm": 31.692638397216797, "learning_rate": 8.937229437229438e-06, "loss": 42.8243, "step": 4278 }, { "epoch": 101.88358208955223, "grad_norm": 28.27922248840332, "learning_rate": 8.935064935064936e-06, "loss": 41.8772, "step": 4279 }, { "epoch": 101.90746268656716, "grad_norm": 28.70676040649414, "learning_rate": 8.932900432900434e-06, "loss": 41.682, "step": 4280 }, { "epoch": 101.9313432835821, "grad_norm": 27.140151977539062, "learning_rate": 8.930735930735931e-06, "loss": 42.078, "step": 4281 }, { "epoch": 101.95522388059702, "grad_norm": 25.135448455810547, "learning_rate": 8.92857142857143e-06, "loss": 42.2035, "step": 4282 }, { "epoch": 101.97910447761194, "grad_norm": 22.988903045654297, "learning_rate": 8.926406926406927e-06, "loss": 41.2573, "step": 4283 }, { "epoch": 102.0, "grad_norm": 25.694786071777344, "learning_rate": 8.924242424242425e-06, "loss": 36.1049, "step": 4284 }, { "epoch": 102.02388059701492, "grad_norm": 24.528118133544922, "learning_rate": 8.922077922077923e-06, "loss": 42.162, "step": 4285 }, { "epoch": 102.04776119402985, "grad_norm": 27.563627243041992, "learning_rate": 8.91991341991342e-06, "loss": 41.7018, "step": 4286 }, { "epoch": 102.07164179104478, "grad_norm": 23.374286651611328, "learning_rate": 8.917748917748918e-06, "loss": 42.4075, "step": 4287 }, { "epoch": 102.0955223880597, "grad_norm": 28.673614501953125, "learning_rate": 8.915584415584416e-06, "loss": 41.8272, "step": 4288 }, { "epoch": 102.11940298507463, "grad_norm": 24.432859420776367, "learning_rate": 8.913419913419914e-06, "loss": 41.7054, "step": 4289 }, { "epoch": 102.14328358208955, "grad_norm": 26.83321189880371, "learning_rate": 8.911255411255412e-06, "loss": 42.2169, "step": 4290 }, { "epoch": 102.16716417910447, "grad_norm": 21.222537994384766, "learning_rate": 8.90909090909091e-06, "loss": 42.164, "step": 4291 }, { "epoch": 102.1910447761194, "grad_norm": 32.05888748168945, "learning_rate": 8.906926406926407e-06, "loss": 42.0759, "step": 4292 }, { "epoch": 102.21492537313434, "grad_norm": 22.959369659423828, "learning_rate": 8.904761904761905e-06, "loss": 43.0785, "step": 4293 }, { "epoch": 102.23880597014926, "grad_norm": 37.53632736206055, "learning_rate": 8.902597402597405e-06, "loss": 42.1665, "step": 4294 }, { "epoch": 102.26268656716418, "grad_norm": 29.86913299560547, "learning_rate": 8.900432900432902e-06, "loss": 41.3932, "step": 4295 }, { "epoch": 102.2865671641791, "grad_norm": 31.11789894104004, "learning_rate": 8.8982683982684e-06, "loss": 43.0771, "step": 4296 }, { "epoch": 102.31044776119403, "grad_norm": 27.745323181152344, "learning_rate": 8.896103896103896e-06, "loss": 41.1395, "step": 4297 }, { "epoch": 102.33432835820895, "grad_norm": 25.368127822875977, "learning_rate": 8.893939393939394e-06, "loss": 42.7978, "step": 4298 }, { "epoch": 102.35820895522389, "grad_norm": 24.081409454345703, "learning_rate": 8.891774891774892e-06, "loss": 41.4698, "step": 4299 }, { "epoch": 102.38208955223881, "grad_norm": 24.39154815673828, "learning_rate": 8.88961038961039e-06, "loss": 41.6765, "step": 4300 }, { "epoch": 102.40597014925373, "grad_norm": 21.794816970825195, "learning_rate": 8.887445887445887e-06, "loss": 40.9793, "step": 4301 }, { "epoch": 102.42985074626866, "grad_norm": 24.50321388244629, "learning_rate": 8.885281385281385e-06, "loss": 41.3914, "step": 4302 }, { "epoch": 102.45373134328358, "grad_norm": 21.492965698242188, "learning_rate": 8.883116883116883e-06, "loss": 42.1772, "step": 4303 }, { "epoch": 102.4776119402985, "grad_norm": 25.231094360351562, "learning_rate": 8.88095238095238e-06, "loss": 41.6758, "step": 4304 }, { "epoch": 102.50149253731344, "grad_norm": 21.51530647277832, "learning_rate": 8.87878787878788e-06, "loss": 41.0819, "step": 4305 }, { "epoch": 102.52537313432836, "grad_norm": 21.023269653320312, "learning_rate": 8.876623376623378e-06, "loss": 41.446, "step": 4306 }, { "epoch": 102.54925373134328, "grad_norm": 25.81951904296875, "learning_rate": 8.874458874458876e-06, "loss": 41.3221, "step": 4307 }, { "epoch": 102.57313432835821, "grad_norm": 19.7045841217041, "learning_rate": 8.872294372294374e-06, "loss": 42.5273, "step": 4308 }, { "epoch": 102.59701492537313, "grad_norm": 30.536680221557617, "learning_rate": 8.870129870129871e-06, "loss": 40.7574, "step": 4309 }, { "epoch": 102.62089552238805, "grad_norm": 22.61910629272461, "learning_rate": 8.86796536796537e-06, "loss": 42.1551, "step": 4310 }, { "epoch": 102.64477611940299, "grad_norm": 31.215150833129883, "learning_rate": 8.865800865800867e-06, "loss": 42.3013, "step": 4311 }, { "epoch": 102.66865671641791, "grad_norm": 29.22039794921875, "learning_rate": 8.863636363636365e-06, "loss": 42.3447, "step": 4312 }, { "epoch": 102.69253731343284, "grad_norm": 31.03571128845215, "learning_rate": 8.861471861471863e-06, "loss": 41.9643, "step": 4313 }, { "epoch": 102.71641791044776, "grad_norm": 26.90915298461914, "learning_rate": 8.85930735930736e-06, "loss": 42.8879, "step": 4314 }, { "epoch": 102.74029850746268, "grad_norm": 31.34430503845215, "learning_rate": 8.857142857142858e-06, "loss": 41.6856, "step": 4315 }, { "epoch": 102.7641791044776, "grad_norm": 26.868675231933594, "learning_rate": 8.854978354978356e-06, "loss": 41.1538, "step": 4316 }, { "epoch": 102.78805970149254, "grad_norm": 26.82084846496582, "learning_rate": 8.852813852813854e-06, "loss": 42.6873, "step": 4317 }, { "epoch": 102.81194029850747, "grad_norm": 24.742094039916992, "learning_rate": 8.850649350649352e-06, "loss": 43.168, "step": 4318 }, { "epoch": 102.83582089552239, "grad_norm": 23.871686935424805, "learning_rate": 8.84848484848485e-06, "loss": 42.0424, "step": 4319 }, { "epoch": 102.85970149253731, "grad_norm": 21.681507110595703, "learning_rate": 8.846320346320347e-06, "loss": 42.1547, "step": 4320 }, { "epoch": 102.88358208955223, "grad_norm": 28.63477325439453, "learning_rate": 8.844155844155845e-06, "loss": 41.2654, "step": 4321 }, { "epoch": 102.90746268656716, "grad_norm": 19.495147705078125, "learning_rate": 8.841991341991343e-06, "loss": 41.5641, "step": 4322 }, { "epoch": 102.9313432835821, "grad_norm": 33.34874725341797, "learning_rate": 8.83982683982684e-06, "loss": 41.7787, "step": 4323 }, { "epoch": 102.95522388059702, "grad_norm": 27.586767196655273, "learning_rate": 8.837662337662338e-06, "loss": 40.4204, "step": 4324 }, { "epoch": 102.97910447761194, "grad_norm": 28.708871841430664, "learning_rate": 8.835497835497836e-06, "loss": 41.7225, "step": 4325 }, { "epoch": 103.0, "grad_norm": 22.439306259155273, "learning_rate": 8.833333333333334e-06, "loss": 35.3291, "step": 4326 }, { "epoch": 103.02388059701492, "grad_norm": 25.760793685913086, "learning_rate": 8.831168831168832e-06, "loss": 42.0465, "step": 4327 }, { "epoch": 103.04776119402985, "grad_norm": 22.56456756591797, "learning_rate": 8.82900432900433e-06, "loss": 41.6094, "step": 4328 }, { "epoch": 103.07164179104478, "grad_norm": 30.912078857421875, "learning_rate": 8.826839826839827e-06, "loss": 43.0196, "step": 4329 }, { "epoch": 103.0955223880597, "grad_norm": 23.01909065246582, "learning_rate": 8.824675324675325e-06, "loss": 42.7305, "step": 4330 }, { "epoch": 103.11940298507463, "grad_norm": 29.197927474975586, "learning_rate": 8.822510822510823e-06, "loss": 41.0641, "step": 4331 }, { "epoch": 103.14328358208955, "grad_norm": 27.894495010375977, "learning_rate": 8.82034632034632e-06, "loss": 40.9656, "step": 4332 }, { "epoch": 103.16716417910447, "grad_norm": 27.135541915893555, "learning_rate": 8.818181818181819e-06, "loss": 41.7715, "step": 4333 }, { "epoch": 103.1910447761194, "grad_norm": 24.774351119995117, "learning_rate": 8.816017316017316e-06, "loss": 40.5809, "step": 4334 }, { "epoch": 103.21492537313434, "grad_norm": 27.74059295654297, "learning_rate": 8.813852813852814e-06, "loss": 40.9501, "step": 4335 }, { "epoch": 103.23880597014926, "grad_norm": 24.502626419067383, "learning_rate": 8.811688311688312e-06, "loss": 41.3341, "step": 4336 }, { "epoch": 103.26268656716418, "grad_norm": 29.406909942626953, "learning_rate": 8.80952380952381e-06, "loss": 42.8791, "step": 4337 }, { "epoch": 103.2865671641791, "grad_norm": 24.162965774536133, "learning_rate": 8.807359307359308e-06, "loss": 41.3314, "step": 4338 }, { "epoch": 103.31044776119403, "grad_norm": 27.782527923583984, "learning_rate": 8.805194805194805e-06, "loss": 41.0151, "step": 4339 }, { "epoch": 103.33432835820895, "grad_norm": 25.89789390563965, "learning_rate": 8.803030303030303e-06, "loss": 41.0182, "step": 4340 }, { "epoch": 103.35820895522389, "grad_norm": 31.413692474365234, "learning_rate": 8.800865800865803e-06, "loss": 41.1635, "step": 4341 }, { "epoch": 103.38208955223881, "grad_norm": 23.838945388793945, "learning_rate": 8.7987012987013e-06, "loss": 42.2695, "step": 4342 }, { "epoch": 103.40597014925373, "grad_norm": 27.55811309814453, "learning_rate": 8.796536796536798e-06, "loss": 42.6491, "step": 4343 }, { "epoch": 103.42985074626866, "grad_norm": 24.99410629272461, "learning_rate": 8.794372294372296e-06, "loss": 40.49, "step": 4344 }, { "epoch": 103.45373134328358, "grad_norm": 32.69471740722656, "learning_rate": 8.792207792207794e-06, "loss": 41.0334, "step": 4345 }, { "epoch": 103.4776119402985, "grad_norm": 25.661212921142578, "learning_rate": 8.79004329004329e-06, "loss": 42.0111, "step": 4346 }, { "epoch": 103.50149253731344, "grad_norm": 32.33528518676758, "learning_rate": 8.787878787878788e-06, "loss": 41.451, "step": 4347 }, { "epoch": 103.52537313432836, "grad_norm": 30.863183975219727, "learning_rate": 8.785714285714286e-06, "loss": 43.0101, "step": 4348 }, { "epoch": 103.54925373134328, "grad_norm": 27.80331802368164, "learning_rate": 8.783549783549783e-06, "loss": 41.8201, "step": 4349 }, { "epoch": 103.57313432835821, "grad_norm": 25.65656089782715, "learning_rate": 8.781385281385281e-06, "loss": 42.3188, "step": 4350 }, { "epoch": 103.59701492537313, "grad_norm": 27.477493286132812, "learning_rate": 8.779220779220779e-06, "loss": 42.4443, "step": 4351 }, { "epoch": 103.62089552238805, "grad_norm": 19.195556640625, "learning_rate": 8.777056277056277e-06, "loss": 41.6902, "step": 4352 }, { "epoch": 103.64477611940299, "grad_norm": 31.54138946533203, "learning_rate": 8.774891774891776e-06, "loss": 41.7891, "step": 4353 }, { "epoch": 103.66865671641791, "grad_norm": 24.392765045166016, "learning_rate": 8.772727272727274e-06, "loss": 43.201, "step": 4354 }, { "epoch": 103.69253731343284, "grad_norm": 31.868196487426758, "learning_rate": 8.770562770562772e-06, "loss": 42.0864, "step": 4355 }, { "epoch": 103.71641791044776, "grad_norm": 28.33005142211914, "learning_rate": 8.76839826839827e-06, "loss": 40.8061, "step": 4356 }, { "epoch": 103.74029850746268, "grad_norm": 29.663543701171875, "learning_rate": 8.766233766233767e-06, "loss": 41.195, "step": 4357 }, { "epoch": 103.7641791044776, "grad_norm": 24.99871826171875, "learning_rate": 8.764069264069265e-06, "loss": 42.0865, "step": 4358 }, { "epoch": 103.78805970149254, "grad_norm": 26.281768798828125, "learning_rate": 8.761904761904763e-06, "loss": 42.2214, "step": 4359 }, { "epoch": 103.81194029850747, "grad_norm": 25.848814010620117, "learning_rate": 8.75974025974026e-06, "loss": 41.752, "step": 4360 }, { "epoch": 103.83582089552239, "grad_norm": 25.99828338623047, "learning_rate": 8.757575757575759e-06, "loss": 41.3675, "step": 4361 }, { "epoch": 103.85970149253731, "grad_norm": 24.577255249023438, "learning_rate": 8.755411255411256e-06, "loss": 41.3633, "step": 4362 }, { "epoch": 103.88358208955223, "grad_norm": 28.189889907836914, "learning_rate": 8.753246753246754e-06, "loss": 41.1397, "step": 4363 }, { "epoch": 103.90746268656716, "grad_norm": 21.285263061523438, "learning_rate": 8.751082251082252e-06, "loss": 42.9034, "step": 4364 }, { "epoch": 103.9313432835821, "grad_norm": 26.459442138671875, "learning_rate": 8.74891774891775e-06, "loss": 42.1868, "step": 4365 }, { "epoch": 103.95522388059702, "grad_norm": 23.833219528198242, "learning_rate": 8.746753246753248e-06, "loss": 43.8222, "step": 4366 }, { "epoch": 103.97910447761194, "grad_norm": 28.269039154052734, "learning_rate": 8.744588744588745e-06, "loss": 41.7916, "step": 4367 }, { "epoch": 104.0, "grad_norm": 21.251577377319336, "learning_rate": 8.742424242424243e-06, "loss": 36.7322, "step": 4368 }, { "epoch": 104.02388059701492, "grad_norm": 24.385892868041992, "learning_rate": 8.740259740259741e-06, "loss": 41.1758, "step": 4369 }, { "epoch": 104.04776119402985, "grad_norm": 24.85951805114746, "learning_rate": 8.738095238095239e-06, "loss": 41.1797, "step": 4370 }, { "epoch": 104.07164179104478, "grad_norm": 22.94902229309082, "learning_rate": 8.735930735930737e-06, "loss": 42.0245, "step": 4371 }, { "epoch": 104.0955223880597, "grad_norm": 22.89316749572754, "learning_rate": 8.733766233766234e-06, "loss": 40.8802, "step": 4372 }, { "epoch": 104.11940298507463, "grad_norm": 17.931550979614258, "learning_rate": 8.731601731601732e-06, "loss": 41.7585, "step": 4373 }, { "epoch": 104.14328358208955, "grad_norm": 25.272066116333008, "learning_rate": 8.72943722943723e-06, "loss": 41.9595, "step": 4374 }, { "epoch": 104.16716417910447, "grad_norm": 18.83379364013672, "learning_rate": 8.727272727272728e-06, "loss": 42.0377, "step": 4375 }, { "epoch": 104.1910447761194, "grad_norm": 26.816553115844727, "learning_rate": 8.725108225108226e-06, "loss": 42.3945, "step": 4376 }, { "epoch": 104.21492537313434, "grad_norm": 21.217594146728516, "learning_rate": 8.722943722943723e-06, "loss": 41.0879, "step": 4377 }, { "epoch": 104.23880597014926, "grad_norm": 26.040369033813477, "learning_rate": 8.720779220779221e-06, "loss": 41.9009, "step": 4378 }, { "epoch": 104.26268656716418, "grad_norm": 21.120927810668945, "learning_rate": 8.718614718614719e-06, "loss": 41.5876, "step": 4379 }, { "epoch": 104.2865671641791, "grad_norm": 24.789485931396484, "learning_rate": 8.716450216450217e-06, "loss": 42.4683, "step": 4380 }, { "epoch": 104.31044776119403, "grad_norm": 20.2288761138916, "learning_rate": 8.714285714285715e-06, "loss": 41.7415, "step": 4381 }, { "epoch": 104.33432835820895, "grad_norm": 23.13172721862793, "learning_rate": 8.712121212121212e-06, "loss": 41.8696, "step": 4382 }, { "epoch": 104.35820895522389, "grad_norm": 21.838037490844727, "learning_rate": 8.70995670995671e-06, "loss": 43.1081, "step": 4383 }, { "epoch": 104.38208955223881, "grad_norm": 18.31660270690918, "learning_rate": 8.707792207792208e-06, "loss": 41.6609, "step": 4384 }, { "epoch": 104.40597014925373, "grad_norm": 20.596466064453125, "learning_rate": 8.705627705627706e-06, "loss": 41.9226, "step": 4385 }, { "epoch": 104.42985074626866, "grad_norm": 19.209354400634766, "learning_rate": 8.703463203463204e-06, "loss": 41.8937, "step": 4386 }, { "epoch": 104.45373134328358, "grad_norm": 21.35397720336914, "learning_rate": 8.701298701298701e-06, "loss": 41.222, "step": 4387 }, { "epoch": 104.4776119402985, "grad_norm": 16.040178298950195, "learning_rate": 8.6991341991342e-06, "loss": 40.8326, "step": 4388 }, { "epoch": 104.50149253731344, "grad_norm": 26.846803665161133, "learning_rate": 8.696969696969699e-06, "loss": 42.0748, "step": 4389 }, { "epoch": 104.52537313432836, "grad_norm": 19.368515014648438, "learning_rate": 8.694805194805196e-06, "loss": 41.4322, "step": 4390 }, { "epoch": 104.54925373134328, "grad_norm": 30.950580596923828, "learning_rate": 8.692640692640694e-06, "loss": 41.2695, "step": 4391 }, { "epoch": 104.57313432835821, "grad_norm": 23.07410430908203, "learning_rate": 8.690476190476192e-06, "loss": 41.8303, "step": 4392 }, { "epoch": 104.59701492537313, "grad_norm": 27.158117294311523, "learning_rate": 8.68831168831169e-06, "loss": 42.4952, "step": 4393 }, { "epoch": 104.62089552238805, "grad_norm": 25.001056671142578, "learning_rate": 8.686147186147188e-06, "loss": 41.4797, "step": 4394 }, { "epoch": 104.64477611940299, "grad_norm": 27.168846130371094, "learning_rate": 8.683982683982685e-06, "loss": 41.8096, "step": 4395 }, { "epoch": 104.66865671641791, "grad_norm": 21.596757888793945, "learning_rate": 8.681818181818182e-06, "loss": 41.9243, "step": 4396 }, { "epoch": 104.69253731343284, "grad_norm": 27.944332122802734, "learning_rate": 8.67965367965368e-06, "loss": 42.5102, "step": 4397 }, { "epoch": 104.71641791044776, "grad_norm": 24.295595169067383, "learning_rate": 8.677489177489177e-06, "loss": 42.5514, "step": 4398 }, { "epoch": 104.74029850746268, "grad_norm": 27.505474090576172, "learning_rate": 8.675324675324675e-06, "loss": 42.017, "step": 4399 }, { "epoch": 104.7641791044776, "grad_norm": 24.030363082885742, "learning_rate": 8.673160173160173e-06, "loss": 42.5318, "step": 4400 }, { "epoch": 104.78805970149254, "grad_norm": 26.74481964111328, "learning_rate": 8.670995670995672e-06, "loss": 42.4153, "step": 4401 }, { "epoch": 104.81194029850747, "grad_norm": 25.275205612182617, "learning_rate": 8.66883116883117e-06, "loss": 40.9114, "step": 4402 }, { "epoch": 104.83582089552239, "grad_norm": 19.21797752380371, "learning_rate": 8.666666666666668e-06, "loss": 41.4621, "step": 4403 }, { "epoch": 104.85970149253731, "grad_norm": 21.647167205810547, "learning_rate": 8.664502164502166e-06, "loss": 42.0579, "step": 4404 }, { "epoch": 104.88358208955223, "grad_norm": 18.133159637451172, "learning_rate": 8.662337662337663e-06, "loss": 41.3995, "step": 4405 }, { "epoch": 104.90746268656716, "grad_norm": 17.7130069732666, "learning_rate": 8.660173160173161e-06, "loss": 42.2021, "step": 4406 }, { "epoch": 104.9313432835821, "grad_norm": 17.646291732788086, "learning_rate": 8.658008658008659e-06, "loss": 41.4231, "step": 4407 }, { "epoch": 104.95522388059702, "grad_norm": 20.67991065979004, "learning_rate": 8.655844155844157e-06, "loss": 40.8638, "step": 4408 }, { "epoch": 104.97910447761194, "grad_norm": 19.140832901000977, "learning_rate": 8.653679653679655e-06, "loss": 42.5387, "step": 4409 }, { "epoch": 105.0, "grad_norm": 13.847710609436035, "learning_rate": 8.651515151515152e-06, "loss": 35.4038, "step": 4410 }, { "epoch": 105.02388059701492, "grad_norm": 16.923620223999023, "learning_rate": 8.64935064935065e-06, "loss": 43.0403, "step": 4411 }, { "epoch": 105.04776119402985, "grad_norm": 17.983060836791992, "learning_rate": 8.647186147186148e-06, "loss": 42.2899, "step": 4412 }, { "epoch": 105.07164179104478, "grad_norm": 16.440452575683594, "learning_rate": 8.645021645021646e-06, "loss": 41.3221, "step": 4413 }, { "epoch": 105.0955223880597, "grad_norm": 20.931194305419922, "learning_rate": 8.642857142857144e-06, "loss": 41.6118, "step": 4414 }, { "epoch": 105.11940298507463, "grad_norm": 16.63971710205078, "learning_rate": 8.640692640692641e-06, "loss": 41.56, "step": 4415 }, { "epoch": 105.14328358208955, "grad_norm": 19.395835876464844, "learning_rate": 8.63852813852814e-06, "loss": 40.3512, "step": 4416 }, { "epoch": 105.16716417910447, "grad_norm": 23.419681549072266, "learning_rate": 8.636363636363637e-06, "loss": 41.588, "step": 4417 }, { "epoch": 105.1910447761194, "grad_norm": 18.330759048461914, "learning_rate": 8.634199134199135e-06, "loss": 41.5903, "step": 4418 }, { "epoch": 105.21492537313434, "grad_norm": 32.92748260498047, "learning_rate": 8.632034632034633e-06, "loss": 41.7201, "step": 4419 }, { "epoch": 105.23880597014926, "grad_norm": 23.43516731262207, "learning_rate": 8.62987012987013e-06, "loss": 42.0367, "step": 4420 }, { "epoch": 105.26268656716418, "grad_norm": 31.077037811279297, "learning_rate": 8.627705627705628e-06, "loss": 41.8229, "step": 4421 }, { "epoch": 105.2865671641791, "grad_norm": 24.310850143432617, "learning_rate": 8.625541125541126e-06, "loss": 41.869, "step": 4422 }, { "epoch": 105.31044776119403, "grad_norm": 29.064128875732422, "learning_rate": 8.623376623376624e-06, "loss": 41.3312, "step": 4423 }, { "epoch": 105.33432835820895, "grad_norm": 27.2437686920166, "learning_rate": 8.621212121212122e-06, "loss": 41.4347, "step": 4424 }, { "epoch": 105.35820895522389, "grad_norm": 26.48787498474121, "learning_rate": 8.61904761904762e-06, "loss": 41.9868, "step": 4425 }, { "epoch": 105.38208955223881, "grad_norm": 23.06917953491211, "learning_rate": 8.616883116883117e-06, "loss": 40.7182, "step": 4426 }, { "epoch": 105.40597014925373, "grad_norm": 25.888072967529297, "learning_rate": 8.614718614718615e-06, "loss": 43.4227, "step": 4427 }, { "epoch": 105.42985074626866, "grad_norm": 21.196561813354492, "learning_rate": 8.612554112554113e-06, "loss": 42.1541, "step": 4428 }, { "epoch": 105.45373134328358, "grad_norm": 23.897281646728516, "learning_rate": 8.61038961038961e-06, "loss": 42.3009, "step": 4429 }, { "epoch": 105.4776119402985, "grad_norm": 21.39472770690918, "learning_rate": 8.608225108225108e-06, "loss": 41.9873, "step": 4430 }, { "epoch": 105.50149253731344, "grad_norm": 22.932235717773438, "learning_rate": 8.606060606060606e-06, "loss": 42.9503, "step": 4431 }, { "epoch": 105.52537313432836, "grad_norm": 19.643224716186523, "learning_rate": 8.603896103896104e-06, "loss": 41.1197, "step": 4432 }, { "epoch": 105.54925373134328, "grad_norm": 22.474496841430664, "learning_rate": 8.601731601731602e-06, "loss": 41.472, "step": 4433 }, { "epoch": 105.57313432835821, "grad_norm": 18.618505477905273, "learning_rate": 8.5995670995671e-06, "loss": 42.0385, "step": 4434 }, { "epoch": 105.59701492537313, "grad_norm": 22.780241012573242, "learning_rate": 8.597402597402597e-06, "loss": 42.2941, "step": 4435 }, { "epoch": 105.62089552238805, "grad_norm": 18.00736427307129, "learning_rate": 8.595238095238097e-06, "loss": 41.086, "step": 4436 }, { "epoch": 105.64477611940299, "grad_norm": 26.372411727905273, "learning_rate": 8.593073593073595e-06, "loss": 43.5092, "step": 4437 }, { "epoch": 105.66865671641791, "grad_norm": 20.315715789794922, "learning_rate": 8.590909090909092e-06, "loss": 42.6546, "step": 4438 }, { "epoch": 105.69253731343284, "grad_norm": 25.5256404876709, "learning_rate": 8.58874458874459e-06, "loss": 41.5332, "step": 4439 }, { "epoch": 105.71641791044776, "grad_norm": 23.848834991455078, "learning_rate": 8.586580086580088e-06, "loss": 41.1996, "step": 4440 }, { "epoch": 105.74029850746268, "grad_norm": 22.64993667602539, "learning_rate": 8.584415584415586e-06, "loss": 41.0973, "step": 4441 }, { "epoch": 105.7641791044776, "grad_norm": 25.26251792907715, "learning_rate": 8.582251082251084e-06, "loss": 40.5223, "step": 4442 }, { "epoch": 105.78805970149254, "grad_norm": 18.45581817626953, "learning_rate": 8.580086580086581e-06, "loss": 40.3545, "step": 4443 }, { "epoch": 105.81194029850747, "grad_norm": 20.561473846435547, "learning_rate": 8.57792207792208e-06, "loss": 41.3425, "step": 4444 }, { "epoch": 105.83582089552239, "grad_norm": 19.369930267333984, "learning_rate": 8.575757575757575e-06, "loss": 41.9595, "step": 4445 }, { "epoch": 105.85970149253731, "grad_norm": 16.14900779724121, "learning_rate": 8.573593073593073e-06, "loss": 41.1797, "step": 4446 }, { "epoch": 105.88358208955223, "grad_norm": 21.74477195739746, "learning_rate": 8.571428571428571e-06, "loss": 40.7879, "step": 4447 }, { "epoch": 105.90746268656716, "grad_norm": 16.549848556518555, "learning_rate": 8.56926406926407e-06, "loss": 41.5383, "step": 4448 }, { "epoch": 105.9313432835821, "grad_norm": 18.291797637939453, "learning_rate": 8.567099567099568e-06, "loss": 42.818, "step": 4449 }, { "epoch": 105.95522388059702, "grad_norm": 21.389198303222656, "learning_rate": 8.564935064935066e-06, "loss": 41.8181, "step": 4450 }, { "epoch": 105.97910447761194, "grad_norm": 18.4671630859375, "learning_rate": 8.562770562770564e-06, "loss": 40.5471, "step": 4451 }, { "epoch": 106.0, "grad_norm": 12.387261390686035, "learning_rate": 8.560606060606062e-06, "loss": 37.1723, "step": 4452 }, { "epoch": 106.02388059701492, "grad_norm": 25.35000991821289, "learning_rate": 8.55844155844156e-06, "loss": 41.526, "step": 4453 }, { "epoch": 106.04776119402985, "grad_norm": 18.196853637695312, "learning_rate": 8.556277056277057e-06, "loss": 42.4372, "step": 4454 }, { "epoch": 106.07164179104478, "grad_norm": 17.911649703979492, "learning_rate": 8.554112554112555e-06, "loss": 41.6807, "step": 4455 }, { "epoch": 106.0955223880597, "grad_norm": 18.82575798034668, "learning_rate": 8.551948051948053e-06, "loss": 41.5713, "step": 4456 }, { "epoch": 106.11940298507463, "grad_norm": 17.8409423828125, "learning_rate": 8.54978354978355e-06, "loss": 41.243, "step": 4457 }, { "epoch": 106.14328358208955, "grad_norm": 14.669032096862793, "learning_rate": 8.547619047619048e-06, "loss": 41.3578, "step": 4458 }, { "epoch": 106.16716417910447, "grad_norm": 18.624805450439453, "learning_rate": 8.545454545454546e-06, "loss": 42.5552, "step": 4459 }, { "epoch": 106.1910447761194, "grad_norm": 15.485766410827637, "learning_rate": 8.543290043290044e-06, "loss": 42.2293, "step": 4460 }, { "epoch": 106.21492537313434, "grad_norm": 19.794565200805664, "learning_rate": 8.541125541125542e-06, "loss": 41.1415, "step": 4461 }, { "epoch": 106.23880597014926, "grad_norm": 18.35716438293457, "learning_rate": 8.53896103896104e-06, "loss": 41.6452, "step": 4462 }, { "epoch": 106.26268656716418, "grad_norm": 20.6253719329834, "learning_rate": 8.536796536796537e-06, "loss": 41.2028, "step": 4463 }, { "epoch": 106.2865671641791, "grad_norm": 17.438785552978516, "learning_rate": 8.534632034632035e-06, "loss": 42.4732, "step": 4464 }, { "epoch": 106.31044776119403, "grad_norm": 22.83930778503418, "learning_rate": 8.532467532467533e-06, "loss": 40.1875, "step": 4465 }, { "epoch": 106.33432835820895, "grad_norm": 19.77629852294922, "learning_rate": 8.53030303030303e-06, "loss": 42.7191, "step": 4466 }, { "epoch": 106.35820895522389, "grad_norm": 24.823516845703125, "learning_rate": 8.528138528138529e-06, "loss": 41.8532, "step": 4467 }, { "epoch": 106.38208955223881, "grad_norm": 25.804109573364258, "learning_rate": 8.525974025974026e-06, "loss": 41.3039, "step": 4468 }, { "epoch": 106.40597014925373, "grad_norm": 18.37181854248047, "learning_rate": 8.523809523809524e-06, "loss": 42.5937, "step": 4469 }, { "epoch": 106.42985074626866, "grad_norm": 21.761140823364258, "learning_rate": 8.521645021645022e-06, "loss": 41.5739, "step": 4470 }, { "epoch": 106.45373134328358, "grad_norm": 18.34234619140625, "learning_rate": 8.51948051948052e-06, "loss": 42.0672, "step": 4471 }, { "epoch": 106.4776119402985, "grad_norm": 21.598434448242188, "learning_rate": 8.51731601731602e-06, "loss": 43.3303, "step": 4472 }, { "epoch": 106.50149253731344, "grad_norm": 19.549448013305664, "learning_rate": 8.515151515151517e-06, "loss": 41.4708, "step": 4473 }, { "epoch": 106.52537313432836, "grad_norm": 20.763225555419922, "learning_rate": 8.512987012987015e-06, "loss": 41.8263, "step": 4474 }, { "epoch": 106.54925373134328, "grad_norm": 19.644168853759766, "learning_rate": 8.510822510822511e-06, "loss": 42.1677, "step": 4475 }, { "epoch": 106.57313432835821, "grad_norm": 23.9834041595459, "learning_rate": 8.508658008658009e-06, "loss": 42.5743, "step": 4476 }, { "epoch": 106.59701492537313, "grad_norm": 19.93153953552246, "learning_rate": 8.506493506493507e-06, "loss": 41.2167, "step": 4477 }, { "epoch": 106.62089552238805, "grad_norm": 22.8863525390625, "learning_rate": 8.504329004329004e-06, "loss": 41.1281, "step": 4478 }, { "epoch": 106.64477611940299, "grad_norm": 20.42034149169922, "learning_rate": 8.502164502164502e-06, "loss": 41.0462, "step": 4479 }, { "epoch": 106.66865671641791, "grad_norm": 21.096284866333008, "learning_rate": 8.5e-06, "loss": 41.1694, "step": 4480 }, { "epoch": 106.69253731343284, "grad_norm": 21.80982780456543, "learning_rate": 8.497835497835498e-06, "loss": 41.3229, "step": 4481 }, { "epoch": 106.71641791044776, "grad_norm": 19.85307502746582, "learning_rate": 8.495670995670996e-06, "loss": 41.6649, "step": 4482 }, { "epoch": 106.74029850746268, "grad_norm": 15.509448051452637, "learning_rate": 8.493506493506493e-06, "loss": 41.1427, "step": 4483 }, { "epoch": 106.7641791044776, "grad_norm": 19.124879837036133, "learning_rate": 8.491341991341993e-06, "loss": 41.2628, "step": 4484 }, { "epoch": 106.78805970149254, "grad_norm": 16.123470306396484, "learning_rate": 8.48917748917749e-06, "loss": 41.9344, "step": 4485 }, { "epoch": 106.81194029850747, "grad_norm": 16.704002380371094, "learning_rate": 8.487012987012988e-06, "loss": 41.3242, "step": 4486 }, { "epoch": 106.83582089552239, "grad_norm": 17.679168701171875, "learning_rate": 8.484848484848486e-06, "loss": 42.4921, "step": 4487 }, { "epoch": 106.85970149253731, "grad_norm": 16.334306716918945, "learning_rate": 8.482683982683984e-06, "loss": 41.7613, "step": 4488 }, { "epoch": 106.88358208955223, "grad_norm": 21.279388427734375, "learning_rate": 8.480519480519482e-06, "loss": 40.7258, "step": 4489 }, { "epoch": 106.90746268656716, "grad_norm": 18.096824645996094, "learning_rate": 8.47835497835498e-06, "loss": 41.4365, "step": 4490 }, { "epoch": 106.9313432835821, "grad_norm": 14.148079872131348, "learning_rate": 8.476190476190477e-06, "loss": 41.9096, "step": 4491 }, { "epoch": 106.95522388059702, "grad_norm": 18.99448013305664, "learning_rate": 8.474025974025975e-06, "loss": 41.1249, "step": 4492 }, { "epoch": 106.97910447761194, "grad_norm": 19.877487182617188, "learning_rate": 8.471861471861473e-06, "loss": 41.6588, "step": 4493 }, { "epoch": 107.0, "grad_norm": 17.858646392822266, "learning_rate": 8.46969696969697e-06, "loss": 35.8561, "step": 4494 }, { "epoch": 107.02388059701492, "grad_norm": 15.608851432800293, "learning_rate": 8.467532467532467e-06, "loss": 41.4418, "step": 4495 }, { "epoch": 107.04776119402985, "grad_norm": 22.582759857177734, "learning_rate": 8.465367965367966e-06, "loss": 41.0498, "step": 4496 }, { "epoch": 107.07164179104478, "grad_norm": 21.779876708984375, "learning_rate": 8.463203463203464e-06, "loss": 41.6588, "step": 4497 }, { "epoch": 107.0955223880597, "grad_norm": 20.698528289794922, "learning_rate": 8.461038961038962e-06, "loss": 43.0142, "step": 4498 }, { "epoch": 107.11940298507463, "grad_norm": 16.091886520385742, "learning_rate": 8.45887445887446e-06, "loss": 41.3033, "step": 4499 }, { "epoch": 107.14328358208955, "grad_norm": 28.291919708251953, "learning_rate": 8.456709956709958e-06, "loss": 41.3949, "step": 4500 }, { "epoch": 107.16716417910447, "grad_norm": 19.51844596862793, "learning_rate": 8.454545454545455e-06, "loss": 42.2322, "step": 4501 }, { "epoch": 107.1910447761194, "grad_norm": 27.817554473876953, "learning_rate": 8.452380952380953e-06, "loss": 41.1951, "step": 4502 }, { "epoch": 107.21492537313434, "grad_norm": 20.286903381347656, "learning_rate": 8.450216450216451e-06, "loss": 42.1971, "step": 4503 }, { "epoch": 107.23880597014926, "grad_norm": 26.34720230102539, "learning_rate": 8.448051948051949e-06, "loss": 40.7248, "step": 4504 }, { "epoch": 107.26268656716418, "grad_norm": 21.989835739135742, "learning_rate": 8.445887445887447e-06, "loss": 42.4462, "step": 4505 }, { "epoch": 107.2865671641791, "grad_norm": 22.28291893005371, "learning_rate": 8.443722943722944e-06, "loss": 41.103, "step": 4506 }, { "epoch": 107.31044776119403, "grad_norm": 21.63711166381836, "learning_rate": 8.441558441558442e-06, "loss": 42.685, "step": 4507 }, { "epoch": 107.33432835820895, "grad_norm": 16.82655143737793, "learning_rate": 8.43939393939394e-06, "loss": 42.0045, "step": 4508 }, { "epoch": 107.35820895522389, "grad_norm": 24.85128402709961, "learning_rate": 8.437229437229438e-06, "loss": 41.6018, "step": 4509 }, { "epoch": 107.38208955223881, "grad_norm": 18.015731811523438, "learning_rate": 8.435064935064936e-06, "loss": 40.7281, "step": 4510 }, { "epoch": 107.40597014925373, "grad_norm": 26.402570724487305, "learning_rate": 8.432900432900433e-06, "loss": 42.5324, "step": 4511 }, { "epoch": 107.42985074626866, "grad_norm": 21.223861694335938, "learning_rate": 8.430735930735931e-06, "loss": 40.7112, "step": 4512 }, { "epoch": 107.45373134328358, "grad_norm": 19.461315155029297, "learning_rate": 8.428571428571429e-06, "loss": 40.8781, "step": 4513 }, { "epoch": 107.4776119402985, "grad_norm": 23.075971603393555, "learning_rate": 8.426406926406927e-06, "loss": 41.2487, "step": 4514 }, { "epoch": 107.50149253731344, "grad_norm": 22.154701232910156, "learning_rate": 8.424242424242425e-06, "loss": 41.175, "step": 4515 }, { "epoch": 107.52537313432836, "grad_norm": 29.775875091552734, "learning_rate": 8.422077922077922e-06, "loss": 42.0601, "step": 4516 }, { "epoch": 107.54925373134328, "grad_norm": 23.231462478637695, "learning_rate": 8.41991341991342e-06, "loss": 41.6765, "step": 4517 }, { "epoch": 107.57313432835821, "grad_norm": 28.446731567382812, "learning_rate": 8.417748917748918e-06, "loss": 43.3939, "step": 4518 }, { "epoch": 107.59701492537313, "grad_norm": 17.26323890686035, "learning_rate": 8.415584415584416e-06, "loss": 41.2887, "step": 4519 }, { "epoch": 107.62089552238805, "grad_norm": 20.49373435974121, "learning_rate": 8.413419913419915e-06, "loss": 41.6722, "step": 4520 }, { "epoch": 107.64477611940299, "grad_norm": 16.85104751586914, "learning_rate": 8.411255411255413e-06, "loss": 40.5418, "step": 4521 }, { "epoch": 107.66865671641791, "grad_norm": 16.146242141723633, "learning_rate": 8.40909090909091e-06, "loss": 40.9689, "step": 4522 }, { "epoch": 107.69253731343284, "grad_norm": 17.153108596801758, "learning_rate": 8.406926406926409e-06, "loss": 41.6464, "step": 4523 }, { "epoch": 107.71641791044776, "grad_norm": 20.580894470214844, "learning_rate": 8.404761904761905e-06, "loss": 42.4474, "step": 4524 }, { "epoch": 107.74029850746268, "grad_norm": 15.058161735534668, "learning_rate": 8.402597402597403e-06, "loss": 41.7216, "step": 4525 }, { "epoch": 107.7641791044776, "grad_norm": 20.474285125732422, "learning_rate": 8.4004329004329e-06, "loss": 41.7553, "step": 4526 }, { "epoch": 107.78805970149254, "grad_norm": 20.75484275817871, "learning_rate": 8.398268398268398e-06, "loss": 41.6698, "step": 4527 }, { "epoch": 107.81194029850747, "grad_norm": 14.600532531738281, "learning_rate": 8.396103896103896e-06, "loss": 41.6121, "step": 4528 }, { "epoch": 107.83582089552239, "grad_norm": 18.704586029052734, "learning_rate": 8.393939393939394e-06, "loss": 40.5424, "step": 4529 }, { "epoch": 107.85970149253731, "grad_norm": 15.7553129196167, "learning_rate": 8.391774891774892e-06, "loss": 41.0951, "step": 4530 }, { "epoch": 107.88358208955223, "grad_norm": 20.587574005126953, "learning_rate": 8.38961038961039e-06, "loss": 42.1185, "step": 4531 }, { "epoch": 107.90746268656716, "grad_norm": 16.545307159423828, "learning_rate": 8.387445887445889e-06, "loss": 40.7692, "step": 4532 }, { "epoch": 107.9313432835821, "grad_norm": 19.46141815185547, "learning_rate": 8.385281385281387e-06, "loss": 41.6013, "step": 4533 }, { "epoch": 107.95522388059702, "grad_norm": 16.87144660949707, "learning_rate": 8.383116883116884e-06, "loss": 41.6794, "step": 4534 }, { "epoch": 107.97910447761194, "grad_norm": 16.42438316345215, "learning_rate": 8.380952380952382e-06, "loss": 42.3932, "step": 4535 }, { "epoch": 108.0, "grad_norm": 24.72748374938965, "learning_rate": 8.37878787878788e-06, "loss": 36.3065, "step": 4536 }, { "epoch": 108.02388059701492, "grad_norm": 21.551437377929688, "learning_rate": 8.376623376623378e-06, "loss": 41.1883, "step": 4537 }, { "epoch": 108.04776119402985, "grad_norm": 31.447101593017578, "learning_rate": 8.374458874458876e-06, "loss": 41.2846, "step": 4538 }, { "epoch": 108.07164179104478, "grad_norm": 23.090343475341797, "learning_rate": 8.372294372294373e-06, "loss": 41.7488, "step": 4539 }, { "epoch": 108.0955223880597, "grad_norm": 29.949562072753906, "learning_rate": 8.370129870129871e-06, "loss": 42.462, "step": 4540 }, { "epoch": 108.11940298507463, "grad_norm": 21.09743309020996, "learning_rate": 8.367965367965369e-06, "loss": 41.4602, "step": 4541 }, { "epoch": 108.14328358208955, "grad_norm": 34.469139099121094, "learning_rate": 8.365800865800867e-06, "loss": 41.5088, "step": 4542 }, { "epoch": 108.16716417910447, "grad_norm": 25.73923110961914, "learning_rate": 8.363636363636365e-06, "loss": 42.9585, "step": 4543 }, { "epoch": 108.1910447761194, "grad_norm": 30.073488235473633, "learning_rate": 8.361471861471862e-06, "loss": 41.5492, "step": 4544 }, { "epoch": 108.21492537313434, "grad_norm": 26.56512451171875, "learning_rate": 8.35930735930736e-06, "loss": 41.2259, "step": 4545 }, { "epoch": 108.23880597014926, "grad_norm": 32.05238723754883, "learning_rate": 8.357142857142858e-06, "loss": 41.8249, "step": 4546 }, { "epoch": 108.26268656716418, "grad_norm": 25.487403869628906, "learning_rate": 8.354978354978356e-06, "loss": 41.2949, "step": 4547 }, { "epoch": 108.2865671641791, "grad_norm": 26.391586303710938, "learning_rate": 8.352813852813854e-06, "loss": 41.9133, "step": 4548 }, { "epoch": 108.31044776119403, "grad_norm": 23.400354385375977, "learning_rate": 8.350649350649351e-06, "loss": 41.2916, "step": 4549 }, { "epoch": 108.33432835820895, "grad_norm": 32.927467346191406, "learning_rate": 8.348484848484849e-06, "loss": 41.0151, "step": 4550 }, { "epoch": 108.35820895522389, "grad_norm": NaN, "learning_rate": 8.346320346320347e-06, "loss": 41.9067, "step": 4551 }, { "epoch": 108.38208955223881, "grad_norm": 27.199121475219727, "learning_rate": 8.346320346320347e-06, "loss": 41.8715, "step": 4552 }, { "epoch": 108.40597014925373, "grad_norm": 30.17380142211914, "learning_rate": 8.344155844155845e-06, "loss": 41.9553, "step": 4553 }, { "epoch": 108.42985074626866, "grad_norm": 28.72991943359375, "learning_rate": 8.341991341991343e-06, "loss": 41.9683, "step": 4554 }, { "epoch": 108.45373134328358, "grad_norm": 26.323143005371094, "learning_rate": 8.33982683982684e-06, "loss": 41.1456, "step": 4555 }, { "epoch": 108.4776119402985, "grad_norm": 23.400619506835938, "learning_rate": 8.337662337662338e-06, "loss": 41.7313, "step": 4556 }, { "epoch": 108.50149253731344, "grad_norm": 23.805021286010742, "learning_rate": 8.335497835497836e-06, "loss": 40.5904, "step": 4557 }, { "epoch": 108.52537313432836, "grad_norm": 21.17874526977539, "learning_rate": 8.333333333333334e-06, "loss": 41.5941, "step": 4558 }, { "epoch": 108.54925373134328, "grad_norm": 26.89427375793457, "learning_rate": 8.331168831168832e-06, "loss": 40.6515, "step": 4559 }, { "epoch": 108.57313432835821, "grad_norm": 22.102890014648438, "learning_rate": 8.32900432900433e-06, "loss": 41.6195, "step": 4560 }, { "epoch": 108.59701492537313, "grad_norm": 28.349239349365234, "learning_rate": 8.326839826839827e-06, "loss": 41.7613, "step": 4561 }, { "epoch": 108.62089552238805, "grad_norm": 24.95227813720703, "learning_rate": 8.324675324675325e-06, "loss": 42.2593, "step": 4562 }, { "epoch": 108.64477611940299, "grad_norm": 29.643531799316406, "learning_rate": 8.322510822510823e-06, "loss": 42.5247, "step": 4563 }, { "epoch": 108.66865671641791, "grad_norm": 24.321622848510742, "learning_rate": 8.32034632034632e-06, "loss": 41.5149, "step": 4564 }, { "epoch": 108.69253731343284, "grad_norm": 27.7292537689209, "learning_rate": 8.318181818181818e-06, "loss": 41.6252, "step": 4565 }, { "epoch": 108.71641791044776, "grad_norm": 23.14917755126953, "learning_rate": 8.316017316017316e-06, "loss": 41.8188, "step": 4566 }, { "epoch": 108.74029850746268, "grad_norm": 31.897857666015625, "learning_rate": 8.313852813852814e-06, "loss": 41.2639, "step": 4567 }, { "epoch": 108.7641791044776, "grad_norm": 22.20448112487793, "learning_rate": 8.311688311688313e-06, "loss": 40.8366, "step": 4568 }, { "epoch": 108.78805970149254, "grad_norm": 33.260982513427734, "learning_rate": 8.309523809523811e-06, "loss": 42.1547, "step": 4569 }, { "epoch": 108.81194029850747, "grad_norm": 28.327970504760742, "learning_rate": 8.307359307359309e-06, "loss": 41.0261, "step": 4570 }, { "epoch": 108.83582089552239, "grad_norm": 30.421405792236328, "learning_rate": 8.305194805194807e-06, "loss": 42.6333, "step": 4571 }, { "epoch": 108.85970149253731, "grad_norm": 27.54227066040039, "learning_rate": 8.303030303030305e-06, "loss": 41.5392, "step": 4572 }, { "epoch": 108.88358208955223, "grad_norm": 27.083431243896484, "learning_rate": 8.300865800865802e-06, "loss": 40.5557, "step": 4573 }, { "epoch": 108.90746268656716, "grad_norm": 22.203136444091797, "learning_rate": 8.2987012987013e-06, "loss": 40.2079, "step": 4574 }, { "epoch": 108.9313432835821, "grad_norm": 30.871158599853516, "learning_rate": 8.296536796536796e-06, "loss": 40.74, "step": 4575 }, { "epoch": 108.95522388059702, "grad_norm": 26.300838470458984, "learning_rate": 8.294372294372294e-06, "loss": 41.3107, "step": 4576 }, { "epoch": 108.97910447761194, "grad_norm": 31.482698440551758, "learning_rate": 8.292207792207792e-06, "loss": 42.54, "step": 4577 }, { "epoch": 109.0, "grad_norm": 21.353776931762695, "learning_rate": 8.29004329004329e-06, "loss": 36.6851, "step": 4578 }, { "epoch": 109.02388059701492, "grad_norm": 27.117504119873047, "learning_rate": 8.287878787878787e-06, "loss": 41.3884, "step": 4579 }, { "epoch": 109.04776119402985, "grad_norm": 23.497106552124023, "learning_rate": 8.285714285714287e-06, "loss": 41.0846, "step": 4580 }, { "epoch": 109.07164179104478, "grad_norm": 27.996051788330078, "learning_rate": 8.283549783549785e-06, "loss": 41.214, "step": 4581 }, { "epoch": 109.0955223880597, "grad_norm": 24.364675521850586, "learning_rate": 8.281385281385283e-06, "loss": 42.1255, "step": 4582 }, { "epoch": 109.11940298507463, "grad_norm": 31.155681610107422, "learning_rate": 8.27922077922078e-06, "loss": 42.1774, "step": 4583 }, { "epoch": 109.14328358208955, "grad_norm": 27.193376541137695, "learning_rate": 8.277056277056278e-06, "loss": 42.4321, "step": 4584 }, { "epoch": 109.16716417910447, "grad_norm": 30.398059844970703, "learning_rate": 8.274891774891776e-06, "loss": 42.0286, "step": 4585 }, { "epoch": 109.1910447761194, "grad_norm": 27.1219425201416, "learning_rate": 8.272727272727274e-06, "loss": 40.9508, "step": 4586 }, { "epoch": 109.21492537313434, "grad_norm": 29.481327056884766, "learning_rate": 8.270562770562772e-06, "loss": 42.1899, "step": 4587 }, { "epoch": 109.23880597014926, "grad_norm": 29.547292709350586, "learning_rate": 8.26839826839827e-06, "loss": 40.2919, "step": 4588 }, { "epoch": 109.26268656716418, "grad_norm": 26.99224281311035, "learning_rate": 8.266233766233767e-06, "loss": 41.1843, "step": 4589 }, { "epoch": 109.2865671641791, "grad_norm": 25.45054054260254, "learning_rate": 8.264069264069265e-06, "loss": 41.6843, "step": 4590 }, { "epoch": 109.31044776119403, "grad_norm": 27.529739379882812, "learning_rate": 8.261904761904763e-06, "loss": 39.7442, "step": 4591 }, { "epoch": 109.33432835820895, "grad_norm": 23.54625129699707, "learning_rate": 8.25974025974026e-06, "loss": 40.9662, "step": 4592 }, { "epoch": 109.35820895522389, "grad_norm": 26.74515151977539, "learning_rate": 8.257575757575758e-06, "loss": 42.1445, "step": 4593 }, { "epoch": 109.38208955223881, "grad_norm": 24.591623306274414, "learning_rate": 8.255411255411256e-06, "loss": 41.4631, "step": 4594 }, { "epoch": 109.40597014925373, "grad_norm": 27.32378578186035, "learning_rate": 8.253246753246754e-06, "loss": 43.223, "step": 4595 }, { "epoch": 109.42985074626866, "grad_norm": 24.74321937561035, "learning_rate": 8.251082251082252e-06, "loss": 42.1151, "step": 4596 }, { "epoch": 109.45373134328358, "grad_norm": 24.764156341552734, "learning_rate": 8.24891774891775e-06, "loss": 42.0353, "step": 4597 }, { "epoch": 109.4776119402985, "grad_norm": 22.001508712768555, "learning_rate": 8.246753246753247e-06, "loss": 41.5189, "step": 4598 }, { "epoch": 109.50149253731344, "grad_norm": 27.916759490966797, "learning_rate": 8.244588744588745e-06, "loss": 42.8372, "step": 4599 }, { "epoch": 109.52537313432836, "grad_norm": 23.65235137939453, "learning_rate": 8.242424242424243e-06, "loss": 40.9737, "step": 4600 }, { "epoch": 109.54925373134328, "grad_norm": 25.150957107543945, "learning_rate": 8.24025974025974e-06, "loss": 41.279, "step": 4601 }, { "epoch": 109.57313432835821, "grad_norm": 24.193187713623047, "learning_rate": 8.238095238095239e-06, "loss": 42.029, "step": 4602 }, { "epoch": 109.59701492537313, "grad_norm": 26.186813354492188, "learning_rate": 8.235930735930736e-06, "loss": 40.3791, "step": 4603 }, { "epoch": 109.62089552238805, "grad_norm": 24.97614097595215, "learning_rate": 8.233766233766236e-06, "loss": 40.8347, "step": 4604 }, { "epoch": 109.64477611940299, "grad_norm": 27.51297950744629, "learning_rate": 8.231601731601732e-06, "loss": 40.6329, "step": 4605 }, { "epoch": 109.66865671641791, "grad_norm": 24.866369247436523, "learning_rate": 8.22943722943723e-06, "loss": 41.5968, "step": 4606 }, { "epoch": 109.69253731343284, "grad_norm": 28.864290237426758, "learning_rate": 8.227272727272728e-06, "loss": 41.3671, "step": 4607 }, { "epoch": 109.71641791044776, "grad_norm": 29.503835678100586, "learning_rate": 8.225108225108225e-06, "loss": 41.2315, "step": 4608 }, { "epoch": 109.74029850746268, "grad_norm": 26.039966583251953, "learning_rate": 8.222943722943723e-06, "loss": 41.0179, "step": 4609 }, { "epoch": 109.7641791044776, "grad_norm": 22.42831039428711, "learning_rate": 8.220779220779221e-06, "loss": 41.3331, "step": 4610 }, { "epoch": 109.78805970149254, "grad_norm": 25.328296661376953, "learning_rate": 8.218614718614719e-06, "loss": 39.9773, "step": 4611 }, { "epoch": 109.81194029850747, "grad_norm": 18.524499893188477, "learning_rate": 8.216450216450216e-06, "loss": 42.5481, "step": 4612 }, { "epoch": 109.83582089552239, "grad_norm": 26.1571102142334, "learning_rate": 8.214285714285714e-06, "loss": 41.759, "step": 4613 }, { "epoch": 109.85970149253731, "grad_norm": 22.46668243408203, "learning_rate": 8.212121212121212e-06, "loss": 42.0987, "step": 4614 }, { "epoch": 109.88358208955223, "grad_norm": 29.418230056762695, "learning_rate": 8.20995670995671e-06, "loss": 42.8083, "step": 4615 }, { "epoch": 109.90746268656716, "grad_norm": 23.00196647644043, "learning_rate": 8.20779220779221e-06, "loss": 40.488, "step": 4616 }, { "epoch": 109.9313432835821, "grad_norm": 27.977956771850586, "learning_rate": 8.205627705627707e-06, "loss": 41.4731, "step": 4617 }, { "epoch": 109.95522388059702, "grad_norm": 24.776628494262695, "learning_rate": 8.203463203463205e-06, "loss": 42.6836, "step": 4618 }, { "epoch": 109.97910447761194, "grad_norm": 27.11109733581543, "learning_rate": 8.201298701298703e-06, "loss": 40.7662, "step": 4619 }, { "epoch": 110.0, "grad_norm": 20.246700286865234, "learning_rate": 8.1991341991342e-06, "loss": 36.1303, "step": 4620 }, { "epoch": 110.0, "step": 4620, "total_flos": 2.2713564637226506e+17, "train_loss": 7.6340307194433175, "train_runtime": 25635.1587, "train_samples_per_second": 22.965, "train_steps_per_second": 0.18 }, { "epoch": 110.02388059701492, "grad_norm": 21.282230377197266, "learning_rate": 1e-05, "loss": 42.4757, "step": 4621 }, { "epoch": 110.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998015873015874e-06, "loss": 48.912, "step": 4622 }, { "epoch": 110.07164179104478, "grad_norm": Infinity, "learning_rate": 9.998015873015874e-06, "loss": 49.0673, "step": 4623 }, { "epoch": 110.0955223880597, "grad_norm": 445.232177734375, "learning_rate": 9.998015873015874e-06, "loss": 48.7345, "step": 4624 }, { "epoch": 110.11940298507463, "grad_norm": 224.98858642578125, "learning_rate": 9.996031746031746e-06, "loss": 45.5376, "step": 4625 }, { "epoch": 110.14328358208955, "grad_norm": 92.86235046386719, "learning_rate": 9.99404761904762e-06, "loss": 44.0537, "step": 4626 }, { "epoch": 110.16716417910447, "grad_norm": 67.79006958007812, "learning_rate": 9.992063492063493e-06, "loss": 42.4137, "step": 4627 }, { "epoch": 110.1910447761194, "grad_norm": 52.0079345703125, "learning_rate": 9.990079365079366e-06, "loss": 42.7133, "step": 4628 }, { "epoch": 110.21492537313434, "grad_norm": 40.780120849609375, "learning_rate": 9.988095238095239e-06, "loss": 42.7036, "step": 4629 }, { "epoch": 110.23880597014926, "grad_norm": 45.80341339111328, "learning_rate": 9.986111111111111e-06, "loss": 42.5543, "step": 4630 }, { "epoch": 110.26268656716418, "grad_norm": NaN, "learning_rate": 9.984126984126986e-06, "loss": 67.9585, "step": 4631 }, { "epoch": 110.2865671641791, "grad_norm": 37.52305603027344, "learning_rate": 9.984126984126986e-06, "loss": 42.0859, "step": 4632 }, { "epoch": 110.31044776119403, "grad_norm": 37.10969543457031, "learning_rate": 9.982142857142858e-06, "loss": 42.8517, "step": 4633 }, { "epoch": 110.33432835820895, "grad_norm": 33.601905822753906, "learning_rate": 9.980158730158731e-06, "loss": 41.4451, "step": 4634 }, { "epoch": 110.35820895522389, "grad_norm": 25.348403930664062, "learning_rate": 9.978174603174604e-06, "loss": 41.7698, "step": 4635 }, { "epoch": 110.38208955223881, "grad_norm": 28.69048309326172, "learning_rate": 9.976190476190477e-06, "loss": 41.8287, "step": 4636 }, { "epoch": 110.40597014925373, "grad_norm": 30.578548431396484, "learning_rate": 9.97420634920635e-06, "loss": 42.4165, "step": 4637 }, { "epoch": 110.42985074626866, "grad_norm": 19.63727569580078, "learning_rate": 9.972222222222224e-06, "loss": 42.37, "step": 4638 }, { "epoch": 110.45373134328358, "grad_norm": 34.42063522338867, "learning_rate": 9.970238095238096e-06, "loss": 42.7996, "step": 4639 }, { "epoch": 110.4776119402985, "grad_norm": NaN, "learning_rate": 9.968253968253969e-06, "loss": 62.9577, "step": 4640 }, { "epoch": 110.50149253731344, "grad_norm": 23.131494522094727, "learning_rate": 9.968253968253969e-06, "loss": 40.8106, "step": 4641 }, { "epoch": 110.52537313432836, "grad_norm": 22.294376373291016, "learning_rate": 9.966269841269842e-06, "loss": 42.7557, "step": 4642 }, { "epoch": 110.54925373134328, "grad_norm": 30.476016998291016, "learning_rate": 9.964285714285714e-06, "loss": 42.1925, "step": 4643 }, { "epoch": 110.57313432835821, "grad_norm": 20.84010887145996, "learning_rate": 9.962301587301589e-06, "loss": 41.9241, "step": 4644 }, { "epoch": 110.59701492537313, "grad_norm": 23.944196701049805, "learning_rate": 9.960317460317462e-06, "loss": 41.8911, "step": 4645 }, { "epoch": 110.62089552238805, "grad_norm": 28.773279190063477, "learning_rate": 9.958333333333334e-06, "loss": 41.5711, "step": 4646 }, { "epoch": 110.64477611940299, "grad_norm": 22.82482147216797, "learning_rate": 9.956349206349207e-06, "loss": 42.1915, "step": 4647 }, { "epoch": 110.66865671641791, "grad_norm": 24.0530948638916, "learning_rate": 9.95436507936508e-06, "loss": 40.6648, "step": 4648 }, { "epoch": 110.69253731343284, "grad_norm": 21.640682220458984, "learning_rate": 9.952380952380954e-06, "loss": 41.5682, "step": 4649 }, { "epoch": 110.71641791044776, "grad_norm": 21.377979278564453, "learning_rate": 9.950396825396827e-06, "loss": 41.6034, "step": 4650 }, { "epoch": 110.74029850746268, "grad_norm": 19.04741668701172, "learning_rate": 9.9484126984127e-06, "loss": 41.8165, "step": 4651 }, { "epoch": 110.7641791044776, "grad_norm": 22.74652099609375, "learning_rate": 9.946428571428572e-06, "loss": 40.4093, "step": 4652 }, { "epoch": 110.78805970149254, "grad_norm": 16.828824996948242, "learning_rate": 9.944444444444445e-06, "loss": 42.181, "step": 4653 }, { "epoch": 110.81194029850747, "grad_norm": 20.226478576660156, "learning_rate": 9.94246031746032e-06, "loss": 41.774, "step": 4654 }, { "epoch": 110.83582089552239, "grad_norm": 19.935068130493164, "learning_rate": 9.940476190476192e-06, "loss": 41.6547, "step": 4655 }, { "epoch": 110.85970149253731, "grad_norm": 18.150102615356445, "learning_rate": 9.938492063492065e-06, "loss": 39.962, "step": 4656 }, { "epoch": 110.88358208955223, "grad_norm": 27.12464141845703, "learning_rate": 9.936507936507937e-06, "loss": 41.2807, "step": 4657 }, { "epoch": 110.90746268656716, "grad_norm": 18.194360733032227, "learning_rate": 9.93452380952381e-06, "loss": 40.8381, "step": 4658 }, { "epoch": 110.9313432835821, "grad_norm": 25.638107299804688, "learning_rate": 9.932539682539684e-06, "loss": 41.2385, "step": 4659 }, { "epoch": 110.95522388059702, "grad_norm": 21.1163387298584, "learning_rate": 9.930555555555557e-06, "loss": 41.0065, "step": 4660 }, { "epoch": 110.97910447761194, "grad_norm": 17.089710235595703, "learning_rate": 9.92857142857143e-06, "loss": 41.8835, "step": 4661 }, { "epoch": 111.0, "grad_norm": 19.484764099121094, "learning_rate": 9.926587301587303e-06, "loss": 37.1289, "step": 4662 }, { "epoch": 111.02388059701492, "grad_norm": 20.73271942138672, "learning_rate": 9.924603174603175e-06, "loss": 40.8035, "step": 4663 }, { "epoch": 111.04776119402985, "grad_norm": 14.759368896484375, "learning_rate": 9.922619047619048e-06, "loss": 42.164, "step": 4664 }, { "epoch": 111.07164179104478, "grad_norm": 20.654579162597656, "learning_rate": 9.920634920634922e-06, "loss": 41.7302, "step": 4665 }, { "epoch": 111.0955223880597, "grad_norm": 17.05702781677246, "learning_rate": 9.918650793650795e-06, "loss": 41.3278, "step": 4666 }, { "epoch": 111.11940298507463, "grad_norm": 15.701156616210938, "learning_rate": 9.916666666666668e-06, "loss": 40.7933, "step": 4667 }, { "epoch": 111.14328358208955, "grad_norm": 17.04022216796875, "learning_rate": 9.91468253968254e-06, "loss": 41.58, "step": 4668 }, { "epoch": 111.16716417910447, "grad_norm": 16.614116668701172, "learning_rate": 9.912698412698413e-06, "loss": 40.8149, "step": 4669 }, { "epoch": 111.1910447761194, "grad_norm": 17.664091110229492, "learning_rate": 9.910714285714288e-06, "loss": 40.459, "step": 4670 }, { "epoch": 111.21492537313434, "grad_norm": 17.730188369750977, "learning_rate": 9.90873015873016e-06, "loss": 42.3291, "step": 4671 }, { "epoch": 111.23880597014926, "grad_norm": 14.860199928283691, "learning_rate": 9.906746031746033e-06, "loss": 42.314, "step": 4672 }, { "epoch": 111.26268656716418, "grad_norm": 18.45416259765625, "learning_rate": 9.904761904761906e-06, "loss": 41.2486, "step": 4673 }, { "epoch": 111.2865671641791, "grad_norm": 15.178065299987793, "learning_rate": 9.902777777777778e-06, "loss": 42.0394, "step": 4674 }, { "epoch": 111.31044776119403, "grad_norm": 16.214420318603516, "learning_rate": 9.900793650793653e-06, "loss": 41.6074, "step": 4675 }, { "epoch": 111.33432835820895, "grad_norm": 19.241151809692383, "learning_rate": 9.898809523809525e-06, "loss": 42.0125, "step": 4676 }, { "epoch": 111.35820895522389, "grad_norm": 16.019407272338867, "learning_rate": 9.896825396825398e-06, "loss": 41.5867, "step": 4677 }, { "epoch": 111.38208955223881, "grad_norm": 18.017990112304688, "learning_rate": 9.89484126984127e-06, "loss": 40.4534, "step": 4678 }, { "epoch": 111.40597014925373, "grad_norm": 18.37062644958496, "learning_rate": 9.892857142857143e-06, "loss": 41.4307, "step": 4679 }, { "epoch": 111.42985074626866, "grad_norm": 18.12076187133789, "learning_rate": 9.890873015873018e-06, "loss": 42.1702, "step": 4680 }, { "epoch": 111.45373134328358, "grad_norm": 21.935218811035156, "learning_rate": 9.88888888888889e-06, "loss": 40.5205, "step": 4681 }, { "epoch": 111.4776119402985, "grad_norm": 19.747133255004883, "learning_rate": 9.886904761904763e-06, "loss": 41.7721, "step": 4682 }, { "epoch": 111.50149253731344, "grad_norm": 17.159732818603516, "learning_rate": 9.884920634920636e-06, "loss": 41.285, "step": 4683 }, { "epoch": 111.52537313432836, "grad_norm": 15.736952781677246, "learning_rate": 9.882936507936509e-06, "loss": 40.8216, "step": 4684 }, { "epoch": 111.54925373134328, "grad_norm": 17.591854095458984, "learning_rate": 9.880952380952381e-06, "loss": 40.0516, "step": 4685 }, { "epoch": 111.57313432835821, "grad_norm": 17.530582427978516, "learning_rate": 9.878968253968256e-06, "loss": 41.4235, "step": 4686 }, { "epoch": 111.59701492537313, "grad_norm": 18.394372940063477, "learning_rate": 9.876984126984128e-06, "loss": 41.7204, "step": 4687 }, { "epoch": 111.62089552238805, "grad_norm": 17.80558967590332, "learning_rate": 9.875000000000001e-06, "loss": 41.6861, "step": 4688 }, { "epoch": 111.64477611940299, "grad_norm": 19.939964294433594, "learning_rate": 9.873015873015874e-06, "loss": 41.3087, "step": 4689 }, { "epoch": 111.66865671641791, "grad_norm": 14.58205509185791, "learning_rate": 9.871031746031747e-06, "loss": 41.7955, "step": 4690 }, { "epoch": 111.69253731343284, "grad_norm": 19.98933982849121, "learning_rate": 9.869047619047621e-06, "loss": 42.3174, "step": 4691 }, { "epoch": 111.71641791044776, "grad_norm": 20.377466201782227, "learning_rate": 9.867063492063494e-06, "loss": 42.1654, "step": 4692 }, { "epoch": 111.74029850746268, "grad_norm": 19.26752471923828, "learning_rate": 9.865079365079366e-06, "loss": 41.0597, "step": 4693 }, { "epoch": 111.7641791044776, "grad_norm": 16.435440063476562, "learning_rate": 9.863095238095239e-06, "loss": 42.1122, "step": 4694 }, { "epoch": 111.78805970149254, "grad_norm": 17.955474853515625, "learning_rate": 9.861111111111112e-06, "loss": 41.0326, "step": 4695 }, { "epoch": 111.81194029850747, "grad_norm": 21.791505813598633, "learning_rate": 9.859126984126986e-06, "loss": 42.4256, "step": 4696 }, { "epoch": 111.83582089552239, "grad_norm": 17.081600189208984, "learning_rate": 9.857142857142859e-06, "loss": 41.7548, "step": 4697 }, { "epoch": 111.85970149253731, "grad_norm": 21.21491241455078, "learning_rate": 9.855158730158732e-06, "loss": 41.1434, "step": 4698 }, { "epoch": 111.88358208955223, "grad_norm": 25.082992553710938, "learning_rate": 9.853174603174604e-06, "loss": 41.2857, "step": 4699 }, { "epoch": 111.90746268656716, "grad_norm": 19.19919204711914, "learning_rate": 9.851190476190477e-06, "loss": 41.5529, "step": 4700 }, { "epoch": 111.9313432835821, "grad_norm": 32.29753494262695, "learning_rate": 9.849206349206351e-06, "loss": 42.4376, "step": 4701 }, { "epoch": 111.95522388059702, "grad_norm": 20.654430389404297, "learning_rate": 9.847222222222224e-06, "loss": 41.3052, "step": 4702 }, { "epoch": 111.97910447761194, "grad_norm": 32.98462677001953, "learning_rate": 9.845238095238097e-06, "loss": 41.1561, "step": 4703 }, { "epoch": 112.0, "grad_norm": 18.214174270629883, "learning_rate": 9.843253968253968e-06, "loss": 35.3902, "step": 4704 }, { "epoch": 112.02388059701492, "grad_norm": 25.639781951904297, "learning_rate": 9.841269841269842e-06, "loss": 40.7291, "step": 4705 }, { "epoch": 112.04776119402985, "grad_norm": 19.745450973510742, "learning_rate": 9.839285714285715e-06, "loss": 41.6564, "step": 4706 }, { "epoch": 112.07164179104478, "grad_norm": 24.907617568969727, "learning_rate": 9.837301587301588e-06, "loss": 41.4856, "step": 4707 }, { "epoch": 112.0955223880597, "grad_norm": 24.20347023010254, "learning_rate": 9.83531746031746e-06, "loss": 40.6423, "step": 4708 }, { "epoch": 112.11940298507463, "grad_norm": 16.246206283569336, "learning_rate": 9.833333333333333e-06, "loss": 40.5309, "step": 4709 }, { "epoch": 112.14328358208955, "grad_norm": 28.89447784423828, "learning_rate": 9.831349206349207e-06, "loss": 41.173, "step": 4710 }, { "epoch": 112.16716417910447, "grad_norm": 18.989233016967773, "learning_rate": 9.82936507936508e-06, "loss": 42.2629, "step": 4711 }, { "epoch": 112.1910447761194, "grad_norm": 22.261035919189453, "learning_rate": 9.827380952380953e-06, "loss": 41.9901, "step": 4712 }, { "epoch": 112.21492537313434, "grad_norm": 21.082855224609375, "learning_rate": 9.825396825396825e-06, "loss": 40.9817, "step": 4713 }, { "epoch": 112.23880597014926, "grad_norm": 15.739337921142578, "learning_rate": 9.823412698412698e-06, "loss": 42.0745, "step": 4714 }, { "epoch": 112.26268656716418, "grad_norm": 25.604066848754883, "learning_rate": 9.821428571428573e-06, "loss": 40.9371, "step": 4715 }, { "epoch": 112.2865671641791, "grad_norm": 17.916481018066406, "learning_rate": 9.819444444444445e-06, "loss": 40.9361, "step": 4716 }, { "epoch": 112.31044776119403, "grad_norm": 21.53338050842285, "learning_rate": 9.817460317460318e-06, "loss": 40.2245, "step": 4717 }, { "epoch": 112.33432835820895, "grad_norm": 21.370702743530273, "learning_rate": 9.81547619047619e-06, "loss": 40.7986, "step": 4718 }, { "epoch": 112.35820895522389, "grad_norm": 18.217588424682617, "learning_rate": 9.813492063492063e-06, "loss": 41.5072, "step": 4719 }, { "epoch": 112.38208955223881, "grad_norm": 18.874122619628906, "learning_rate": 9.811507936507938e-06, "loss": 39.7088, "step": 4720 }, { "epoch": 112.40597014925373, "grad_norm": 17.31776237487793, "learning_rate": 9.80952380952381e-06, "loss": 41.6839, "step": 4721 }, { "epoch": 112.42985074626866, "grad_norm": 23.88166046142578, "learning_rate": 9.807539682539683e-06, "loss": 41.7857, "step": 4722 }, { "epoch": 112.45373134328358, "grad_norm": 17.09743881225586, "learning_rate": 9.805555555555556e-06, "loss": 42.2407, "step": 4723 }, { "epoch": 112.4776119402985, "grad_norm": 20.519947052001953, "learning_rate": 9.803571428571428e-06, "loss": 41.8095, "step": 4724 }, { "epoch": 112.50149253731344, "grad_norm": 23.761943817138672, "learning_rate": 9.801587301587301e-06, "loss": 41.371, "step": 4725 }, { "epoch": 112.52537313432836, "grad_norm": 17.033470153808594, "learning_rate": 9.799603174603176e-06, "loss": 41.5687, "step": 4726 }, { "epoch": 112.54925373134328, "grad_norm": 18.175559997558594, "learning_rate": 9.797619047619048e-06, "loss": 42.2144, "step": 4727 }, { "epoch": 112.57313432835821, "grad_norm": 19.10957145690918, "learning_rate": 9.795634920634921e-06, "loss": 40.2305, "step": 4728 }, { "epoch": 112.59701492537313, "grad_norm": 20.52096176147461, "learning_rate": 9.793650793650794e-06, "loss": 42.5612, "step": 4729 }, { "epoch": 112.62089552238805, "grad_norm": 17.42753791809082, "learning_rate": 9.791666666666666e-06, "loss": 43.286, "step": 4730 }, { "epoch": 112.64477611940299, "grad_norm": 25.452363967895508, "learning_rate": 9.78968253968254e-06, "loss": 41.0071, "step": 4731 }, { "epoch": 112.66865671641791, "grad_norm": 21.480247497558594, "learning_rate": 9.787698412698413e-06, "loss": 41.7063, "step": 4732 }, { "epoch": 112.69253731343284, "grad_norm": 18.553220748901367, "learning_rate": 9.785714285714286e-06, "loss": 41.4099, "step": 4733 }, { "epoch": 112.71641791044776, "grad_norm": 25.513225555419922, "learning_rate": 9.783730158730159e-06, "loss": 41.5696, "step": 4734 }, { "epoch": 112.74029850746268, "grad_norm": 16.76629638671875, "learning_rate": 9.781746031746032e-06, "loss": 41.6305, "step": 4735 }, { "epoch": 112.7641791044776, "grad_norm": 19.330625534057617, "learning_rate": 9.779761904761906e-06, "loss": 40.7885, "step": 4736 }, { "epoch": 112.78805970149254, "grad_norm": 24.649667739868164, "learning_rate": 9.777777777777779e-06, "loss": 41.5939, "step": 4737 }, { "epoch": 112.81194029850747, "grad_norm": 15.628157615661621, "learning_rate": 9.775793650793651e-06, "loss": 40.9676, "step": 4738 }, { "epoch": 112.83582089552239, "grad_norm": 18.18578338623047, "learning_rate": 9.773809523809524e-06, "loss": 40.0681, "step": 4739 }, { "epoch": 112.85970149253731, "grad_norm": 16.768980026245117, "learning_rate": 9.771825396825397e-06, "loss": 42.2564, "step": 4740 }, { "epoch": 112.88358208955223, "grad_norm": 18.52190399169922, "learning_rate": 9.769841269841271e-06, "loss": 42.4806, "step": 4741 }, { "epoch": 112.90746268656716, "grad_norm": 20.884937286376953, "learning_rate": 9.767857142857144e-06, "loss": 41.2333, "step": 4742 }, { "epoch": 112.9313432835821, "grad_norm": 20.760377883911133, "learning_rate": 9.765873015873017e-06, "loss": 41.3071, "step": 4743 }, { "epoch": 112.95522388059702, "grad_norm": 19.27536392211914, "learning_rate": 9.76388888888889e-06, "loss": 42.3135, "step": 4744 }, { "epoch": 112.97910447761194, "grad_norm": 16.836727142333984, "learning_rate": 9.761904761904762e-06, "loss": 40.9553, "step": 4745 }, { "epoch": 113.0, "grad_norm": 15.910188674926758, "learning_rate": 9.759920634920635e-06, "loss": 35.1574, "step": 4746 }, { "epoch": 113.02388059701492, "grad_norm": 25.05491828918457, "learning_rate": 9.757936507936509e-06, "loss": 40.585, "step": 4747 }, { "epoch": 113.04776119402985, "grad_norm": NaN, "learning_rate": 9.755952380952382e-06, "loss": 62.2866, "step": 4748 }, { "epoch": 113.07164179104478, "grad_norm": 15.88016414642334, "learning_rate": 9.755952380952382e-06, "loss": 41.1309, "step": 4749 }, { "epoch": 113.0955223880597, "grad_norm": NaN, "learning_rate": 9.753968253968254e-06, "loss": 48.2293, "step": 4750 }, { "epoch": 113.11940298507463, "grad_norm": 24.244104385375977, "learning_rate": 9.753968253968254e-06, "loss": 42.1546, "step": 4751 }, { "epoch": 113.14328358208955, "grad_norm": 24.652694702148438, "learning_rate": 9.751984126984127e-06, "loss": 41.6784, "step": 4752 }, { "epoch": 113.16716417910447, "grad_norm": 17.30400276184082, "learning_rate": 9.75e-06, "loss": 41.3338, "step": 4753 }, { "epoch": 113.1910447761194, "grad_norm": 22.837020874023438, "learning_rate": 9.748015873015874e-06, "loss": 39.9112, "step": 4754 }, { "epoch": 113.21492537313434, "grad_norm": NaN, "learning_rate": 9.746031746031747e-06, "loss": 51.4889, "step": 4755 }, { "epoch": 113.23880597014926, "grad_norm": 19.977386474609375, "learning_rate": 9.746031746031747e-06, "loss": 40.8136, "step": 4756 }, { "epoch": 113.26268656716418, "grad_norm": 17.338441848754883, "learning_rate": 9.74404761904762e-06, "loss": 41.41, "step": 4757 }, { "epoch": 113.2865671641791, "grad_norm": 17.25606346130371, "learning_rate": 9.742063492063492e-06, "loss": 42.0376, "step": 4758 }, { "epoch": 113.31044776119403, "grad_norm": 18.690338134765625, "learning_rate": 9.740079365079365e-06, "loss": 39.8714, "step": 4759 }, { "epoch": 113.33432835820895, "grad_norm": 20.5388240814209, "learning_rate": 9.73809523809524e-06, "loss": 40.7767, "step": 4760 }, { "epoch": 113.35820895522389, "grad_norm": 20.36353302001953, "learning_rate": 9.736111111111112e-06, "loss": 42.7652, "step": 4761 }, { "epoch": 113.38208955223881, "grad_norm": 17.473264694213867, "learning_rate": 9.734126984126985e-06, "loss": 41.3501, "step": 4762 }, { "epoch": 113.40597014925373, "grad_norm": 16.665048599243164, "learning_rate": 9.732142857142858e-06, "loss": 40.8948, "step": 4763 }, { "epoch": 113.42985074626866, "grad_norm": 18.917985916137695, "learning_rate": 9.73015873015873e-06, "loss": 42.4226, "step": 4764 }, { "epoch": 113.45373134328358, "grad_norm": 15.448834419250488, "learning_rate": 9.728174603174605e-06, "loss": 41.5246, "step": 4765 }, { "epoch": 113.4776119402985, "grad_norm": 16.92607879638672, "learning_rate": 9.726190476190477e-06, "loss": 41.1058, "step": 4766 }, { "epoch": 113.50149253731344, "grad_norm": 16.17359161376953, "learning_rate": 9.72420634920635e-06, "loss": 41.4232, "step": 4767 }, { "epoch": 113.52537313432836, "grad_norm": 16.6822452545166, "learning_rate": 9.722222222222223e-06, "loss": 41.9703, "step": 4768 }, { "epoch": 113.54925373134328, "grad_norm": 16.724811553955078, "learning_rate": 9.720238095238095e-06, "loss": 41.4117, "step": 4769 }, { "epoch": 113.57313432835821, "grad_norm": 16.85785484313965, "learning_rate": 9.71825396825397e-06, "loss": 41.4467, "step": 4770 }, { "epoch": 113.59701492537313, "grad_norm": 19.173654556274414, "learning_rate": 9.716269841269842e-06, "loss": 40.871, "step": 4771 }, { "epoch": 113.62089552238805, "grad_norm": 16.131881713867188, "learning_rate": 9.714285714285715e-06, "loss": 42.595, "step": 4772 }, { "epoch": 113.64477611940299, "grad_norm": 15.41543960571289, "learning_rate": 9.712301587301588e-06, "loss": 41.7077, "step": 4773 }, { "epoch": 113.66865671641791, "grad_norm": 19.808330535888672, "learning_rate": 9.71031746031746e-06, "loss": 40.8761, "step": 4774 }, { "epoch": 113.69253731343284, "grad_norm": 16.406370162963867, "learning_rate": 9.708333333333333e-06, "loss": 41.1769, "step": 4775 }, { "epoch": 113.71641791044776, "grad_norm": 20.239530563354492, "learning_rate": 9.706349206349208e-06, "loss": 40.8274, "step": 4776 }, { "epoch": 113.74029850746268, "grad_norm": 18.771743774414062, "learning_rate": 9.70436507936508e-06, "loss": 41.4099, "step": 4777 }, { "epoch": 113.7641791044776, "grad_norm": 18.418540954589844, "learning_rate": 9.702380952380953e-06, "loss": 39.6443, "step": 4778 }, { "epoch": 113.78805970149254, "grad_norm": 21.50214958190918, "learning_rate": 9.700396825396826e-06, "loss": 41.6937, "step": 4779 }, { "epoch": 113.81194029850747, "grad_norm": 22.449935913085938, "learning_rate": 9.698412698412698e-06, "loss": 41.7069, "step": 4780 }, { "epoch": 113.83582089552239, "grad_norm": 15.33384895324707, "learning_rate": 9.696428571428573e-06, "loss": 40.6666, "step": 4781 }, { "epoch": 113.85970149253731, "grad_norm": 21.013437271118164, "learning_rate": 9.694444444444446e-06, "loss": 40.5768, "step": 4782 }, { "epoch": 113.88358208955223, "grad_norm": 19.128190994262695, "learning_rate": 9.692460317460318e-06, "loss": 41.4668, "step": 4783 }, { "epoch": 113.90746268656716, "grad_norm": 23.851394653320312, "learning_rate": 9.690476190476191e-06, "loss": 41.1051, "step": 4784 }, { "epoch": 113.9313432835821, "grad_norm": 21.990671157836914, "learning_rate": 9.688492063492064e-06, "loss": 41.6264, "step": 4785 }, { "epoch": 113.95522388059702, "grad_norm": 16.185327529907227, "learning_rate": 9.686507936507938e-06, "loss": 41.8408, "step": 4786 }, { "epoch": 113.97910447761194, "grad_norm": 30.063560485839844, "learning_rate": 9.68452380952381e-06, "loss": 41.2658, "step": 4787 }, { "epoch": 114.0, "grad_norm": 19.5380916595459, "learning_rate": 9.682539682539683e-06, "loss": 36.7106, "step": 4788 }, { "epoch": 114.02388059701492, "grad_norm": 26.1965389251709, "learning_rate": 9.680555555555556e-06, "loss": 42.3092, "step": 4789 }, { "epoch": 114.04776119402985, "grad_norm": 19.98543930053711, "learning_rate": 9.678571428571429e-06, "loss": 41.2309, "step": 4790 }, { "epoch": 114.07164179104478, "grad_norm": 26.361085891723633, "learning_rate": 9.676587301587303e-06, "loss": 41.9058, "step": 4791 }, { "epoch": 114.0955223880597, "grad_norm": 23.132400512695312, "learning_rate": 9.674603174603176e-06, "loss": 43.0372, "step": 4792 }, { "epoch": 114.11940298507463, "grad_norm": 25.199525833129883, "learning_rate": 9.672619047619049e-06, "loss": 41.5403, "step": 4793 }, { "epoch": 114.14328358208955, "grad_norm": 23.17612075805664, "learning_rate": 9.670634920634921e-06, "loss": 41.0863, "step": 4794 }, { "epoch": 114.16716417910447, "grad_norm": 23.930667877197266, "learning_rate": 9.668650793650794e-06, "loss": 40.8035, "step": 4795 }, { "epoch": 114.1910447761194, "grad_norm": 23.487939834594727, "learning_rate": 9.666666666666667e-06, "loss": 39.6217, "step": 4796 }, { "epoch": 114.21492537313434, "grad_norm": 23.342439651489258, "learning_rate": 9.664682539682541e-06, "loss": 42.0502, "step": 4797 }, { "epoch": 114.23880597014926, "grad_norm": 25.328317642211914, "learning_rate": 9.662698412698414e-06, "loss": 40.3101, "step": 4798 }, { "epoch": 114.26268656716418, "grad_norm": 18.363313674926758, "learning_rate": 9.660714285714287e-06, "loss": 40.5746, "step": 4799 }, { "epoch": 114.2865671641791, "grad_norm": 24.081649780273438, "learning_rate": 9.65873015873016e-06, "loss": 42.0376, "step": 4800 }, { "epoch": 114.31044776119403, "grad_norm": 20.24997329711914, "learning_rate": 9.656746031746032e-06, "loss": 40.5347, "step": 4801 }, { "epoch": 114.33432835820895, "grad_norm": 14.942011833190918, "learning_rate": 9.654761904761906e-06, "loss": 41.7814, "step": 4802 }, { "epoch": 114.35820895522389, "grad_norm": 22.662822723388672, "learning_rate": 9.652777777777779e-06, "loss": 41.767, "step": 4803 }, { "epoch": 114.38208955223881, "grad_norm": 19.27354621887207, "learning_rate": 9.650793650793652e-06, "loss": 40.7947, "step": 4804 }, { "epoch": 114.40597014925373, "grad_norm": 14.431193351745605, "learning_rate": 9.648809523809524e-06, "loss": 42.3785, "step": 4805 }, { "epoch": 114.42985074626866, "grad_norm": 15.706212043762207, "learning_rate": 9.646825396825397e-06, "loss": 42.0003, "step": 4806 }, { "epoch": 114.45373134328358, "grad_norm": 17.65169906616211, "learning_rate": 9.644841269841271e-06, "loss": 41.968, "step": 4807 }, { "epoch": 114.4776119402985, "grad_norm": 16.792739868164062, "learning_rate": 9.642857142857144e-06, "loss": 41.1987, "step": 4808 }, { "epoch": 114.50149253731344, "grad_norm": 20.06905746459961, "learning_rate": 9.640873015873017e-06, "loss": 41.0098, "step": 4809 }, { "epoch": 114.52537313432836, "grad_norm": 24.13865852355957, "learning_rate": 9.63888888888889e-06, "loss": 41.5633, "step": 4810 }, { "epoch": 114.54925373134328, "grad_norm": 16.85896873474121, "learning_rate": 9.636904761904762e-06, "loss": 41.7772, "step": 4811 }, { "epoch": 114.57313432835821, "grad_norm": 15.44628620147705, "learning_rate": 9.634920634920637e-06, "loss": 40.0732, "step": 4812 }, { "epoch": 114.59701492537313, "grad_norm": 18.970260620117188, "learning_rate": 9.63293650793651e-06, "loss": 42.318, "step": 4813 }, { "epoch": 114.62089552238805, "grad_norm": 16.574501037597656, "learning_rate": 9.630952380952382e-06, "loss": 40.0387, "step": 4814 }, { "epoch": 114.64477611940299, "grad_norm": 18.372955322265625, "learning_rate": 9.628968253968255e-06, "loss": 41.5759, "step": 4815 }, { "epoch": 114.66865671641791, "grad_norm": 21.253253936767578, "learning_rate": 9.626984126984127e-06, "loss": 40.2675, "step": 4816 }, { "epoch": 114.69253731343284, "grad_norm": 19.223817825317383, "learning_rate": 9.625e-06, "loss": 41.1779, "step": 4817 }, { "epoch": 114.71641791044776, "grad_norm": 17.391407012939453, "learning_rate": 9.623015873015875e-06, "loss": 40.9899, "step": 4818 }, { "epoch": 114.74029850746268, "grad_norm": 21.367889404296875, "learning_rate": 9.621031746031747e-06, "loss": 40.1854, "step": 4819 }, { "epoch": 114.7641791044776, "grad_norm": 21.202396392822266, "learning_rate": 9.61904761904762e-06, "loss": 41.5819, "step": 4820 }, { "epoch": 114.78805970149254, "grad_norm": 14.345793724060059, "learning_rate": 9.617063492063493e-06, "loss": 41.7843, "step": 4821 }, { "epoch": 114.81194029850747, "grad_norm": 16.483112335205078, "learning_rate": 9.615079365079365e-06, "loss": 40.9715, "step": 4822 }, { "epoch": 114.83582089552239, "grad_norm": 16.397315979003906, "learning_rate": 9.61309523809524e-06, "loss": 40.8702, "step": 4823 }, { "epoch": 114.85970149253731, "grad_norm": 14.784750938415527, "learning_rate": 9.611111111111112e-06, "loss": 40.5076, "step": 4824 }, { "epoch": 114.88358208955223, "grad_norm": 21.29036521911621, "learning_rate": 9.609126984126985e-06, "loss": 41.0657, "step": 4825 }, { "epoch": 114.90746268656716, "grad_norm": 19.237743377685547, "learning_rate": 9.607142857142858e-06, "loss": 40.7839, "step": 4826 }, { "epoch": 114.9313432835821, "grad_norm": 17.527833938598633, "learning_rate": 9.60515873015873e-06, "loss": 41.3853, "step": 4827 }, { "epoch": 114.95522388059702, "grad_norm": 16.477439880371094, "learning_rate": 9.603174603174605e-06, "loss": 41.3862, "step": 4828 }, { "epoch": 114.97910447761194, "grad_norm": 16.46197509765625, "learning_rate": 9.601190476190478e-06, "loss": 41.9143, "step": 4829 }, { "epoch": 115.0, "grad_norm": 18.8862361907959, "learning_rate": 9.59920634920635e-06, "loss": 36.444, "step": 4830 }, { "epoch": 115.02388059701492, "grad_norm": 22.985044479370117, "learning_rate": 9.597222222222223e-06, "loss": 41.3098, "step": 4831 }, { "epoch": 115.04776119402985, "grad_norm": 17.263700485229492, "learning_rate": 9.595238095238096e-06, "loss": 41.2013, "step": 4832 }, { "epoch": 115.07164179104478, "grad_norm": 21.497802734375, "learning_rate": 9.59325396825397e-06, "loss": 40.4798, "step": 4833 }, { "epoch": 115.0955223880597, "grad_norm": 20.014450073242188, "learning_rate": 9.591269841269843e-06, "loss": 41.2098, "step": 4834 }, { "epoch": 115.11940298507463, "grad_norm": 18.972618103027344, "learning_rate": 9.589285714285716e-06, "loss": 41.7606, "step": 4835 }, { "epoch": 115.14328358208955, "grad_norm": 14.9144287109375, "learning_rate": 9.587301587301588e-06, "loss": 40.7529, "step": 4836 }, { "epoch": 115.16716417910447, "grad_norm": 24.37519073486328, "learning_rate": 9.585317460317461e-06, "loss": 41.7598, "step": 4837 }, { "epoch": 115.1910447761194, "grad_norm": 23.033283233642578, "learning_rate": 9.583333333333335e-06, "loss": 41.4316, "step": 4838 }, { "epoch": 115.21492537313434, "grad_norm": 20.98251724243164, "learning_rate": 9.581349206349208e-06, "loss": 40.3066, "step": 4839 }, { "epoch": 115.23880597014926, "grad_norm": 21.950714111328125, "learning_rate": 9.57936507936508e-06, "loss": 40.1732, "step": 4840 }, { "epoch": 115.26268656716418, "grad_norm": 22.479713439941406, "learning_rate": 9.577380952380953e-06, "loss": 41.586, "step": 4841 }, { "epoch": 115.2865671641791, "grad_norm": 16.739639282226562, "learning_rate": 9.575396825396826e-06, "loss": 42.143, "step": 4842 }, { "epoch": 115.31044776119403, "grad_norm": 23.182594299316406, "learning_rate": 9.573412698412699e-06, "loss": 42.4852, "step": 4843 }, { "epoch": 115.33432835820895, "grad_norm": 23.18885040283203, "learning_rate": 9.571428571428573e-06, "loss": 40.3618, "step": 4844 }, { "epoch": 115.35820895522389, "grad_norm": 15.238030433654785, "learning_rate": 9.569444444444446e-06, "loss": 41.3859, "step": 4845 }, { "epoch": 115.38208955223881, "grad_norm": 28.07355308532715, "learning_rate": 9.567460317460319e-06, "loss": 41.1147, "step": 4846 }, { "epoch": 115.40597014925373, "grad_norm": 21.76200294494629, "learning_rate": 9.565476190476191e-06, "loss": 41.6603, "step": 4847 }, { "epoch": 115.42985074626866, "grad_norm": 32.459312438964844, "learning_rate": 9.563492063492064e-06, "loss": 40.7283, "step": 4848 }, { "epoch": 115.45373134328358, "grad_norm": 22.368288040161133, "learning_rate": 9.561507936507938e-06, "loss": 40.4951, "step": 4849 }, { "epoch": 115.4776119402985, "grad_norm": 22.91469955444336, "learning_rate": 9.559523809523811e-06, "loss": 41.117, "step": 4850 }, { "epoch": 115.50149253731344, "grad_norm": 20.357376098632812, "learning_rate": 9.557539682539684e-06, "loss": 41.753, "step": 4851 }, { "epoch": 115.52537313432836, "grad_norm": 21.377849578857422, "learning_rate": 9.555555555555556e-06, "loss": 41.8999, "step": 4852 }, { "epoch": 115.54925373134328, "grad_norm": 33.38006591796875, "learning_rate": 9.55357142857143e-06, "loss": 41.1317, "step": 4853 }, { "epoch": 115.57313432835821, "grad_norm": 21.435209274291992, "learning_rate": 9.551587301587304e-06, "loss": 40.1686, "step": 4854 }, { "epoch": 115.59701492537313, "grad_norm": 31.958423614501953, "learning_rate": 9.549603174603176e-06, "loss": 42.572, "step": 4855 }, { "epoch": 115.62089552238805, "grad_norm": 21.460599899291992, "learning_rate": 9.547619047619049e-06, "loss": 40.5071, "step": 4856 }, { "epoch": 115.64477611940299, "grad_norm": 33.65336227416992, "learning_rate": 9.545634920634922e-06, "loss": 41.7753, "step": 4857 }, { "epoch": 115.66865671641791, "grad_norm": 23.594022750854492, "learning_rate": 9.543650793650794e-06, "loss": 41.4436, "step": 4858 }, { "epoch": 115.69253731343284, "grad_norm": 23.563594818115234, "learning_rate": 9.541666666666669e-06, "loss": 39.9414, "step": 4859 }, { "epoch": 115.71641791044776, "grad_norm": 24.98297882080078, "learning_rate": 9.539682539682541e-06, "loss": 40.8619, "step": 4860 }, { "epoch": 115.74029850746268, "grad_norm": 22.393163681030273, "learning_rate": 9.537698412698414e-06, "loss": 42.8338, "step": 4861 }, { "epoch": 115.7641791044776, "grad_norm": 30.07286834716797, "learning_rate": 9.535714285714287e-06, "loss": 41.2226, "step": 4862 }, { "epoch": 115.78805970149254, "grad_norm": 22.388198852539062, "learning_rate": 9.53373015873016e-06, "loss": 41.1935, "step": 4863 }, { "epoch": 115.81194029850747, "grad_norm": 33.4913215637207, "learning_rate": 9.531746031746032e-06, "loss": 42.5784, "step": 4864 }, { "epoch": 115.83582089552239, "grad_norm": 25.117082595825195, "learning_rate": 9.529761904761905e-06, "loss": 39.364, "step": 4865 }, { "epoch": 115.85970149253731, "grad_norm": 37.31660079956055, "learning_rate": 9.527777777777778e-06, "loss": 41.5319, "step": 4866 }, { "epoch": 115.88358208955223, "grad_norm": 28.936159133911133, "learning_rate": 9.52579365079365e-06, "loss": 41.757, "step": 4867 }, { "epoch": 115.90746268656716, "grad_norm": 34.599647521972656, "learning_rate": 9.523809523809525e-06, "loss": 41.6518, "step": 4868 }, { "epoch": 115.9313432835821, "grad_norm": 27.539873123168945, "learning_rate": 9.521825396825397e-06, "loss": 40.9794, "step": 4869 }, { "epoch": 115.95522388059702, "grad_norm": 37.74484634399414, "learning_rate": 9.51984126984127e-06, "loss": 40.8585, "step": 4870 }, { "epoch": 115.97910447761194, "grad_norm": 32.444847106933594, "learning_rate": 9.517857142857143e-06, "loss": 41.7152, "step": 4871 }, { "epoch": 116.0, "grad_norm": 32.239253997802734, "learning_rate": 9.515873015873016e-06, "loss": 35.2825, "step": 4872 }, { "epoch": 116.02388059701492, "grad_norm": 35.12287521362305, "learning_rate": 9.51388888888889e-06, "loss": 41.7451, "step": 4873 }, { "epoch": 116.04776119402985, "grad_norm": 28.03133773803711, "learning_rate": 9.511904761904763e-06, "loss": 40.8461, "step": 4874 }, { "epoch": 116.07164179104478, "grad_norm": 25.59912872314453, "learning_rate": 9.509920634920635e-06, "loss": 41.5307, "step": 4875 }, { "epoch": 116.0955223880597, "grad_norm": 31.361936569213867, "learning_rate": 9.507936507936508e-06, "loss": 41.9054, "step": 4876 }, { "epoch": 116.11940298507463, "grad_norm": 21.869449615478516, "learning_rate": 9.50595238095238e-06, "loss": 40.38, "step": 4877 }, { "epoch": 116.14328358208955, "grad_norm": 38.86557388305664, "learning_rate": 9.503968253968255e-06, "loss": 42.0518, "step": 4878 }, { "epoch": 116.16716417910447, "grad_norm": 31.712495803833008, "learning_rate": 9.501984126984128e-06, "loss": 40.2141, "step": 4879 }, { "epoch": 116.1910447761194, "grad_norm": 34.77455520629883, "learning_rate": 9.5e-06, "loss": 41.5116, "step": 4880 }, { "epoch": 116.21492537313434, "grad_norm": 28.530269622802734, "learning_rate": 9.498015873015873e-06, "loss": 40.6907, "step": 4881 }, { "epoch": 116.23880597014926, "grad_norm": 28.550081253051758, "learning_rate": 9.496031746031746e-06, "loss": 41.0168, "step": 4882 }, { "epoch": 116.26268656716418, "grad_norm": 28.081035614013672, "learning_rate": 9.494047619047619e-06, "loss": 42.3482, "step": 4883 }, { "epoch": 116.2865671641791, "grad_norm": 39.402713775634766, "learning_rate": 9.492063492063493e-06, "loss": 41.3423, "step": 4884 }, { "epoch": 116.31044776119403, "grad_norm": 30.37664794921875, "learning_rate": 9.490079365079366e-06, "loss": 41.0571, "step": 4885 }, { "epoch": 116.33432835820895, "grad_norm": 33.314979553222656, "learning_rate": 9.488095238095238e-06, "loss": 41.7844, "step": 4886 }, { "epoch": 116.35820895522389, "grad_norm": 31.91356658935547, "learning_rate": 9.486111111111111e-06, "loss": 42.6115, "step": 4887 }, { "epoch": 116.38208955223881, "grad_norm": 33.23076629638672, "learning_rate": 9.484126984126984e-06, "loss": 42.9912, "step": 4888 }, { "epoch": 116.40597014925373, "grad_norm": 33.23727798461914, "learning_rate": 9.482142857142858e-06, "loss": 40.2839, "step": 4889 }, { "epoch": 116.42985074626866, "grad_norm": 34.349090576171875, "learning_rate": 9.480158730158731e-06, "loss": 41.3853, "step": 4890 }, { "epoch": 116.45373134328358, "grad_norm": 28.603391647338867, "learning_rate": 9.478174603174604e-06, "loss": 41.8607, "step": 4891 }, { "epoch": 116.4776119402985, "grad_norm": 30.6513671875, "learning_rate": 9.476190476190476e-06, "loss": 40.6123, "step": 4892 }, { "epoch": 116.50149253731344, "grad_norm": 26.542037963867188, "learning_rate": 9.474206349206349e-06, "loss": 40.7056, "step": 4893 }, { "epoch": 116.52537313432836, "grad_norm": 33.709774017333984, "learning_rate": 9.472222222222223e-06, "loss": 41.8717, "step": 4894 }, { "epoch": 116.54925373134328, "grad_norm": 29.847158432006836, "learning_rate": 9.470238095238096e-06, "loss": 39.7896, "step": 4895 }, { "epoch": 116.57313432835821, "grad_norm": 29.366252899169922, "learning_rate": 9.468253968253969e-06, "loss": 40.6317, "step": 4896 }, { "epoch": 116.59701492537313, "grad_norm": 27.17310905456543, "learning_rate": 9.466269841269841e-06, "loss": 41.57, "step": 4897 }, { "epoch": 116.62089552238805, "grad_norm": 29.52984619140625, "learning_rate": 9.464285714285714e-06, "loss": 41.313, "step": 4898 }, { "epoch": 116.64477611940299, "grad_norm": 25.72901725769043, "learning_rate": 9.462301587301589e-06, "loss": 39.4479, "step": 4899 }, { "epoch": 116.66865671641791, "grad_norm": 36.030372619628906, "learning_rate": 9.460317460317461e-06, "loss": 41.6829, "step": 4900 }, { "epoch": 116.69253731343284, "grad_norm": 30.29513168334961, "learning_rate": 9.458333333333334e-06, "loss": 41.8183, "step": 4901 }, { "epoch": 116.71641791044776, "grad_norm": 28.564956665039062, "learning_rate": 9.456349206349207e-06, "loss": 41.1474, "step": 4902 }, { "epoch": 116.74029850746268, "grad_norm": 24.22428321838379, "learning_rate": 9.45436507936508e-06, "loss": 41.2769, "step": 4903 }, { "epoch": 116.7641791044776, "grad_norm": 27.916051864624023, "learning_rate": 9.452380952380952e-06, "loss": 40.8082, "step": 4904 }, { "epoch": 116.78805970149254, "grad_norm": 20.302335739135742, "learning_rate": 9.450396825396826e-06, "loss": 41.0273, "step": 4905 }, { "epoch": 116.81194029850747, "grad_norm": 32.881134033203125, "learning_rate": 9.4484126984127e-06, "loss": 41.9168, "step": 4906 }, { "epoch": 116.83582089552239, "grad_norm": 26.058923721313477, "learning_rate": 9.446428571428572e-06, "loss": 41.0683, "step": 4907 }, { "epoch": 116.85970149253731, "grad_norm": 34.14630889892578, "learning_rate": 9.444444444444445e-06, "loss": 40.9509, "step": 4908 }, { "epoch": 116.88358208955223, "grad_norm": 31.35688018798828, "learning_rate": 9.442460317460317e-06, "loss": 40.551, "step": 4909 }, { "epoch": 116.90746268656716, "grad_norm": 24.473339080810547, "learning_rate": 9.440476190476192e-06, "loss": 39.3649, "step": 4910 }, { "epoch": 116.9313432835821, "grad_norm": 21.814205169677734, "learning_rate": 9.438492063492064e-06, "loss": 40.4577, "step": 4911 }, { "epoch": 116.95522388059702, "grad_norm": 29.724409103393555, "learning_rate": 9.436507936507937e-06, "loss": 40.6152, "step": 4912 }, { "epoch": 116.97910447761194, "grad_norm": 24.086170196533203, "learning_rate": 9.43452380952381e-06, "loss": 41.106, "step": 4913 }, { "epoch": 117.0, "grad_norm": 28.476037979125977, "learning_rate": 9.432539682539682e-06, "loss": 36.447, "step": 4914 }, { "epoch": 117.02388059701492, "grad_norm": 27.55150032043457, "learning_rate": 9.430555555555557e-06, "loss": 41.9582, "step": 4915 }, { "epoch": 117.04776119402985, "grad_norm": 28.565845489501953, "learning_rate": 9.42857142857143e-06, "loss": 40.9572, "step": 4916 }, { "epoch": 117.07164179104478, "grad_norm": 24.59885025024414, "learning_rate": 9.426587301587302e-06, "loss": 41.2797, "step": 4917 }, { "epoch": 117.0955223880597, "grad_norm": 21.83265495300293, "learning_rate": 9.424603174603175e-06, "loss": 41.1726, "step": 4918 }, { "epoch": 117.11940298507463, "grad_norm": 21.117053985595703, "learning_rate": 9.422619047619048e-06, "loss": 42.4423, "step": 4919 }, { "epoch": 117.14328358208955, "grad_norm": 26.478992462158203, "learning_rate": 9.420634920634922e-06, "loss": 40.2709, "step": 4920 }, { "epoch": 117.16716417910447, "grad_norm": 20.61237335205078, "learning_rate": 9.418650793650795e-06, "loss": 40.8788, "step": 4921 }, { "epoch": 117.1910447761194, "grad_norm": 32.1706657409668, "learning_rate": 9.416666666666667e-06, "loss": 41.6381, "step": 4922 }, { "epoch": 117.21492537313434, "grad_norm": 26.040164947509766, "learning_rate": 9.41468253968254e-06, "loss": 40.662, "step": 4923 }, { "epoch": 117.23880597014926, "grad_norm": 27.465307235717773, "learning_rate": 9.412698412698413e-06, "loss": 39.2348, "step": 4924 }, { "epoch": 117.26268656716418, "grad_norm": 28.407739639282227, "learning_rate": 9.410714285714286e-06, "loss": 40.981, "step": 4925 }, { "epoch": 117.2865671641791, "grad_norm": 26.080398559570312, "learning_rate": 9.40873015873016e-06, "loss": 39.726, "step": 4926 }, { "epoch": 117.31044776119403, "grad_norm": 23.23761749267578, "learning_rate": 9.406746031746033e-06, "loss": 41.9898, "step": 4927 }, { "epoch": 117.33432835820895, "grad_norm": 25.763086318969727, "learning_rate": 9.404761904761905e-06, "loss": 41.6503, "step": 4928 }, { "epoch": 117.35820895522389, "grad_norm": 25.27565574645996, "learning_rate": 9.402777777777778e-06, "loss": 41.5848, "step": 4929 }, { "epoch": 117.38208955223881, "grad_norm": 21.535991668701172, "learning_rate": 9.40079365079365e-06, "loss": 41.4816, "step": 4930 }, { "epoch": 117.40597014925373, "grad_norm": 20.212120056152344, "learning_rate": 9.398809523809525e-06, "loss": 40.8427, "step": 4931 }, { "epoch": 117.42985074626866, "grad_norm": 24.479822158813477, "learning_rate": 9.396825396825398e-06, "loss": 41.3141, "step": 4932 }, { "epoch": 117.45373134328358, "grad_norm": 14.332042694091797, "learning_rate": 9.39484126984127e-06, "loss": 41.4974, "step": 4933 }, { "epoch": 117.4776119402985, "grad_norm": 22.84208869934082, "learning_rate": 9.392857142857143e-06, "loss": 41.8713, "step": 4934 }, { "epoch": 117.50149253731344, "grad_norm": 18.916187286376953, "learning_rate": 9.390873015873016e-06, "loss": 41.2954, "step": 4935 }, { "epoch": 117.52537313432836, "grad_norm": 22.096107482910156, "learning_rate": 9.38888888888889e-06, "loss": 40.7045, "step": 4936 }, { "epoch": 117.54925373134328, "grad_norm": 20.42098045349121, "learning_rate": 9.386904761904763e-06, "loss": 42.4039, "step": 4937 }, { "epoch": 117.57313432835821, "grad_norm": 19.17930793762207, "learning_rate": 9.384920634920636e-06, "loss": 41.2849, "step": 4938 }, { "epoch": 117.59701492537313, "grad_norm": 18.003908157348633, "learning_rate": 9.382936507936508e-06, "loss": 41.2694, "step": 4939 }, { "epoch": 117.62089552238805, "grad_norm": 21.67378044128418, "learning_rate": 9.380952380952381e-06, "loss": 41.4086, "step": 4940 }, { "epoch": 117.64477611940299, "grad_norm": 14.220067024230957, "learning_rate": 9.378968253968255e-06, "loss": 40.5293, "step": 4941 }, { "epoch": 117.66865671641791, "grad_norm": 17.12972640991211, "learning_rate": 9.376984126984128e-06, "loss": 40.7469, "step": 4942 }, { "epoch": 117.69253731343284, "grad_norm": 21.055694580078125, "learning_rate": 9.375000000000001e-06, "loss": 39.6643, "step": 4943 }, { "epoch": 117.71641791044776, "grad_norm": 17.032026290893555, "learning_rate": 9.373015873015874e-06, "loss": 39.6835, "step": 4944 }, { "epoch": 117.74029850746268, "grad_norm": 22.909225463867188, "learning_rate": 9.371031746031746e-06, "loss": 41.411, "step": 4945 }, { "epoch": 117.7641791044776, "grad_norm": 15.6399564743042, "learning_rate": 9.36904761904762e-06, "loss": 41.0838, "step": 4946 }, { "epoch": 117.78805970149254, "grad_norm": 22.99868392944336, "learning_rate": 9.367063492063493e-06, "loss": 41.1988, "step": 4947 }, { "epoch": 117.81194029850747, "grad_norm": 19.78955841064453, "learning_rate": 9.365079365079366e-06, "loss": 41.5181, "step": 4948 }, { "epoch": 117.83582089552239, "grad_norm": 21.281328201293945, "learning_rate": 9.363095238095239e-06, "loss": 40.5115, "step": 4949 }, { "epoch": 117.85970149253731, "grad_norm": 19.100648880004883, "learning_rate": 9.361111111111111e-06, "loss": 40.3604, "step": 4950 }, { "epoch": 117.88358208955223, "grad_norm": 24.486183166503906, "learning_rate": 9.359126984126984e-06, "loss": 42.065, "step": 4951 }, { "epoch": 117.90746268656716, "grad_norm": 20.265453338623047, "learning_rate": 9.357142857142859e-06, "loss": 42.1137, "step": 4952 }, { "epoch": 117.9313432835821, "grad_norm": 21.281848907470703, "learning_rate": 9.355158730158731e-06, "loss": 42.0899, "step": 4953 }, { "epoch": 117.95522388059702, "grad_norm": 21.65452766418457, "learning_rate": 9.353174603174604e-06, "loss": 41.4076, "step": 4954 }, { "epoch": 117.97910447761194, "grad_norm": 19.85662841796875, "learning_rate": 9.351190476190477e-06, "loss": 40.9143, "step": 4955 }, { "epoch": 118.0, "grad_norm": 16.60548210144043, "learning_rate": 9.34920634920635e-06, "loss": 35.2268, "step": 4956 }, { "epoch": 118.02388059701492, "grad_norm": 19.02985382080078, "learning_rate": 9.347222222222224e-06, "loss": 41.6227, "step": 4957 }, { "epoch": 118.04776119402985, "grad_norm": 20.057069778442383, "learning_rate": 9.345238095238096e-06, "loss": 39.6729, "step": 4958 }, { "epoch": 118.07164179104478, "grad_norm": 16.330196380615234, "learning_rate": 9.343253968253969e-06, "loss": 41.2542, "step": 4959 }, { "epoch": 118.0955223880597, "grad_norm": 18.172393798828125, "learning_rate": 9.341269841269842e-06, "loss": 40.0607, "step": 4960 }, { "epoch": 118.11940298507463, "grad_norm": 20.96540069580078, "learning_rate": 9.339285714285715e-06, "loss": 40.585, "step": 4961 }, { "epoch": 118.14328358208955, "grad_norm": 14.967394828796387, "learning_rate": 9.337301587301589e-06, "loss": 40.2613, "step": 4962 }, { "epoch": 118.16716417910447, "grad_norm": 18.953601837158203, "learning_rate": 9.335317460317462e-06, "loss": 39.38, "step": 4963 }, { "epoch": 118.1910447761194, "grad_norm": 15.904739379882812, "learning_rate": 9.333333333333334e-06, "loss": 41.6314, "step": 4964 }, { "epoch": 118.21492537313434, "grad_norm": 24.293170928955078, "learning_rate": 9.331349206349207e-06, "loss": 40.5077, "step": 4965 }, { "epoch": 118.23880597014926, "grad_norm": 20.04494857788086, "learning_rate": 9.32936507936508e-06, "loss": 40.8951, "step": 4966 }, { "epoch": 118.26268656716418, "grad_norm": 23.613727569580078, "learning_rate": 9.327380952380954e-06, "loss": 42.0233, "step": 4967 }, { "epoch": 118.2865671641791, "grad_norm": 23.967741012573242, "learning_rate": 9.325396825396827e-06, "loss": 41.0547, "step": 4968 }, { "epoch": 118.31044776119403, "grad_norm": 19.54030418395996, "learning_rate": 9.3234126984127e-06, "loss": 41.2887, "step": 4969 }, { "epoch": 118.33432835820895, "grad_norm": 23.12442398071289, "learning_rate": 9.321428571428572e-06, "loss": 40.5083, "step": 4970 }, { "epoch": 118.35820895522389, "grad_norm": 21.34069061279297, "learning_rate": 9.319444444444445e-06, "loss": 41.3474, "step": 4971 }, { "epoch": 118.38208955223881, "grad_norm": 20.411256790161133, "learning_rate": 9.317460317460318e-06, "loss": 40.3927, "step": 4972 }, { "epoch": 118.40597014925373, "grad_norm": 21.702983856201172, "learning_rate": 9.315476190476192e-06, "loss": 41.2522, "step": 4973 }, { "epoch": 118.42985074626866, "grad_norm": 20.09593963623047, "learning_rate": 9.313492063492065e-06, "loss": 40.8607, "step": 4974 }, { "epoch": 118.45373134328358, "grad_norm": 16.693893432617188, "learning_rate": 9.311507936507937e-06, "loss": 41.9847, "step": 4975 }, { "epoch": 118.4776119402985, "grad_norm": 16.682085037231445, "learning_rate": 9.30952380952381e-06, "loss": 41.3428, "step": 4976 }, { "epoch": 118.50149253731344, "grad_norm": 16.73056983947754, "learning_rate": 9.307539682539683e-06, "loss": 40.8279, "step": 4977 }, { "epoch": 118.52537313432836, "grad_norm": 16.317480087280273, "learning_rate": 9.305555555555557e-06, "loss": 40.4602, "step": 4978 }, { "epoch": 118.54925373134328, "grad_norm": 15.660470008850098, "learning_rate": 9.30357142857143e-06, "loss": 40.7565, "step": 4979 }, { "epoch": 118.57313432835821, "grad_norm": 21.601036071777344, "learning_rate": 9.301587301587303e-06, "loss": 41.7317, "step": 4980 }, { "epoch": 118.59701492537313, "grad_norm": 16.545438766479492, "learning_rate": 9.299603174603175e-06, "loss": 42.1659, "step": 4981 }, { "epoch": 118.62089552238805, "grad_norm": 20.3563175201416, "learning_rate": 9.297619047619048e-06, "loss": 39.8948, "step": 4982 }, { "epoch": 118.64477611940299, "grad_norm": 19.03108024597168, "learning_rate": 9.295634920634922e-06, "loss": 40.6225, "step": 4983 }, { "epoch": 118.66865671641791, "grad_norm": 18.866544723510742, "learning_rate": 9.293650793650795e-06, "loss": 40.781, "step": 4984 }, { "epoch": 118.69253731343284, "grad_norm": 18.367883682250977, "learning_rate": 9.291666666666668e-06, "loss": 42.1775, "step": 4985 }, { "epoch": 118.71641791044776, "grad_norm": 17.574983596801758, "learning_rate": 9.28968253968254e-06, "loss": 40.7228, "step": 4986 }, { "epoch": 118.74029850746268, "grad_norm": 17.931612014770508, "learning_rate": 9.287698412698413e-06, "loss": 41.352, "step": 4987 }, { "epoch": 118.7641791044776, "grad_norm": NaN, "learning_rate": 9.285714285714288e-06, "loss": 37.2747, "step": 4988 }, { "epoch": 118.78805970149254, "grad_norm": 19.131587982177734, "learning_rate": 9.285714285714288e-06, "loss": 41.442, "step": 4989 }, { "epoch": 118.81194029850747, "grad_norm": 19.01002311706543, "learning_rate": 9.28373015873016e-06, "loss": 40.1583, "step": 4990 }, { "epoch": 118.83582089552239, "grad_norm": 20.718921661376953, "learning_rate": 9.281746031746033e-06, "loss": 42.1721, "step": 4991 }, { "epoch": 118.85970149253731, "grad_norm": 24.149545669555664, "learning_rate": 9.279761904761906e-06, "loss": 39.6434, "step": 4992 }, { "epoch": 118.88358208955223, "grad_norm": 19.575162887573242, "learning_rate": 9.277777777777778e-06, "loss": 41.7524, "step": 4993 }, { "epoch": 118.90746268656716, "grad_norm": 21.472047805786133, "learning_rate": 9.275793650793653e-06, "loss": 41.5381, "step": 4994 }, { "epoch": 118.9313432835821, "grad_norm": 18.96376609802246, "learning_rate": 9.273809523809525e-06, "loss": 41.8712, "step": 4995 }, { "epoch": 118.95522388059702, "grad_norm": 20.816585540771484, "learning_rate": 9.271825396825398e-06, "loss": 42.7263, "step": 4996 }, { "epoch": 118.97910447761194, "grad_norm": 18.856704711914062, "learning_rate": 9.26984126984127e-06, "loss": 42.2396, "step": 4997 }, { "epoch": 119.0, "grad_norm": 17.700910568237305, "learning_rate": 9.267857142857144e-06, "loss": 35.0377, "step": 4998 }, { "epoch": 119.02388059701492, "grad_norm": 18.852880477905273, "learning_rate": 9.265873015873016e-06, "loss": 40.1171, "step": 4999 }, { "epoch": 119.04776119402985, "grad_norm": 17.4823055267334, "learning_rate": 9.26388888888889e-06, "loss": 39.7783, "step": 5000 }, { "epoch": 119.07164179104478, "grad_norm": 22.45401954650879, "learning_rate": 9.261904761904763e-06, "loss": 41.6926, "step": 5001 }, { "epoch": 119.0955223880597, "grad_norm": 19.38802719116211, "learning_rate": 9.259920634920636e-06, "loss": 41.0149, "step": 5002 }, { "epoch": 119.11940298507463, "grad_norm": 18.921022415161133, "learning_rate": 9.257936507936509e-06, "loss": 41.2486, "step": 5003 }, { "epoch": 119.14328358208955, "grad_norm": 22.00980567932129, "learning_rate": 9.255952380952381e-06, "loss": 40.8794, "step": 5004 }, { "epoch": 119.16716417910447, "grad_norm": 13.831929206848145, "learning_rate": 9.253968253968256e-06, "loss": 40.3292, "step": 5005 }, { "epoch": 119.1910447761194, "grad_norm": 20.504989624023438, "learning_rate": 9.251984126984129e-06, "loss": 41.5119, "step": 5006 }, { "epoch": 119.21492537313434, "grad_norm": 15.127291679382324, "learning_rate": 9.250000000000001e-06, "loss": 40.214, "step": 5007 }, { "epoch": 119.23880597014926, "grad_norm": 18.562606811523438, "learning_rate": 9.248015873015874e-06, "loss": 41.0757, "step": 5008 }, { "epoch": 119.26268656716418, "grad_norm": 20.99079132080078, "learning_rate": 9.246031746031747e-06, "loss": 41.3658, "step": 5009 }, { "epoch": 119.2865671641791, "grad_norm": 17.714588165283203, "learning_rate": 9.244047619047621e-06, "loss": 41.8379, "step": 5010 }, { "epoch": 119.31044776119403, "grad_norm": 20.95669174194336, "learning_rate": 9.242063492063494e-06, "loss": 40.6619, "step": 5011 }, { "epoch": 119.33432835820895, "grad_norm": 18.291975021362305, "learning_rate": 9.240079365079366e-06, "loss": 38.9992, "step": 5012 }, { "epoch": 119.35820895522389, "grad_norm": 14.831878662109375, "learning_rate": 9.238095238095239e-06, "loss": 41.5072, "step": 5013 }, { "epoch": 119.38208955223881, "grad_norm": 17.76835823059082, "learning_rate": 9.236111111111112e-06, "loss": 41.0227, "step": 5014 }, { "epoch": 119.40597014925373, "grad_norm": 15.433774948120117, "learning_rate": 9.234126984126986e-06, "loss": 40.4539, "step": 5015 }, { "epoch": 119.42985074626866, "grad_norm": 23.18012237548828, "learning_rate": 9.232142857142859e-06, "loss": 41.8991, "step": 5016 }, { "epoch": 119.45373134328358, "grad_norm": 17.35015106201172, "learning_rate": 9.230158730158732e-06, "loss": 40.189, "step": 5017 }, { "epoch": 119.4776119402985, "grad_norm": 19.60420036315918, "learning_rate": 9.228174603174604e-06, "loss": 41.602, "step": 5018 }, { "epoch": 119.50149253731344, "grad_norm": 20.470211029052734, "learning_rate": 9.226190476190477e-06, "loss": 42.1062, "step": 5019 }, { "epoch": 119.52537313432836, "grad_norm": 16.949901580810547, "learning_rate": 9.22420634920635e-06, "loss": 41.6508, "step": 5020 }, { "epoch": 119.54925373134328, "grad_norm": 22.598966598510742, "learning_rate": 9.222222222222224e-06, "loss": 39.9819, "step": 5021 }, { "epoch": 119.57313432835821, "grad_norm": 16.502370834350586, "learning_rate": 9.220238095238097e-06, "loss": 40.1142, "step": 5022 }, { "epoch": 119.59701492537313, "grad_norm": 20.456647872924805, "learning_rate": 9.218253968253968e-06, "loss": 41.6525, "step": 5023 }, { "epoch": 119.62089552238805, "grad_norm": 18.311965942382812, "learning_rate": 9.216269841269842e-06, "loss": 41.1592, "step": 5024 }, { "epoch": 119.64477611940299, "grad_norm": 19.683259963989258, "learning_rate": 9.214285714285715e-06, "loss": 40.853, "step": 5025 }, { "epoch": 119.66865671641791, "grad_norm": 20.134082794189453, "learning_rate": 9.212301587301588e-06, "loss": 40.3045, "step": 5026 }, { "epoch": 119.69253731343284, "grad_norm": 28.281267166137695, "learning_rate": 9.21031746031746e-06, "loss": 41.6703, "step": 5027 }, { "epoch": 119.71641791044776, "grad_norm": 22.25422477722168, "learning_rate": 9.208333333333333e-06, "loss": 41.0012, "step": 5028 }, { "epoch": 119.74029850746268, "grad_norm": 15.698911666870117, "learning_rate": 9.206349206349207e-06, "loss": 39.3874, "step": 5029 }, { "epoch": 119.7641791044776, "grad_norm": 22.822614669799805, "learning_rate": 9.20436507936508e-06, "loss": 42.7782, "step": 5030 }, { "epoch": 119.78805970149254, "grad_norm": 18.489330291748047, "learning_rate": 9.202380952380953e-06, "loss": 42.2175, "step": 5031 }, { "epoch": 119.81194029850747, "grad_norm": 23.18742561340332, "learning_rate": 9.200396825396825e-06, "loss": 42.1583, "step": 5032 }, { "epoch": 119.83582089552239, "grad_norm": 24.11537742614746, "learning_rate": 9.198412698412698e-06, "loss": 40.783, "step": 5033 }, { "epoch": 119.85970149253731, "grad_norm": 16.897441864013672, "learning_rate": 9.196428571428571e-06, "loss": 40.3459, "step": 5034 }, { "epoch": 119.88358208955223, "grad_norm": 20.22298812866211, "learning_rate": 9.194444444444445e-06, "loss": 40.9984, "step": 5035 }, { "epoch": 119.90746268656716, "grad_norm": 19.373756408691406, "learning_rate": 9.192460317460318e-06, "loss": 41.8363, "step": 5036 }, { "epoch": 119.9313432835821, "grad_norm": 16.265701293945312, "learning_rate": 9.19047619047619e-06, "loss": 40.9217, "step": 5037 }, { "epoch": 119.95522388059702, "grad_norm": 28.902698516845703, "learning_rate": 9.188492063492063e-06, "loss": 41.7966, "step": 5038 }, { "epoch": 119.97910447761194, "grad_norm": 19.491430282592773, "learning_rate": 9.186507936507936e-06, "loss": 41.2973, "step": 5039 }, { "epoch": 120.0, "grad_norm": 25.749500274658203, "learning_rate": 9.18452380952381e-06, "loss": 35.3125, "step": 5040 }, { "epoch": 120.0, "step": 5040, "total_flos": 2.4776207925060864e+17, "train_loss": 3.4518184624021013, "train_runtime": 12809.9419, "train_samples_per_second": 50.136, "train_steps_per_second": 0.393 }, { "epoch": 120.02388059701492, "grad_norm": 24.0944766998291, "learning_rate": 1e-05, "loss": 41.0597, "step": 5041 }, { "epoch": 120.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998168498168499e-06, "loss": 46.3783, "step": 5042 }, { "epoch": 120.07164179104478, "grad_norm": 259.0445861816406, "learning_rate": 9.998168498168499e-06, "loss": 46.5108, "step": 5043 }, { "epoch": 120.0955223880597, "grad_norm": 128.19775390625, "learning_rate": 9.996336996336997e-06, "loss": 45.0948, "step": 5044 }, { "epoch": 120.11940298507463, "grad_norm": 58.83436584472656, "learning_rate": 9.994505494505496e-06, "loss": 43.1635, "step": 5045 }, { "epoch": 120.14328358208955, "grad_norm": 58.79975891113281, "learning_rate": 9.992673992673994e-06, "loss": 41.6829, "step": 5046 }, { "epoch": 120.16716417910447, "grad_norm": 50.534278869628906, "learning_rate": 9.990842490842492e-06, "loss": 42.3871, "step": 5047 }, { "epoch": 120.1910447761194, "grad_norm": 38.682125091552734, "learning_rate": 9.98901098901099e-06, "loss": 40.9709, "step": 5048 }, { "epoch": 120.21492537313434, "grad_norm": 35.06442642211914, "learning_rate": 9.987179487179488e-06, "loss": 41.0217, "step": 5049 }, { "epoch": 120.23880597014926, "grad_norm": 59.00712585449219, "learning_rate": 9.985347985347986e-06, "loss": 41.7985, "step": 5050 }, { "epoch": 120.26268656716418, "grad_norm": 36.52231216430664, "learning_rate": 9.983516483516485e-06, "loss": 41.6886, "step": 5051 }, { "epoch": 120.2865671641791, "grad_norm": 35.213436126708984, "learning_rate": 9.981684981684983e-06, "loss": 40.9909, "step": 5052 }, { "epoch": 120.31044776119403, "grad_norm": 40.0443000793457, "learning_rate": 9.97985347985348e-06, "loss": 41.1657, "step": 5053 }, { "epoch": 120.33432835820895, "grad_norm": 27.66771697998047, "learning_rate": 9.978021978021979e-06, "loss": 41.327, "step": 5054 }, { "epoch": 120.35820895522389, "grad_norm": 34.4952507019043, "learning_rate": 9.976190476190477e-06, "loss": 40.8086, "step": 5055 }, { "epoch": 120.38208955223881, "grad_norm": 26.404708862304688, "learning_rate": 9.974358974358974e-06, "loss": 41.0862, "step": 5056 }, { "epoch": 120.40597014925373, "grad_norm": 24.669050216674805, "learning_rate": 9.972527472527474e-06, "loss": 40.6639, "step": 5057 }, { "epoch": 120.42985074626866, "grad_norm": 29.60878562927246, "learning_rate": 9.970695970695972e-06, "loss": 40.8127, "step": 5058 }, { "epoch": 120.45373134328358, "grad_norm": 17.245283126831055, "learning_rate": 9.96886446886447e-06, "loss": 41.6983, "step": 5059 }, { "epoch": 120.4776119402985, "grad_norm": 26.338546752929688, "learning_rate": 9.967032967032968e-06, "loss": 40.5917, "step": 5060 }, { "epoch": 120.50149253731344, "grad_norm": 25.838808059692383, "learning_rate": 9.965201465201466e-06, "loss": 41.6386, "step": 5061 }, { "epoch": 120.52537313432836, "grad_norm": 17.583539962768555, "learning_rate": 9.963369963369965e-06, "loss": 39.5372, "step": 5062 }, { "epoch": 120.54925373134328, "grad_norm": 29.433382034301758, "learning_rate": 9.961538461538463e-06, "loss": 41.2372, "step": 5063 }, { "epoch": 120.57313432835821, "grad_norm": 19.41893768310547, "learning_rate": 9.959706959706961e-06, "loss": 41.2464, "step": 5064 }, { "epoch": 120.59701492537313, "grad_norm": 20.060937881469727, "learning_rate": 9.957875457875459e-06, "loss": 41.1316, "step": 5065 }, { "epoch": 120.62089552238805, "grad_norm": 21.93149185180664, "learning_rate": 9.956043956043957e-06, "loss": 40.6738, "step": 5066 }, { "epoch": 120.64477611940299, "grad_norm": 20.02782440185547, "learning_rate": 9.954212454212454e-06, "loss": 41.0332, "step": 5067 }, { "epoch": 120.66865671641791, "grad_norm": 16.836517333984375, "learning_rate": 9.952380952380954e-06, "loss": 41.8322, "step": 5068 }, { "epoch": 120.69253731343284, "grad_norm": 19.467927932739258, "learning_rate": 9.950549450549452e-06, "loss": 42.0419, "step": 5069 }, { "epoch": 120.71641791044776, "grad_norm": 20.398895263671875, "learning_rate": 9.94871794871795e-06, "loss": 40.1522, "step": 5070 }, { "epoch": 120.74029850746268, "grad_norm": 17.445634841918945, "learning_rate": 9.946886446886448e-06, "loss": 41.1946, "step": 5071 }, { "epoch": 120.7641791044776, "grad_norm": 17.94610595703125, "learning_rate": 9.945054945054946e-06, "loss": 41.4025, "step": 5072 }, { "epoch": 120.78805970149254, "grad_norm": 25.02172088623047, "learning_rate": 9.943223443223443e-06, "loss": 42.0855, "step": 5073 }, { "epoch": 120.81194029850747, "grad_norm": 16.557662963867188, "learning_rate": 9.941391941391943e-06, "loss": 39.8862, "step": 5074 }, { "epoch": 120.83582089552239, "grad_norm": 19.688400268554688, "learning_rate": 9.939560439560441e-06, "loss": 40.9361, "step": 5075 }, { "epoch": 120.85970149253731, "grad_norm": 29.196117401123047, "learning_rate": 9.937728937728939e-06, "loss": 42.8812, "step": 5076 }, { "epoch": 120.88358208955223, "grad_norm": 17.111480712890625, "learning_rate": 9.935897435897437e-06, "loss": 41.4032, "step": 5077 }, { "epoch": 120.90746268656716, "grad_norm": 29.072128295898438, "learning_rate": 9.934065934065935e-06, "loss": 42.2839, "step": 5078 }, { "epoch": 120.9313432835821, "grad_norm": 24.953367233276367, "learning_rate": 9.932234432234434e-06, "loss": 41.5165, "step": 5079 }, { "epoch": 120.95522388059702, "grad_norm": 19.515911102294922, "learning_rate": 9.930402930402932e-06, "loss": 40.4111, "step": 5080 }, { "epoch": 120.97910447761194, "grad_norm": 23.281414031982422, "learning_rate": 9.92857142857143e-06, "loss": 40.3576, "step": 5081 }, { "epoch": 121.0, "grad_norm": 16.75458335876465, "learning_rate": 9.926739926739928e-06, "loss": 36.3203, "step": 5082 }, { "epoch": 121.02388059701492, "grad_norm": 29.20741844177246, "learning_rate": 9.924908424908426e-06, "loss": 39.9303, "step": 5083 }, { "epoch": 121.04776119402985, "grad_norm": 21.79246711730957, "learning_rate": 9.923076923076923e-06, "loss": 41.4785, "step": 5084 }, { "epoch": 121.07164179104478, "grad_norm": 29.117504119873047, "learning_rate": 9.921245421245423e-06, "loss": 41.8695, "step": 5085 }, { "epoch": 121.0955223880597, "grad_norm": 17.819120407104492, "learning_rate": 9.919413919413921e-06, "loss": 39.2762, "step": 5086 }, { "epoch": 121.11940298507463, "grad_norm": 24.556377410888672, "learning_rate": 9.917582417582419e-06, "loss": 41.5134, "step": 5087 }, { "epoch": 121.14328358208955, "grad_norm": 19.049671173095703, "learning_rate": 9.915750915750917e-06, "loss": 40.8369, "step": 5088 }, { "epoch": 121.16716417910447, "grad_norm": 20.745899200439453, "learning_rate": 9.913919413919415e-06, "loss": 41.4137, "step": 5089 }, { "epoch": 121.1910447761194, "grad_norm": 21.53566551208496, "learning_rate": 9.912087912087912e-06, "loss": 40.3688, "step": 5090 }, { "epoch": 121.21492537313434, "grad_norm": 23.52694320678711, "learning_rate": 9.910256410256412e-06, "loss": 41.1741, "step": 5091 }, { "epoch": 121.23880597014926, "grad_norm": 19.23663330078125, "learning_rate": 9.90842490842491e-06, "loss": 41.2629, "step": 5092 }, { "epoch": 121.26268656716418, "grad_norm": 20.38791847229004, "learning_rate": 9.906593406593408e-06, "loss": 40.6994, "step": 5093 }, { "epoch": 121.2865671641791, "grad_norm": 29.10164451599121, "learning_rate": 9.904761904761906e-06, "loss": 41.7159, "step": 5094 }, { "epoch": 121.31044776119403, "grad_norm": 18.191295623779297, "learning_rate": 9.902930402930403e-06, "loss": 40.0695, "step": 5095 }, { "epoch": 121.33432835820895, "grad_norm": 34.14667510986328, "learning_rate": 9.901098901098903e-06, "loss": 40.7836, "step": 5096 }, { "epoch": 121.35820895522389, "grad_norm": 25.464981079101562, "learning_rate": 9.899267399267401e-06, "loss": 40.5731, "step": 5097 }, { "epoch": 121.38208955223881, "grad_norm": 34.738773345947266, "learning_rate": 9.897435897435899e-06, "loss": 42.5079, "step": 5098 }, { "epoch": 121.40597014925373, "grad_norm": 24.047697067260742, "learning_rate": 9.895604395604397e-06, "loss": 41.9274, "step": 5099 }, { "epoch": 121.42985074626866, "grad_norm": 36.788326263427734, "learning_rate": 9.893772893772895e-06, "loss": 41.1378, "step": 5100 }, { "epoch": 121.45373134328358, "grad_norm": 26.662019729614258, "learning_rate": 9.891941391941392e-06, "loss": 41.4065, "step": 5101 }, { "epoch": 121.4776119402985, "grad_norm": 35.20701217651367, "learning_rate": 9.890109890109892e-06, "loss": 39.1299, "step": 5102 }, { "epoch": 121.50149253731344, "grad_norm": 29.675378799438477, "learning_rate": 9.88827838827839e-06, "loss": 41.0234, "step": 5103 }, { "epoch": 121.52537313432836, "grad_norm": 34.06852722167969, "learning_rate": 9.886446886446888e-06, "loss": 41.8632, "step": 5104 }, { "epoch": 121.54925373134328, "grad_norm": 25.621753692626953, "learning_rate": 9.884615384615386e-06, "loss": 40.9295, "step": 5105 }, { "epoch": 121.57313432835821, "grad_norm": 27.804433822631836, "learning_rate": 9.882783882783884e-06, "loss": 40.0458, "step": 5106 }, { "epoch": 121.59701492537313, "grad_norm": 26.332223892211914, "learning_rate": 9.880952380952381e-06, "loss": 39.7798, "step": 5107 }, { "epoch": 121.62089552238805, "grad_norm": 29.49053192138672, "learning_rate": 9.879120879120881e-06, "loss": 42.0289, "step": 5108 }, { "epoch": 121.64477611940299, "grad_norm": 24.052976608276367, "learning_rate": 9.877289377289379e-06, "loss": 40.5861, "step": 5109 }, { "epoch": 121.66865671641791, "grad_norm": 23.03173828125, "learning_rate": 9.875457875457877e-06, "loss": 40.9261, "step": 5110 }, { "epoch": 121.69253731343284, "grad_norm": 24.134889602661133, "learning_rate": 9.873626373626375e-06, "loss": 41.0466, "step": 5111 }, { "epoch": 121.71641791044776, "grad_norm": 19.443124771118164, "learning_rate": 9.871794871794872e-06, "loss": 40.4331, "step": 5112 }, { "epoch": 121.74029850746268, "grad_norm": 31.88178825378418, "learning_rate": 9.869963369963372e-06, "loss": 40.6991, "step": 5113 }, { "epoch": 121.7641791044776, "grad_norm": 21.850631713867188, "learning_rate": 9.86813186813187e-06, "loss": 41.4331, "step": 5114 }, { "epoch": 121.78805970149254, "grad_norm": 37.39925765991211, "learning_rate": 9.866300366300368e-06, "loss": 40.9437, "step": 5115 }, { "epoch": 121.81194029850747, "grad_norm": 31.58283042907715, "learning_rate": 9.864468864468866e-06, "loss": 41.0558, "step": 5116 }, { "epoch": 121.83582089552239, "grad_norm": 29.965499877929688, "learning_rate": 9.862637362637364e-06, "loss": 39.5632, "step": 5117 }, { "epoch": 121.85970149253731, "grad_norm": 25.50206756591797, "learning_rate": 9.860805860805861e-06, "loss": 41.287, "step": 5118 }, { "epoch": 121.88358208955223, "grad_norm": 34.806034088134766, "learning_rate": 9.858974358974361e-06, "loss": 41.0144, "step": 5119 }, { "epoch": 121.90746268656716, "grad_norm": 21.66145133972168, "learning_rate": 9.857142857142859e-06, "loss": 41.2587, "step": 5120 }, { "epoch": 121.9313432835821, "grad_norm": 37.883094787597656, "learning_rate": 9.855311355311357e-06, "loss": 40.7321, "step": 5121 }, { "epoch": 121.95522388059702, "grad_norm": 28.472124099731445, "learning_rate": 9.853479853479855e-06, "loss": 41.5554, "step": 5122 }, { "epoch": 121.97910447761194, "grad_norm": 35.33477783203125, "learning_rate": 9.851648351648352e-06, "loss": 42.0246, "step": 5123 }, { "epoch": 122.0, "grad_norm": 27.911645889282227, "learning_rate": 9.84981684981685e-06, "loss": 35.3824, "step": 5124 }, { "epoch": 122.02388059701492, "grad_norm": 33.792213439941406, "learning_rate": 9.84798534798535e-06, "loss": 40.2451, "step": 5125 }, { "epoch": 122.04776119402985, "grad_norm": 33.73054885864258, "learning_rate": 9.846153846153848e-06, "loss": 41.5777, "step": 5126 }, { "epoch": 122.07164179104478, "grad_norm": 29.55936622619629, "learning_rate": 9.844322344322346e-06, "loss": 40.5313, "step": 5127 }, { "epoch": 122.0955223880597, "grad_norm": 21.786413192749023, "learning_rate": 9.842490842490844e-06, "loss": 41.795, "step": 5128 }, { "epoch": 122.11940298507463, "grad_norm": 38.503475189208984, "learning_rate": 9.840659340659341e-06, "loss": 40.2868, "step": 5129 }, { "epoch": 122.14328358208955, "grad_norm": 27.126779556274414, "learning_rate": 9.83882783882784e-06, "loss": 40.5464, "step": 5130 }, { "epoch": 122.16716417910447, "grad_norm": 34.76428985595703, "learning_rate": 9.836996336996337e-06, "loss": 40.2589, "step": 5131 }, { "epoch": 122.1910447761194, "grad_norm": 37.37604522705078, "learning_rate": 9.835164835164835e-06, "loss": 40.8401, "step": 5132 }, { "epoch": 122.21492537313434, "grad_norm": 29.67528533935547, "learning_rate": 9.833333333333333e-06, "loss": 40.9921, "step": 5133 }, { "epoch": 122.23880597014926, "grad_norm": 27.43715476989746, "learning_rate": 9.831501831501832e-06, "loss": 40.4038, "step": 5134 }, { "epoch": 122.26268656716418, "grad_norm": 30.960216522216797, "learning_rate": 9.82967032967033e-06, "loss": 39.8886, "step": 5135 }, { "epoch": 122.2865671641791, "grad_norm": 27.186513900756836, "learning_rate": 9.827838827838828e-06, "loss": 42.1122, "step": 5136 }, { "epoch": 122.31044776119403, "grad_norm": 32.01823806762695, "learning_rate": 9.826007326007326e-06, "loss": 40.7854, "step": 5137 }, { "epoch": 122.33432835820895, "grad_norm": 26.988773345947266, "learning_rate": 9.824175824175824e-06, "loss": 40.5902, "step": 5138 }, { "epoch": 122.35820895522389, "grad_norm": 29.70166778564453, "learning_rate": 9.822344322344322e-06, "loss": 41.4538, "step": 5139 }, { "epoch": 122.38208955223881, "grad_norm": 25.9971981048584, "learning_rate": 9.820512820512821e-06, "loss": 39.6575, "step": 5140 }, { "epoch": 122.40597014925373, "grad_norm": 33.1441535949707, "learning_rate": 9.81868131868132e-06, "loss": 40.0902, "step": 5141 }, { "epoch": 122.42985074626866, "grad_norm": 27.196630477905273, "learning_rate": 9.816849816849817e-06, "loss": 40.0376, "step": 5142 }, { "epoch": 122.45373134328358, "grad_norm": 34.561798095703125, "learning_rate": 9.815018315018315e-06, "loss": 41.6209, "step": 5143 }, { "epoch": 122.4776119402985, "grad_norm": 33.98078155517578, "learning_rate": 9.813186813186813e-06, "loss": 40.8931, "step": 5144 }, { "epoch": 122.50149253731344, "grad_norm": 29.115427017211914, "learning_rate": 9.811355311355313e-06, "loss": 41.4718, "step": 5145 }, { "epoch": 122.52537313432836, "grad_norm": 24.698219299316406, "learning_rate": 9.80952380952381e-06, "loss": 40.2337, "step": 5146 }, { "epoch": 122.54925373134328, "grad_norm": 32.09329605102539, "learning_rate": 9.807692307692308e-06, "loss": 40.1893, "step": 5147 }, { "epoch": 122.57313432835821, "grad_norm": 28.50708770751953, "learning_rate": 9.805860805860806e-06, "loss": 41.2457, "step": 5148 }, { "epoch": 122.59701492537313, "grad_norm": 34.65631103515625, "learning_rate": 9.804029304029304e-06, "loss": 40.8311, "step": 5149 }, { "epoch": 122.62089552238805, "grad_norm": 27.82625961303711, "learning_rate": 9.802197802197802e-06, "loss": 40.3574, "step": 5150 }, { "epoch": 122.64477611940299, "grad_norm": 31.24656105041504, "learning_rate": 9.800366300366301e-06, "loss": 40.999, "step": 5151 }, { "epoch": 122.66865671641791, "grad_norm": 26.075342178344727, "learning_rate": 9.7985347985348e-06, "loss": 41.6763, "step": 5152 }, { "epoch": 122.69253731343284, "grad_norm": 28.61420440673828, "learning_rate": 9.796703296703297e-06, "loss": 41.1096, "step": 5153 }, { "epoch": 122.71641791044776, "grad_norm": 24.201374053955078, "learning_rate": 9.794871794871795e-06, "loss": 41.7294, "step": 5154 }, { "epoch": 122.74029850746268, "grad_norm": 33.25908660888672, "learning_rate": 9.793040293040293e-06, "loss": 41.0633, "step": 5155 }, { "epoch": 122.7641791044776, "grad_norm": 28.24220848083496, "learning_rate": 9.79120879120879e-06, "loss": 42.0281, "step": 5156 }, { "epoch": 122.78805970149254, "grad_norm": 34.96881103515625, "learning_rate": 9.78937728937729e-06, "loss": 40.648, "step": 5157 }, { "epoch": 122.81194029850747, "grad_norm": 29.03910255432129, "learning_rate": 9.787545787545788e-06, "loss": 41.1215, "step": 5158 }, { "epoch": 122.83582089552239, "grad_norm": 30.120044708251953, "learning_rate": 9.785714285714286e-06, "loss": 41.7353, "step": 5159 }, { "epoch": 122.85970149253731, "grad_norm": 30.23310661315918, "learning_rate": 9.783882783882784e-06, "loss": 40.7885, "step": 5160 }, { "epoch": 122.88358208955223, "grad_norm": 29.74199104309082, "learning_rate": 9.782051282051282e-06, "loss": 41.4646, "step": 5161 }, { "epoch": 122.90746268656716, "grad_norm": 27.558090209960938, "learning_rate": 9.780219780219781e-06, "loss": 41.0687, "step": 5162 }, { "epoch": 122.9313432835821, "grad_norm": 29.82993507385254, "learning_rate": 9.77838827838828e-06, "loss": 41.5666, "step": 5163 }, { "epoch": 122.95522388059702, "grad_norm": 24.96250343322754, "learning_rate": 9.776556776556777e-06, "loss": 41.1099, "step": 5164 }, { "epoch": 122.97910447761194, "grad_norm": 34.85405731201172, "learning_rate": 9.774725274725275e-06, "loss": 38.6541, "step": 5165 }, { "epoch": 123.0, "grad_norm": 28.839818954467773, "learning_rate": 9.772893772893773e-06, "loss": 35.9493, "step": 5166 }, { "epoch": 123.02388059701492, "grad_norm": 32.29933547973633, "learning_rate": 9.771062271062271e-06, "loss": 40.9199, "step": 5167 }, { "epoch": 123.04776119402985, "grad_norm": 26.617511749267578, "learning_rate": 9.76923076923077e-06, "loss": 39.6813, "step": 5168 }, { "epoch": 123.07164179104478, "grad_norm": 29.118209838867188, "learning_rate": 9.767399267399268e-06, "loss": 40.971, "step": 5169 }, { "epoch": 123.0955223880597, "grad_norm": 26.295345306396484, "learning_rate": 9.765567765567766e-06, "loss": 40.8982, "step": 5170 }, { "epoch": 123.11940298507463, "grad_norm": 33.3271369934082, "learning_rate": 9.763736263736264e-06, "loss": 41.9198, "step": 5171 }, { "epoch": 123.14328358208955, "grad_norm": 25.857398986816406, "learning_rate": 9.761904761904762e-06, "loss": 39.5357, "step": 5172 }, { "epoch": 123.16716417910447, "grad_norm": 32.97218704223633, "learning_rate": 9.76007326007326e-06, "loss": 41.1038, "step": 5173 }, { "epoch": 123.1910447761194, "grad_norm": 28.88793182373047, "learning_rate": 9.75824175824176e-06, "loss": 40.8065, "step": 5174 }, { "epoch": 123.21492537313434, "grad_norm": 24.024185180664062, "learning_rate": 9.756410256410257e-06, "loss": 39.8969, "step": 5175 }, { "epoch": 123.23880597014926, "grad_norm": 23.380300521850586, "learning_rate": 9.754578754578755e-06, "loss": 40.962, "step": 5176 }, { "epoch": 123.26268656716418, "grad_norm": 28.82596778869629, "learning_rate": 9.752747252747253e-06, "loss": 40.4959, "step": 5177 }, { "epoch": 123.2865671641791, "grad_norm": 19.895410537719727, "learning_rate": 9.750915750915751e-06, "loss": 41.0015, "step": 5178 }, { "epoch": 123.31044776119403, "grad_norm": 28.44173812866211, "learning_rate": 9.74908424908425e-06, "loss": 40.7281, "step": 5179 }, { "epoch": 123.33432835820895, "grad_norm": 22.277742385864258, "learning_rate": 9.747252747252748e-06, "loss": 40.0391, "step": 5180 }, { "epoch": 123.35820895522389, "grad_norm": 27.770545959472656, "learning_rate": 9.745421245421246e-06, "loss": 41.0649, "step": 5181 }, { "epoch": 123.38208955223881, "grad_norm": 22.383668899536133, "learning_rate": 9.743589743589744e-06, "loss": 40.735, "step": 5182 }, { "epoch": 123.40597014925373, "grad_norm": 31.16164779663086, "learning_rate": 9.741758241758242e-06, "loss": 41.1004, "step": 5183 }, { "epoch": 123.42985074626866, "grad_norm": 25.458309173583984, "learning_rate": 9.73992673992674e-06, "loss": 40.4399, "step": 5184 }, { "epoch": 123.45373134328358, "grad_norm": 37.73893356323242, "learning_rate": 9.73809523809524e-06, "loss": 42.08, "step": 5185 }, { "epoch": 123.4776119402985, "grad_norm": 28.723541259765625, "learning_rate": 9.736263736263737e-06, "loss": 40.479, "step": 5186 }, { "epoch": 123.50149253731344, "grad_norm": 30.29216194152832, "learning_rate": 9.734432234432235e-06, "loss": 40.76, "step": 5187 }, { "epoch": 123.52537313432836, "grad_norm": 25.559480667114258, "learning_rate": 9.732600732600733e-06, "loss": 39.7645, "step": 5188 }, { "epoch": 123.54925373134328, "grad_norm": 30.328344345092773, "learning_rate": 9.730769230769231e-06, "loss": 42.2182, "step": 5189 }, { "epoch": 123.57313432835821, "grad_norm": 24.075218200683594, "learning_rate": 9.728937728937729e-06, "loss": 39.7574, "step": 5190 }, { "epoch": 123.59701492537313, "grad_norm": 29.823719024658203, "learning_rate": 9.727106227106228e-06, "loss": 41.1253, "step": 5191 }, { "epoch": 123.62089552238805, "grad_norm": 21.241701126098633, "learning_rate": 9.725274725274726e-06, "loss": 41.0588, "step": 5192 }, { "epoch": 123.64477611940299, "grad_norm": 34.10343933105469, "learning_rate": 9.723443223443224e-06, "loss": 40.7287, "step": 5193 }, { "epoch": 123.66865671641791, "grad_norm": 24.037466049194336, "learning_rate": 9.721611721611722e-06, "loss": 41.1033, "step": 5194 }, { "epoch": 123.69253731343284, "grad_norm": 26.837879180908203, "learning_rate": 9.71978021978022e-06, "loss": 41.1869, "step": 5195 }, { "epoch": 123.71641791044776, "grad_norm": 22.90353012084961, "learning_rate": 9.71794871794872e-06, "loss": 41.4571, "step": 5196 }, { "epoch": 123.74029850746268, "grad_norm": 31.232582092285156, "learning_rate": 9.716117216117217e-06, "loss": 40.6233, "step": 5197 }, { "epoch": 123.7641791044776, "grad_norm": 24.480405807495117, "learning_rate": 9.714285714285715e-06, "loss": 40.8121, "step": 5198 }, { "epoch": 123.78805970149254, "grad_norm": 33.86972427368164, "learning_rate": 9.712454212454213e-06, "loss": 41.596, "step": 5199 }, { "epoch": 123.81194029850747, "grad_norm": 23.727428436279297, "learning_rate": 9.710622710622711e-06, "loss": 41.1944, "step": 5200 }, { "epoch": 123.83582089552239, "grad_norm": 32.29154586791992, "learning_rate": 9.708791208791209e-06, "loss": 40.361, "step": 5201 }, { "epoch": 123.85970149253731, "grad_norm": 22.611989974975586, "learning_rate": 9.706959706959708e-06, "loss": 39.6001, "step": 5202 }, { "epoch": 123.88358208955223, "grad_norm": 33.92005157470703, "learning_rate": 9.705128205128206e-06, "loss": 42.1112, "step": 5203 }, { "epoch": 123.90746268656716, "grad_norm": 28.993995666503906, "learning_rate": 9.703296703296704e-06, "loss": 41.436, "step": 5204 }, { "epoch": 123.9313432835821, "grad_norm": 27.87895393371582, "learning_rate": 9.701465201465202e-06, "loss": 40.7799, "step": 5205 }, { "epoch": 123.95522388059702, "grad_norm": 29.898271560668945, "learning_rate": 9.6996336996337e-06, "loss": 41.2078, "step": 5206 }, { "epoch": 123.97910447761194, "grad_norm": 24.88825798034668, "learning_rate": 9.697802197802198e-06, "loss": 40.1142, "step": 5207 }, { "epoch": 124.0, "grad_norm": 20.064050674438477, "learning_rate": 9.695970695970697e-06, "loss": 35.74, "step": 5208 }, { "epoch": 124.02388059701492, "grad_norm": 30.132843017578125, "learning_rate": 9.694139194139195e-06, "loss": 41.1361, "step": 5209 }, { "epoch": 124.04776119402985, "grad_norm": 21.139568328857422, "learning_rate": 9.692307692307693e-06, "loss": 41.4147, "step": 5210 }, { "epoch": 124.07164179104478, "grad_norm": 29.36510467529297, "learning_rate": 9.690476190476191e-06, "loss": 40.6358, "step": 5211 }, { "epoch": 124.0955223880597, "grad_norm": 27.090465545654297, "learning_rate": 9.688644688644689e-06, "loss": 41.6791, "step": 5212 }, { "epoch": 124.11940298507463, "grad_norm": 24.170644760131836, "learning_rate": 9.686813186813188e-06, "loss": 40.7704, "step": 5213 }, { "epoch": 124.14328358208955, "grad_norm": 26.26068115234375, "learning_rate": 9.684981684981686e-06, "loss": 39.3922, "step": 5214 }, { "epoch": 124.16716417910447, "grad_norm": 22.155975341796875, "learning_rate": 9.683150183150184e-06, "loss": 40.9681, "step": 5215 }, { "epoch": 124.1910447761194, "grad_norm": 25.21603012084961, "learning_rate": 9.681318681318682e-06, "loss": 42.1241, "step": 5216 }, { "epoch": 124.21492537313434, "grad_norm": 16.18509292602539, "learning_rate": 9.67948717948718e-06, "loss": 40.9757, "step": 5217 }, { "epoch": 124.23880597014926, "grad_norm": 21.46571159362793, "learning_rate": 9.677655677655678e-06, "loss": 41.6829, "step": 5218 }, { "epoch": 124.26268656716418, "grad_norm": 18.195297241210938, "learning_rate": 9.675824175824177e-06, "loss": 40.194, "step": 5219 }, { "epoch": 124.2865671641791, "grad_norm": 18.00617790222168, "learning_rate": 9.673992673992675e-06, "loss": 39.3802, "step": 5220 }, { "epoch": 124.31044776119403, "grad_norm": 18.236934661865234, "learning_rate": 9.672161172161173e-06, "loss": 41.0138, "step": 5221 }, { "epoch": 124.33432835820895, "grad_norm": 16.526309967041016, "learning_rate": 9.670329670329671e-06, "loss": 40.2031, "step": 5222 }, { "epoch": 124.35820895522389, "grad_norm": 20.008708953857422, "learning_rate": 9.668498168498169e-06, "loss": 40.9772, "step": 5223 }, { "epoch": 124.38208955223881, "grad_norm": 14.738056182861328, "learning_rate": 9.666666666666667e-06, "loss": 40.5985, "step": 5224 }, { "epoch": 124.40597014925373, "grad_norm": 19.540645599365234, "learning_rate": 9.664835164835166e-06, "loss": 41.1823, "step": 5225 }, { "epoch": 124.42985074626866, "grad_norm": 17.26000213623047, "learning_rate": 9.663003663003664e-06, "loss": 40.2975, "step": 5226 }, { "epoch": 124.45373134328358, "grad_norm": 19.984989166259766, "learning_rate": 9.661172161172162e-06, "loss": 40.6366, "step": 5227 }, { "epoch": 124.4776119402985, "grad_norm": 24.717369079589844, "learning_rate": 9.65934065934066e-06, "loss": 40.665, "step": 5228 }, { "epoch": 124.50149253731344, "grad_norm": 16.406538009643555, "learning_rate": 9.657509157509158e-06, "loss": 40.4751, "step": 5229 }, { "epoch": 124.52537313432836, "grad_norm": 23.191200256347656, "learning_rate": 9.655677655677657e-06, "loss": 40.6781, "step": 5230 }, { "epoch": 124.54925373134328, "grad_norm": 18.91063690185547, "learning_rate": 9.653846153846155e-06, "loss": 40.9245, "step": 5231 }, { "epoch": 124.57313432835821, "grad_norm": 23.012889862060547, "learning_rate": 9.652014652014653e-06, "loss": 41.7688, "step": 5232 }, { "epoch": 124.59701492537313, "grad_norm": 20.35813331604004, "learning_rate": 9.650183150183151e-06, "loss": 40.9169, "step": 5233 }, { "epoch": 124.62089552238805, "grad_norm": 22.06452751159668, "learning_rate": 9.648351648351649e-06, "loss": 41.0061, "step": 5234 }, { "epoch": 124.64477611940299, "grad_norm": 23.17784309387207, "learning_rate": 9.646520146520147e-06, "loss": 40.7811, "step": 5235 }, { "epoch": 124.66865671641791, "grad_norm": 19.43151092529297, "learning_rate": 9.644688644688646e-06, "loss": 40.3725, "step": 5236 }, { "epoch": 124.69253731343284, "grad_norm": 23.144960403442383, "learning_rate": 9.642857142857144e-06, "loss": 42.382, "step": 5237 }, { "epoch": 124.71641791044776, "grad_norm": 18.223936080932617, "learning_rate": 9.641025641025642e-06, "loss": 40.3686, "step": 5238 }, { "epoch": 124.74029850746268, "grad_norm": 21.855030059814453, "learning_rate": 9.63919413919414e-06, "loss": 41.8939, "step": 5239 }, { "epoch": 124.7641791044776, "grad_norm": 21.252012252807617, "learning_rate": 9.637362637362638e-06, "loss": 40.5954, "step": 5240 }, { "epoch": 124.78805970149254, "grad_norm": 20.320215225219727, "learning_rate": 9.635531135531136e-06, "loss": 40.2941, "step": 5241 }, { "epoch": 124.81194029850747, "grad_norm": 20.874921798706055, "learning_rate": 9.633699633699635e-06, "loss": 39.8523, "step": 5242 }, { "epoch": 124.83582089552239, "grad_norm": 20.560138702392578, "learning_rate": 9.631868131868133e-06, "loss": 40.1141, "step": 5243 }, { "epoch": 124.85970149253731, "grad_norm": 16.412206649780273, "learning_rate": 9.630036630036631e-06, "loss": 41.1038, "step": 5244 }, { "epoch": 124.88358208955223, "grad_norm": 20.39592170715332, "learning_rate": 9.628205128205129e-06, "loss": 39.8884, "step": 5245 }, { "epoch": 124.90746268656716, "grad_norm": 16.07599639892578, "learning_rate": 9.626373626373627e-06, "loss": 40.7929, "step": 5246 }, { "epoch": 124.9313432835821, "grad_norm": 14.183424949645996, "learning_rate": 9.624542124542126e-06, "loss": 41.5261, "step": 5247 }, { "epoch": 124.95522388059702, "grad_norm": 17.80473518371582, "learning_rate": 9.622710622710624e-06, "loss": 39.6694, "step": 5248 }, { "epoch": 124.97910447761194, "grad_norm": 16.59119987487793, "learning_rate": 9.620879120879122e-06, "loss": 40.9024, "step": 5249 }, { "epoch": 125.0, "grad_norm": 15.37125301361084, "learning_rate": 9.61904761904762e-06, "loss": 35.3595, "step": 5250 }, { "epoch": 125.02388059701492, "grad_norm": 18.345430374145508, "learning_rate": 9.617216117216118e-06, "loss": 40.2401, "step": 5251 }, { "epoch": 125.04776119402985, "grad_norm": 17.491918563842773, "learning_rate": 9.615384615384616e-06, "loss": 39.8787, "step": 5252 }, { "epoch": 125.07164179104478, "grad_norm": 16.483713150024414, "learning_rate": 9.613553113553115e-06, "loss": 41.3826, "step": 5253 }, { "epoch": 125.0955223880597, "grad_norm": 15.222822189331055, "learning_rate": 9.611721611721613e-06, "loss": 41.1321, "step": 5254 }, { "epoch": 125.11940298507463, "grad_norm": 16.675804138183594, "learning_rate": 9.609890109890111e-06, "loss": 41.0334, "step": 5255 }, { "epoch": 125.14328358208955, "grad_norm": 17.025266647338867, "learning_rate": 9.608058608058609e-06, "loss": 40.6213, "step": 5256 }, { "epoch": 125.16716417910447, "grad_norm": 15.499921798706055, "learning_rate": 9.606227106227107e-06, "loss": 39.8817, "step": 5257 }, { "epoch": 125.1910447761194, "grad_norm": 14.926107406616211, "learning_rate": 9.604395604395605e-06, "loss": 42.1196, "step": 5258 }, { "epoch": 125.21492537313434, "grad_norm": 17.896583557128906, "learning_rate": 9.602564102564104e-06, "loss": 40.1941, "step": 5259 }, { "epoch": 125.23880597014926, "grad_norm": 21.413713455200195, "learning_rate": 9.600732600732602e-06, "loss": 39.9496, "step": 5260 }, { "epoch": 125.26268656716418, "grad_norm": 20.262035369873047, "learning_rate": 9.5989010989011e-06, "loss": 40.8554, "step": 5261 }, { "epoch": 125.2865671641791, "grad_norm": 17.94382095336914, "learning_rate": 9.597069597069598e-06, "loss": 40.618, "step": 5262 }, { "epoch": 125.31044776119403, "grad_norm": 14.720929145812988, "learning_rate": 9.595238095238096e-06, "loss": 41.6634, "step": 5263 }, { "epoch": 125.33432835820895, "grad_norm": 21.567907333374023, "learning_rate": 9.593406593406595e-06, "loss": 41.6142, "step": 5264 }, { "epoch": 125.35820895522389, "grad_norm": 23.717586517333984, "learning_rate": 9.591575091575093e-06, "loss": 41.458, "step": 5265 }, { "epoch": 125.38208955223881, "grad_norm": 13.948038101196289, "learning_rate": 9.589743589743591e-06, "loss": 41.2909, "step": 5266 }, { "epoch": 125.40597014925373, "grad_norm": 27.392465591430664, "learning_rate": 9.587912087912089e-06, "loss": 41.3308, "step": 5267 }, { "epoch": 125.42985074626866, "grad_norm": 20.557374954223633, "learning_rate": 9.586080586080587e-06, "loss": 41.7717, "step": 5268 }, { "epoch": 125.45373134328358, "grad_norm": 21.076601028442383, "learning_rate": 9.584249084249085e-06, "loss": 39.8928, "step": 5269 }, { "epoch": 125.4776119402985, "grad_norm": 22.425079345703125, "learning_rate": 9.582417582417584e-06, "loss": 41.5856, "step": 5270 }, { "epoch": 125.50149253731344, "grad_norm": 19.16175079345703, "learning_rate": 9.580586080586082e-06, "loss": 40.7785, "step": 5271 }, { "epoch": 125.52537313432836, "grad_norm": 18.763565063476562, "learning_rate": 9.57875457875458e-06, "loss": 39.8277, "step": 5272 }, { "epoch": 125.54925373134328, "grad_norm": 18.3720645904541, "learning_rate": 9.576923076923078e-06, "loss": 40.9783, "step": 5273 }, { "epoch": 125.57313432835821, "grad_norm": 18.62623405456543, "learning_rate": 9.575091575091576e-06, "loss": 40.5206, "step": 5274 }, { "epoch": 125.59701492537313, "grad_norm": 20.078596115112305, "learning_rate": 9.573260073260074e-06, "loss": 40.2231, "step": 5275 }, { "epoch": 125.62089552238805, "grad_norm": 28.77025032043457, "learning_rate": 9.571428571428573e-06, "loss": 40.4628, "step": 5276 }, { "epoch": 125.64477611940299, "grad_norm": 17.48457145690918, "learning_rate": 9.569597069597071e-06, "loss": 38.3776, "step": 5277 }, { "epoch": 125.66865671641791, "grad_norm": 29.077014923095703, "learning_rate": 9.567765567765569e-06, "loss": 40.7467, "step": 5278 }, { "epoch": 125.69253731343284, "grad_norm": 22.95465660095215, "learning_rate": 9.565934065934067e-06, "loss": 40.831, "step": 5279 }, { "epoch": 125.71641791044776, "grad_norm": 26.317485809326172, "learning_rate": 9.564102564102565e-06, "loss": 40.4036, "step": 5280 }, { "epoch": 125.74029850746268, "grad_norm": 20.771020889282227, "learning_rate": 9.562271062271064e-06, "loss": 40.7238, "step": 5281 }, { "epoch": 125.7641791044776, "grad_norm": 21.856155395507812, "learning_rate": 9.560439560439562e-06, "loss": 40.1953, "step": 5282 }, { "epoch": 125.78805970149254, "grad_norm": 24.748037338256836, "learning_rate": 9.55860805860806e-06, "loss": 39.5605, "step": 5283 }, { "epoch": 125.81194029850747, "grad_norm": NaN, "learning_rate": 9.556776556776558e-06, "loss": 60.4649, "step": 5284 }, { "epoch": 125.83582089552239, "grad_norm": 18.565261840820312, "learning_rate": 9.556776556776558e-06, "loss": 41.9694, "step": 5285 }, { "epoch": 125.85970149253731, "grad_norm": 29.6701717376709, "learning_rate": 9.554945054945056e-06, "loss": 41.4843, "step": 5286 }, { "epoch": 125.88358208955223, "grad_norm": 20.192317962646484, "learning_rate": 9.553113553113554e-06, "loss": 39.8961, "step": 5287 }, { "epoch": 125.90746268656716, "grad_norm": 25.22960662841797, "learning_rate": 9.551282051282053e-06, "loss": 39.7754, "step": 5288 }, { "epoch": 125.9313432835821, "grad_norm": 19.892139434814453, "learning_rate": 9.549450549450551e-06, "loss": 40.257, "step": 5289 }, { "epoch": 125.95522388059702, "grad_norm": 18.426124572753906, "learning_rate": 9.547619047619049e-06, "loss": 41.252, "step": 5290 }, { "epoch": 125.97910447761194, "grad_norm": 24.085840225219727, "learning_rate": 9.545787545787547e-06, "loss": 41.3266, "step": 5291 }, { "epoch": 126.0, "grad_norm": 14.462137222290039, "learning_rate": 9.543956043956045e-06, "loss": 36.235, "step": 5292 }, { "epoch": 126.02388059701492, "grad_norm": 21.527910232543945, "learning_rate": 9.542124542124543e-06, "loss": 41.1059, "step": 5293 }, { "epoch": 126.04776119402985, "grad_norm": 19.539413452148438, "learning_rate": 9.540293040293042e-06, "loss": 41.8102, "step": 5294 }, { "epoch": 126.07164179104478, "grad_norm": 16.535566329956055, "learning_rate": 9.53846153846154e-06, "loss": 40.4373, "step": 5295 }, { "epoch": 126.0955223880597, "grad_norm": 30.60129737854004, "learning_rate": 9.536630036630038e-06, "loss": 40.3107, "step": 5296 }, { "epoch": 126.11940298507463, "grad_norm": 19.504737854003906, "learning_rate": 9.534798534798536e-06, "loss": 39.7933, "step": 5297 }, { "epoch": 126.14328358208955, "grad_norm": 40.68082809448242, "learning_rate": 9.532967032967034e-06, "loss": 40.4788, "step": 5298 }, { "epoch": 126.16716417910447, "grad_norm": 29.288623809814453, "learning_rate": 9.531135531135532e-06, "loss": 40.3154, "step": 5299 }, { "epoch": 126.1910447761194, "grad_norm": 39.86507797241211, "learning_rate": 9.52930402930403e-06, "loss": 40.9565, "step": 5300 }, { "epoch": 126.21492537313434, "grad_norm": 37.94214630126953, "learning_rate": 9.527472527472527e-06, "loss": 39.7678, "step": 5301 }, { "epoch": 126.23880597014926, "grad_norm": 30.128881454467773, "learning_rate": 9.525641025641025e-06, "loss": 40.9812, "step": 5302 }, { "epoch": 126.26268656716418, "grad_norm": 32.11579895019531, "learning_rate": 9.523809523809525e-06, "loss": 40.7709, "step": 5303 }, { "epoch": 126.2865671641791, "grad_norm": 30.610383987426758, "learning_rate": 9.521978021978023e-06, "loss": 39.3623, "step": 5304 }, { "epoch": 126.31044776119403, "grad_norm": 25.186908721923828, "learning_rate": 9.52014652014652e-06, "loss": 40.5833, "step": 5305 }, { "epoch": 126.33432835820895, "grad_norm": 35.33464050292969, "learning_rate": 9.518315018315018e-06, "loss": 40.4571, "step": 5306 }, { "epoch": 126.35820895522389, "grad_norm": 30.900115966796875, "learning_rate": 9.516483516483516e-06, "loss": 41.5089, "step": 5307 }, { "epoch": 126.38208955223881, "grad_norm": 30.170385360717773, "learning_rate": 9.514652014652014e-06, "loss": 40.4776, "step": 5308 }, { "epoch": 126.40597014925373, "grad_norm": 25.576396942138672, "learning_rate": 9.512820512820514e-06, "loss": 40.5452, "step": 5309 }, { "epoch": 126.42985074626866, "grad_norm": 31.52381706237793, "learning_rate": 9.510989010989012e-06, "loss": 41.0569, "step": 5310 }, { "epoch": 126.45373134328358, "grad_norm": 28.613876342773438, "learning_rate": 9.50915750915751e-06, "loss": 40.4693, "step": 5311 }, { "epoch": 126.4776119402985, "grad_norm": 34.052391052246094, "learning_rate": 9.507326007326007e-06, "loss": 39.9473, "step": 5312 }, { "epoch": 126.50149253731344, "grad_norm": 28.65314292907715, "learning_rate": 9.505494505494505e-06, "loss": 39.012, "step": 5313 }, { "epoch": 126.52537313432836, "grad_norm": 28.400449752807617, "learning_rate": 9.503663003663005e-06, "loss": 40.3811, "step": 5314 }, { "epoch": 126.54925373134328, "grad_norm": 26.874284744262695, "learning_rate": 9.501831501831503e-06, "loss": 39.8546, "step": 5315 }, { "epoch": 126.57313432835821, "grad_norm": 29.638126373291016, "learning_rate": 9.5e-06, "loss": 41.6115, "step": 5316 }, { "epoch": 126.59701492537313, "grad_norm": 27.8295841217041, "learning_rate": 9.498168498168498e-06, "loss": 40.7197, "step": 5317 }, { "epoch": 126.62089552238805, "grad_norm": 34.33130645751953, "learning_rate": 9.496336996336996e-06, "loss": 41.2572, "step": 5318 }, { "epoch": 126.64477611940299, "grad_norm": 28.334978103637695, "learning_rate": 9.494505494505494e-06, "loss": 41.4411, "step": 5319 }, { "epoch": 126.66865671641791, "grad_norm": 28.492050170898438, "learning_rate": 9.492673992673994e-06, "loss": 39.6498, "step": 5320 }, { "epoch": 126.69253731343284, "grad_norm": 26.12206268310547, "learning_rate": 9.490842490842492e-06, "loss": 41.3662, "step": 5321 }, { "epoch": 126.71641791044776, "grad_norm": 31.722883224487305, "learning_rate": 9.48901098901099e-06, "loss": 41.3887, "step": 5322 }, { "epoch": 126.74029850746268, "grad_norm": 26.18402862548828, "learning_rate": 9.487179487179487e-06, "loss": 41.5819, "step": 5323 }, { "epoch": 126.7641791044776, "grad_norm": 29.256437301635742, "learning_rate": 9.485347985347985e-06, "loss": 41.0093, "step": 5324 }, { "epoch": 126.78805970149254, "grad_norm": 26.79650115966797, "learning_rate": 9.483516483516483e-06, "loss": 39.9968, "step": 5325 }, { "epoch": 126.81194029850747, "grad_norm": 27.056190490722656, "learning_rate": 9.481684981684983e-06, "loss": 39.1216, "step": 5326 }, { "epoch": 126.83582089552239, "grad_norm": 22.335859298706055, "learning_rate": 9.47985347985348e-06, "loss": 40.7778, "step": 5327 }, { "epoch": 126.85970149253731, "grad_norm": NaN, "learning_rate": 9.478021978021978e-06, "loss": 58.1092, "step": 5328 }, { "epoch": 126.88358208955223, "grad_norm": 24.419767379760742, "learning_rate": 9.478021978021978e-06, "loss": 40.4599, "step": 5329 }, { "epoch": 126.90746268656716, "grad_norm": 20.041467666625977, "learning_rate": 9.476190476190476e-06, "loss": 40.1727, "step": 5330 }, { "epoch": 126.9313432835821, "grad_norm": 26.40553855895996, "learning_rate": 9.474358974358974e-06, "loss": 41.0726, "step": 5331 }, { "epoch": 126.95522388059702, "grad_norm": 24.665653228759766, "learning_rate": 9.472527472527474e-06, "loss": 42.0728, "step": 5332 }, { "epoch": 126.97910447761194, "grad_norm": 21.605026245117188, "learning_rate": 9.470695970695972e-06, "loss": 40.6338, "step": 5333 }, { "epoch": 127.0, "grad_norm": 20.224733352661133, "learning_rate": 9.46886446886447e-06, "loss": 35.2993, "step": 5334 }, { "epoch": 127.02388059701492, "grad_norm": 21.891176223754883, "learning_rate": 9.467032967032967e-06, "loss": 40.7857, "step": 5335 }, { "epoch": 127.04776119402985, "grad_norm": 24.02487564086914, "learning_rate": 9.465201465201465e-06, "loss": 39.9996, "step": 5336 }, { "epoch": 127.07164179104478, "grad_norm": 26.67331314086914, "learning_rate": 9.463369963369963e-06, "loss": 41.6034, "step": 5337 }, { "epoch": 127.0955223880597, "grad_norm": 18.6497802734375, "learning_rate": 9.461538461538463e-06, "loss": 42.0516, "step": 5338 }, { "epoch": 127.11940298507463, "grad_norm": 31.833471298217773, "learning_rate": 9.45970695970696e-06, "loss": 40.597, "step": 5339 }, { "epoch": 127.14328358208955, "grad_norm": 27.305522918701172, "learning_rate": 9.457875457875458e-06, "loss": 40.7429, "step": 5340 }, { "epoch": 127.16716417910447, "grad_norm": 29.530677795410156, "learning_rate": 9.456043956043956e-06, "loss": 40.4873, "step": 5341 }, { "epoch": 127.1910447761194, "grad_norm": 29.030101776123047, "learning_rate": 9.454212454212454e-06, "loss": 39.0437, "step": 5342 }, { "epoch": 127.21492537313434, "grad_norm": NaN, "learning_rate": 9.452380952380952e-06, "loss": 60.0619, "step": 5343 }, { "epoch": 127.23880597014926, "grad_norm": 31.528074264526367, "learning_rate": 9.452380952380952e-06, "loss": 40.4362, "step": 5344 }, { "epoch": 127.26268656716418, "grad_norm": 28.54173469543457, "learning_rate": 9.450549450549452e-06, "loss": 41.0408, "step": 5345 }, { "epoch": 127.2865671641791, "grad_norm": 33.24728775024414, "learning_rate": 9.44871794871795e-06, "loss": 40.6743, "step": 5346 }, { "epoch": 127.31044776119403, "grad_norm": 29.59555435180664, "learning_rate": 9.446886446886447e-06, "loss": 40.3809, "step": 5347 }, { "epoch": 127.33432835820895, "grad_norm": 32.15523147583008, "learning_rate": 9.445054945054945e-06, "loss": 40.6554, "step": 5348 }, { "epoch": 127.35820895522389, "grad_norm": 31.1282901763916, "learning_rate": 9.443223443223443e-06, "loss": 39.8469, "step": 5349 }, { "epoch": 127.38208955223881, "grad_norm": 30.29323959350586, "learning_rate": 9.441391941391943e-06, "loss": 40.155, "step": 5350 }, { "epoch": 127.40597014925373, "grad_norm": 28.87554931640625, "learning_rate": 9.43956043956044e-06, "loss": 39.2159, "step": 5351 }, { "epoch": 127.42985074626866, "grad_norm": 28.77309226989746, "learning_rate": 9.437728937728938e-06, "loss": 40.7679, "step": 5352 }, { "epoch": 127.45373134328358, "grad_norm": 24.797941207885742, "learning_rate": 9.435897435897436e-06, "loss": 40.2341, "step": 5353 }, { "epoch": 127.4776119402985, "grad_norm": 30.226261138916016, "learning_rate": 9.434065934065934e-06, "loss": 41.5231, "step": 5354 }, { "epoch": 127.50149253731344, "grad_norm": 23.113122940063477, "learning_rate": 9.432234432234432e-06, "loss": 40.5916, "step": 5355 }, { "epoch": 127.52537313432836, "grad_norm": 36.03447341918945, "learning_rate": 9.430402930402932e-06, "loss": 40.8739, "step": 5356 }, { "epoch": 127.54925373134328, "grad_norm": 32.825225830078125, "learning_rate": 9.42857142857143e-06, "loss": 40.2842, "step": 5357 }, { "epoch": 127.57313432835821, "grad_norm": 26.586402893066406, "learning_rate": 9.426739926739927e-06, "loss": 40.3329, "step": 5358 }, { "epoch": 127.59701492537313, "grad_norm": 25.394254684448242, "learning_rate": 9.424908424908425e-06, "loss": 41.4049, "step": 5359 }, { "epoch": 127.62089552238805, "grad_norm": 28.440998077392578, "learning_rate": 9.423076923076923e-06, "loss": 39.6021, "step": 5360 }, { "epoch": 127.64477611940299, "grad_norm": 25.379180908203125, "learning_rate": 9.421245421245421e-06, "loss": 40.6451, "step": 5361 }, { "epoch": 127.66865671641791, "grad_norm": 33.607208251953125, "learning_rate": 9.41941391941392e-06, "loss": 41.685, "step": 5362 }, { "epoch": 127.69253731343284, "grad_norm": 24.925783157348633, "learning_rate": 9.417582417582418e-06, "loss": 41.2479, "step": 5363 }, { "epoch": 127.71641791044776, "grad_norm": 34.97409439086914, "learning_rate": 9.415750915750916e-06, "loss": 40.9911, "step": 5364 }, { "epoch": 127.74029850746268, "grad_norm": 25.85514259338379, "learning_rate": 9.413919413919414e-06, "loss": 39.0472, "step": 5365 }, { "epoch": 127.7641791044776, "grad_norm": 32.1847038269043, "learning_rate": 9.412087912087912e-06, "loss": 40.6535, "step": 5366 }, { "epoch": 127.78805970149254, "grad_norm": 29.844226837158203, "learning_rate": 9.410256410256412e-06, "loss": 39.3755, "step": 5367 }, { "epoch": 127.81194029850747, "grad_norm": 31.974084854125977, "learning_rate": 9.40842490842491e-06, "loss": 41.1137, "step": 5368 }, { "epoch": 127.83582089552239, "grad_norm": 30.38601303100586, "learning_rate": 9.406593406593407e-06, "loss": 40.9944, "step": 5369 }, { "epoch": 127.85970149253731, "grad_norm": 25.314817428588867, "learning_rate": 9.404761904761905e-06, "loss": 40.0512, "step": 5370 }, { "epoch": 127.88358208955223, "grad_norm": 26.56514549255371, "learning_rate": 9.402930402930403e-06, "loss": 40.3742, "step": 5371 }, { "epoch": 127.90746268656716, "grad_norm": 27.275182723999023, "learning_rate": 9.401098901098901e-06, "loss": 41.387, "step": 5372 }, { "epoch": 127.9313432835821, "grad_norm": 24.307111740112305, "learning_rate": 9.3992673992674e-06, "loss": 41.2045, "step": 5373 }, { "epoch": 127.95522388059702, "grad_norm": 34.821327209472656, "learning_rate": 9.397435897435899e-06, "loss": 40.7932, "step": 5374 }, { "epoch": 127.97910447761194, "grad_norm": 31.300153732299805, "learning_rate": 9.395604395604396e-06, "loss": 41.0882, "step": 5375 }, { "epoch": 128.0, "grad_norm": 25.360746383666992, "learning_rate": 9.393772893772894e-06, "loss": 35.6565, "step": 5376 }, { "epoch": 128.02388059701494, "grad_norm": 32.15913772583008, "learning_rate": 9.391941391941392e-06, "loss": 39.5816, "step": 5377 }, { "epoch": 128.04776119402985, "grad_norm": 22.382192611694336, "learning_rate": 9.39010989010989e-06, "loss": 41.6509, "step": 5378 }, { "epoch": 128.07164179104478, "grad_norm": 23.480571746826172, "learning_rate": 9.38827838827839e-06, "loss": 40.7536, "step": 5379 }, { "epoch": 128.0955223880597, "grad_norm": 19.44637107849121, "learning_rate": 9.386446886446887e-06, "loss": 40.499, "step": 5380 }, { "epoch": 128.11940298507463, "grad_norm": 20.136741638183594, "learning_rate": 9.384615384615385e-06, "loss": 41.3551, "step": 5381 }, { "epoch": 128.14328358208957, "grad_norm": 20.516332626342773, "learning_rate": 9.382783882783883e-06, "loss": 40.7676, "step": 5382 }, { "epoch": 128.16716417910447, "grad_norm": 18.942041397094727, "learning_rate": 9.380952380952381e-06, "loss": 40.5404, "step": 5383 }, { "epoch": 128.1910447761194, "grad_norm": 22.05898666381836, "learning_rate": 9.37912087912088e-06, "loss": 40.9921, "step": 5384 }, { "epoch": 128.21492537313432, "grad_norm": 15.969873428344727, "learning_rate": 9.377289377289379e-06, "loss": 40.1066, "step": 5385 }, { "epoch": 128.23880597014926, "grad_norm": 18.438854217529297, "learning_rate": 9.375457875457876e-06, "loss": 39.8564, "step": 5386 }, { "epoch": 128.26268656716417, "grad_norm": 18.526012420654297, "learning_rate": 9.373626373626374e-06, "loss": 40.388, "step": 5387 }, { "epoch": 128.2865671641791, "grad_norm": 13.87939167022705, "learning_rate": 9.371794871794872e-06, "loss": 41.2088, "step": 5388 }, { "epoch": 128.31044776119404, "grad_norm": 19.515592575073242, "learning_rate": 9.36996336996337e-06, "loss": 40.3321, "step": 5389 }, { "epoch": 128.33432835820895, "grad_norm": 17.547893524169922, "learning_rate": 9.36813186813187e-06, "loss": 40.0459, "step": 5390 }, { "epoch": 128.3582089552239, "grad_norm": 24.08388900756836, "learning_rate": 9.366300366300367e-06, "loss": 40.7233, "step": 5391 }, { "epoch": 128.3820895522388, "grad_norm": 25.02381134033203, "learning_rate": 9.364468864468865e-06, "loss": 41.4629, "step": 5392 }, { "epoch": 128.40597014925373, "grad_norm": 17.845233917236328, "learning_rate": 9.362637362637363e-06, "loss": 40.136, "step": 5393 }, { "epoch": 128.42985074626867, "grad_norm": 24.73293685913086, "learning_rate": 9.360805860805861e-06, "loss": 40.1744, "step": 5394 }, { "epoch": 128.45373134328358, "grad_norm": 18.738384246826172, "learning_rate": 9.358974358974359e-06, "loss": 40.9566, "step": 5395 }, { "epoch": 128.47761194029852, "grad_norm": 22.628456115722656, "learning_rate": 9.357142857142859e-06, "loss": 39.9645, "step": 5396 }, { "epoch": 128.50149253731342, "grad_norm": 19.057598114013672, "learning_rate": 9.355311355311356e-06, "loss": 38.6498, "step": 5397 }, { "epoch": 128.52537313432836, "grad_norm": 20.58139419555664, "learning_rate": 9.353479853479854e-06, "loss": 41.7546, "step": 5398 }, { "epoch": 128.54925373134327, "grad_norm": 23.596145629882812, "learning_rate": 9.351648351648352e-06, "loss": 39.7231, "step": 5399 }, { "epoch": 128.5731343283582, "grad_norm": 18.677183151245117, "learning_rate": 9.34981684981685e-06, "loss": 39.6687, "step": 5400 }, { "epoch": 128.59701492537314, "grad_norm": 22.48053550720215, "learning_rate": 9.34798534798535e-06, "loss": 41.1109, "step": 5401 }, { "epoch": 128.62089552238805, "grad_norm": 18.408390045166016, "learning_rate": 9.346153846153847e-06, "loss": 40.0313, "step": 5402 }, { "epoch": 128.644776119403, "grad_norm": 18.866302490234375, "learning_rate": 9.344322344322345e-06, "loss": 41.4068, "step": 5403 }, { "epoch": 128.6686567164179, "grad_norm": 18.15769386291504, "learning_rate": 9.342490842490843e-06, "loss": 40.0289, "step": 5404 }, { "epoch": 128.69253731343284, "grad_norm": 21.213743209838867, "learning_rate": 9.340659340659341e-06, "loss": 41.5406, "step": 5405 }, { "epoch": 128.71641791044777, "grad_norm": 14.050131797790527, "learning_rate": 9.338827838827839e-06, "loss": 40.9447, "step": 5406 }, { "epoch": 128.74029850746268, "grad_norm": 20.822832107543945, "learning_rate": 9.336996336996339e-06, "loss": 40.7616, "step": 5407 }, { "epoch": 128.76417910447762, "grad_norm": 16.915830612182617, "learning_rate": 9.335164835164836e-06, "loss": 39.8529, "step": 5408 }, { "epoch": 128.78805970149253, "grad_norm": 24.053998947143555, "learning_rate": 9.333333333333334e-06, "loss": 41.2986, "step": 5409 }, { "epoch": 128.81194029850747, "grad_norm": 21.357769012451172, "learning_rate": 9.331501831501832e-06, "loss": 41.6824, "step": 5410 }, { "epoch": 128.83582089552237, "grad_norm": 16.31240463256836, "learning_rate": 9.32967032967033e-06, "loss": 40.1068, "step": 5411 }, { "epoch": 128.8597014925373, "grad_norm": 18.027111053466797, "learning_rate": 9.327838827838828e-06, "loss": 39.9807, "step": 5412 }, { "epoch": 128.88358208955225, "grad_norm": 17.471216201782227, "learning_rate": 9.326007326007328e-06, "loss": 40.1997, "step": 5413 }, { "epoch": 128.90746268656716, "grad_norm": 14.707521438598633, "learning_rate": 9.324175824175825e-06, "loss": 40.2096, "step": 5414 }, { "epoch": 128.9313432835821, "grad_norm": 16.55643081665039, "learning_rate": 9.322344322344323e-06, "loss": 41.4804, "step": 5415 }, { "epoch": 128.955223880597, "grad_norm": 17.47356414794922, "learning_rate": 9.320512820512821e-06, "loss": 40.0436, "step": 5416 }, { "epoch": 128.97910447761194, "grad_norm": 18.92135238647461, "learning_rate": 9.318681318681319e-06, "loss": 40.1894, "step": 5417 }, { "epoch": 129.0, "grad_norm": 17.002300262451172, "learning_rate": 9.316849816849819e-06, "loss": 36.4327, "step": 5418 }, { "epoch": 129.02388059701494, "grad_norm": 18.210742950439453, "learning_rate": 9.315018315018316e-06, "loss": 40.8625, "step": 5419 }, { "epoch": 129.04776119402985, "grad_norm": 17.84212875366211, "learning_rate": 9.313186813186814e-06, "loss": 39.4351, "step": 5420 }, { "epoch": 129.07164179104478, "grad_norm": 16.789724349975586, "learning_rate": 9.311355311355312e-06, "loss": 40.7753, "step": 5421 }, { "epoch": 129.0955223880597, "grad_norm": 16.961986541748047, "learning_rate": 9.30952380952381e-06, "loss": 39.5132, "step": 5422 }, { "epoch": 129.11940298507463, "grad_norm": 19.639286041259766, "learning_rate": 9.307692307692308e-06, "loss": 41.1626, "step": 5423 }, { "epoch": 129.14328358208957, "grad_norm": 15.542900085449219, "learning_rate": 9.305860805860808e-06, "loss": 37.8755, "step": 5424 }, { "epoch": 129.16716417910447, "grad_norm": 21.57238006591797, "learning_rate": 9.304029304029305e-06, "loss": 40.1871, "step": 5425 }, { "epoch": 129.1910447761194, "grad_norm": 18.78668785095215, "learning_rate": 9.302197802197803e-06, "loss": 40.0839, "step": 5426 }, { "epoch": 129.21492537313432, "grad_norm": 19.953189849853516, "learning_rate": 9.300366300366301e-06, "loss": 39.1708, "step": 5427 }, { "epoch": 129.23880597014926, "grad_norm": 19.159618377685547, "learning_rate": 9.298534798534799e-06, "loss": 40.0572, "step": 5428 }, { "epoch": 129.26268656716417, "grad_norm": 20.108295440673828, "learning_rate": 9.296703296703297e-06, "loss": 40.5194, "step": 5429 }, { "epoch": 129.2865671641791, "grad_norm": 18.625139236450195, "learning_rate": 9.294871794871796e-06, "loss": 39.8611, "step": 5430 }, { "epoch": 129.31044776119404, "grad_norm": 23.48390007019043, "learning_rate": 9.293040293040294e-06, "loss": 39.9747, "step": 5431 }, { "epoch": 129.33432835820895, "grad_norm": 17.067564010620117, "learning_rate": 9.291208791208792e-06, "loss": 40.5828, "step": 5432 }, { "epoch": 129.3582089552239, "grad_norm": 24.928804397583008, "learning_rate": 9.28937728937729e-06, "loss": 41.1937, "step": 5433 }, { "epoch": 129.3820895522388, "grad_norm": 20.61871910095215, "learning_rate": 9.287545787545788e-06, "loss": 40.7314, "step": 5434 }, { "epoch": 129.40597014925373, "grad_norm": 28.40680694580078, "learning_rate": 9.285714285714288e-06, "loss": 40.6506, "step": 5435 }, { "epoch": 129.42985074626867, "grad_norm": 22.84246253967285, "learning_rate": 9.283882783882785e-06, "loss": 41.2816, "step": 5436 }, { "epoch": 129.45373134328358, "grad_norm": 29.882131576538086, "learning_rate": 9.282051282051283e-06, "loss": 40.7815, "step": 5437 }, { "epoch": 129.47761194029852, "grad_norm": 21.24380111694336, "learning_rate": 9.280219780219781e-06, "loss": 41.2894, "step": 5438 }, { "epoch": 129.50149253731342, "grad_norm": 23.89835548400879, "learning_rate": 9.278388278388279e-06, "loss": 40.1416, "step": 5439 }, { "epoch": 129.52537313432836, "grad_norm": 21.517475128173828, "learning_rate": 9.276556776556777e-06, "loss": 41.05, "step": 5440 }, { "epoch": 129.54925373134327, "grad_norm": 17.225387573242188, "learning_rate": 9.274725274725277e-06, "loss": 41.058, "step": 5441 }, { "epoch": 129.5731343283582, "grad_norm": 17.844186782836914, "learning_rate": 9.272893772893774e-06, "loss": 41.5639, "step": 5442 }, { "epoch": 129.59701492537314, "grad_norm": 18.40740394592285, "learning_rate": 9.271062271062272e-06, "loss": 40.1886, "step": 5443 }, { "epoch": 129.62089552238805, "grad_norm": 16.104562759399414, "learning_rate": 9.26923076923077e-06, "loss": 40.9611, "step": 5444 }, { "epoch": 129.644776119403, "grad_norm": 15.872597694396973, "learning_rate": 9.267399267399268e-06, "loss": 41.0908, "step": 5445 }, { "epoch": 129.6686567164179, "grad_norm": 16.474458694458008, "learning_rate": 9.265567765567766e-06, "loss": 39.5688, "step": 5446 }, { "epoch": 129.69253731343284, "grad_norm": 18.232454299926758, "learning_rate": 9.263736263736265e-06, "loss": 40.8028, "step": 5447 }, { "epoch": 129.71641791044777, "grad_norm": 16.598379135131836, "learning_rate": 9.261904761904763e-06, "loss": 40.5047, "step": 5448 }, { "epoch": 129.74029850746268, "grad_norm": 19.88945770263672, "learning_rate": 9.260073260073261e-06, "loss": 41.5815, "step": 5449 }, { "epoch": 129.76417910447762, "grad_norm": 18.88849449157715, "learning_rate": 9.258241758241759e-06, "loss": 41.1635, "step": 5450 }, { "epoch": 129.78805970149253, "grad_norm": 16.19620704650879, "learning_rate": 9.256410256410257e-06, "loss": 39.988, "step": 5451 }, { "epoch": 129.81194029850747, "grad_norm": 17.755510330200195, "learning_rate": 9.254578754578757e-06, "loss": 39.5852, "step": 5452 }, { "epoch": 129.83582089552237, "grad_norm": 18.566909790039062, "learning_rate": 9.252747252747254e-06, "loss": 40.5909, "step": 5453 }, { "epoch": 129.8597014925373, "grad_norm": NaN, "learning_rate": 9.250915750915752e-06, "loss": 36.0401, "step": 5454 }, { "epoch": 129.88358208955225, "grad_norm": 16.708894729614258, "learning_rate": 9.250915750915752e-06, "loss": 40.8427, "step": 5455 }, { "epoch": 129.90746268656716, "grad_norm": 18.091861724853516, "learning_rate": 9.24908424908425e-06, "loss": 40.9939, "step": 5456 }, { "epoch": 129.9313432835821, "grad_norm": 19.519044876098633, "learning_rate": 9.247252747252748e-06, "loss": 40.0013, "step": 5457 }, { "epoch": 129.955223880597, "grad_norm": 15.587594032287598, "learning_rate": 9.245421245421246e-06, "loss": 40.2778, "step": 5458 }, { "epoch": 129.97910447761194, "grad_norm": 19.010610580444336, "learning_rate": 9.243589743589745e-06, "loss": 40.7324, "step": 5459 }, { "epoch": 130.0, "grad_norm": 16.66925048828125, "learning_rate": 9.241758241758243e-06, "loss": 35.6576, "step": 5460 }, { "epoch": 130.0, "step": 5460, "total_flos": 2.6841554727339034e+17, "train_loss": 3.138686427441272, "train_runtime": 12817.2093, "train_samples_per_second": 54.283, "train_steps_per_second": 0.426 }, { "epoch": 130.02388059701494, "grad_norm": 18.575708389282227, "learning_rate": 1e-05, "loss": 40.4192, "step": 5461 }, { "epoch": 130.04776119402985, "grad_norm": Infinity, "learning_rate": 9.998299319727893e-06, "loss": 45.3289, "step": 5462 }, { "epoch": 130.07164179104478, "grad_norm": 228.45680236816406, "learning_rate": 9.998299319727893e-06, "loss": 44.984, "step": 5463 }, { "epoch": 130.0955223880597, "grad_norm": 129.42657470703125, "learning_rate": 9.996598639455783e-06, "loss": 44.7713, "step": 5464 }, { "epoch": 130.11940298507463, "grad_norm": 55.86802291870117, "learning_rate": 9.994897959183675e-06, "loss": 43.4058, "step": 5465 }, { "epoch": 130.14328358208957, "grad_norm": 93.52910614013672, "learning_rate": 9.993197278911566e-06, "loss": 42.3555, "step": 5466 }, { "epoch": 130.16716417910447, "grad_norm": 49.01381301879883, "learning_rate": 9.991496598639456e-06, "loss": 41.0505, "step": 5467 }, { "epoch": 130.1910447761194, "grad_norm": 54.29282760620117, "learning_rate": 9.989795918367348e-06, "loss": 40.938, "step": 5468 }, { "epoch": 130.21492537313432, "grad_norm": 37.95635223388672, "learning_rate": 9.988095238095239e-06, "loss": 40.3188, "step": 5469 }, { "epoch": 130.23880597014926, "grad_norm": 36.652523040771484, "learning_rate": 9.98639455782313e-06, "loss": 41.5117, "step": 5470 }, { "epoch": 130.26268656716417, "grad_norm": 40.90021896362305, "learning_rate": 9.984693877551021e-06, "loss": 42.5635, "step": 5471 }, { "epoch": 130.2865671641791, "grad_norm": 28.823503494262695, "learning_rate": 9.982993197278913e-06, "loss": 41.404, "step": 5472 }, { "epoch": 130.31044776119404, "grad_norm": 24.62152862548828, "learning_rate": 9.981292517006804e-06, "loss": 40.9372, "step": 5473 }, { "epoch": 130.33432835820895, "grad_norm": 29.644268035888672, "learning_rate": 9.979591836734694e-06, "loss": 40.5455, "step": 5474 }, { "epoch": 130.3582089552239, "grad_norm": 21.859779357910156, "learning_rate": 9.977891156462586e-06, "loss": 41.4169, "step": 5475 }, { "epoch": 130.3820895522388, "grad_norm": 23.489789962768555, "learning_rate": 9.976190476190477e-06, "loss": 39.8592, "step": 5476 }, { "epoch": 130.40597014925373, "grad_norm": 18.39851951599121, "learning_rate": 9.974489795918369e-06, "loss": 39.969, "step": 5477 }, { "epoch": 130.42985074626867, "grad_norm": 25.369873046875, "learning_rate": 9.972789115646259e-06, "loss": 39.7739, "step": 5478 }, { "epoch": 130.45373134328358, "grad_norm": 22.13943862915039, "learning_rate": 9.97108843537415e-06, "loss": 40.0817, "step": 5479 }, { "epoch": 130.47761194029852, "grad_norm": 22.7308292388916, "learning_rate": 9.969387755102042e-06, "loss": 41.4501, "step": 5480 }, { "epoch": 130.50149253731342, "grad_norm": 16.09027671813965, "learning_rate": 9.967687074829932e-06, "loss": 41.1944, "step": 5481 }, { "epoch": 130.52537313432836, "grad_norm": 20.12171745300293, "learning_rate": 9.965986394557824e-06, "loss": 40.5477, "step": 5482 }, { "epoch": 130.54925373134327, "grad_norm": 18.88404655456543, "learning_rate": 9.964285714285714e-06, "loss": 39.402, "step": 5483 }, { "epoch": 130.5731343283582, "grad_norm": 20.32000732421875, "learning_rate": 9.962585034013607e-06, "loss": 40.6678, "step": 5484 }, { "epoch": 130.59701492537314, "grad_norm": 20.351774215698242, "learning_rate": 9.960884353741499e-06, "loss": 40.075, "step": 5485 }, { "epoch": 130.62089552238805, "grad_norm": 14.881600379943848, "learning_rate": 9.959183673469387e-06, "loss": 40.0113, "step": 5486 }, { "epoch": 130.644776119403, "grad_norm": 21.500431060791016, "learning_rate": 9.95748299319728e-06, "loss": 39.5136, "step": 5487 }, { "epoch": 130.6686567164179, "grad_norm": 20.59417152404785, "learning_rate": 9.955782312925172e-06, "loss": 40.3664, "step": 5488 }, { "epoch": 130.69253731343284, "grad_norm": 20.141138076782227, "learning_rate": 9.954081632653062e-06, "loss": 40.8991, "step": 5489 }, { "epoch": 130.71641791044777, "grad_norm": 16.41176986694336, "learning_rate": 9.952380952380954e-06, "loss": 40.3613, "step": 5490 }, { "epoch": 130.74029850746268, "grad_norm": 20.832176208496094, "learning_rate": 9.950680272108844e-06, "loss": 40.709, "step": 5491 }, { "epoch": 130.76417910447762, "grad_norm": 18.355520248413086, "learning_rate": 9.948979591836737e-06, "loss": 41.1332, "step": 5492 }, { "epoch": 130.78805970149253, "grad_norm": 21.0073299407959, "learning_rate": 9.947278911564627e-06, "loss": 40.496, "step": 5493 }, { "epoch": 130.81194029850747, "grad_norm": 18.184412002563477, "learning_rate": 9.945578231292517e-06, "loss": 40.9514, "step": 5494 }, { "epoch": 130.83582089552237, "grad_norm": 23.272981643676758, "learning_rate": 9.94387755102041e-06, "loss": 40.2932, "step": 5495 }, { "epoch": 130.8597014925373, "grad_norm": 16.066865921020508, "learning_rate": 9.9421768707483e-06, "loss": 41.2145, "step": 5496 }, { "epoch": 130.88358208955225, "grad_norm": 19.863813400268555, "learning_rate": 9.940476190476192e-06, "loss": 40.9969, "step": 5497 }, { "epoch": 130.90746268656716, "grad_norm": 20.84225082397461, "learning_rate": 9.938775510204082e-06, "loss": 40.1575, "step": 5498 }, { "epoch": 130.9313432835821, "grad_norm": 16.452865600585938, "learning_rate": 9.937074829931974e-06, "loss": 38.9115, "step": 5499 }, { "epoch": 130.955223880597, "grad_norm": 19.1783390045166, "learning_rate": 9.935374149659865e-06, "loss": 40.7441, "step": 5500 }, { "epoch": 130.97910447761194, "grad_norm": 21.94544219970703, "learning_rate": 9.933673469387755e-06, "loss": 41.8275, "step": 5501 }, { "epoch": 131.0, "grad_norm": 13.472136497497559, "learning_rate": 9.931972789115647e-06, "loss": 34.5508, "step": 5502 }, { "epoch": 131.02388059701494, "grad_norm": 18.82528305053711, "learning_rate": 9.930272108843538e-06, "loss": 41.3352, "step": 5503 }, { "epoch": 131.04776119402985, "grad_norm": 16.815523147583008, "learning_rate": 9.92857142857143e-06, "loss": 39.1606, "step": 5504 }, { "epoch": 131.07164179104478, "grad_norm": 18.014087677001953, "learning_rate": 9.92687074829932e-06, "loss": 41.389, "step": 5505 }, { "epoch": 131.0955223880597, "grad_norm": 18.153976440429688, "learning_rate": 9.92517006802721e-06, "loss": 41.0835, "step": 5506 }, { "epoch": 131.11940298507463, "grad_norm": 16.97452163696289, "learning_rate": 9.923469387755103e-06, "loss": 41.149, "step": 5507 }, { "epoch": 131.14328358208957, "grad_norm": 16.83989143371582, "learning_rate": 9.921768707482993e-06, "loss": 40.9826, "step": 5508 }, { "epoch": 131.16716417910447, "grad_norm": 15.62459659576416, "learning_rate": 9.920068027210885e-06, "loss": 41.0703, "step": 5509 }, { "epoch": 131.1910447761194, "grad_norm": 14.438183784484863, "learning_rate": 9.918367346938776e-06, "loss": 41.3628, "step": 5510 }, { "epoch": 131.21492537313432, "grad_norm": 23.413602828979492, "learning_rate": 9.916666666666668e-06, "loss": 40.0985, "step": 5511 }, { "epoch": 131.23880597014926, "grad_norm": 19.558998107910156, "learning_rate": 9.91496598639456e-06, "loss": 40.2111, "step": 5512 }, { "epoch": 131.26268656716417, "grad_norm": 12.165032386779785, "learning_rate": 9.913265306122449e-06, "loss": 40.6546, "step": 5513 }, { "epoch": 131.2865671641791, "grad_norm": 18.257869720458984, "learning_rate": 9.91156462585034e-06, "loss": 42.2008, "step": 5514 }, { "epoch": 131.31044776119404, "grad_norm": 21.828651428222656, "learning_rate": 9.909863945578233e-06, "loss": 39.9445, "step": 5515 }, { "epoch": 131.33432835820895, "grad_norm": 16.42722511291504, "learning_rate": 9.908163265306123e-06, "loss": 40.4844, "step": 5516 }, { "epoch": 131.3582089552239, "grad_norm": 15.241820335388184, "learning_rate": 9.906462585034015e-06, "loss": 39.5317, "step": 5517 }, { "epoch": 131.3820895522388, "grad_norm": 14.893516540527344, "learning_rate": 9.904761904761906e-06, "loss": 40.9785, "step": 5518 }, { "epoch": 131.40597014925373, "grad_norm": 15.520328521728516, "learning_rate": 9.903061224489798e-06, "loss": 40.2672, "step": 5519 }, { "epoch": 131.42985074626867, "grad_norm": 20.18976402282715, "learning_rate": 9.901360544217688e-06, "loss": 39.9149, "step": 5520 }, { "epoch": 131.45373134328358, "grad_norm": 17.364437103271484, "learning_rate": 9.899659863945579e-06, "loss": 39.2209, "step": 5521 }, { "epoch": 131.47761194029852, "grad_norm": 18.58165168762207, "learning_rate": 9.89795918367347e-06, "loss": 41.0448, "step": 5522 }, { "epoch": 131.50149253731342, "grad_norm": 14.185492515563965, "learning_rate": 9.896258503401361e-06, "loss": 40.0128, "step": 5523 }, { "epoch": 131.52537313432836, "grad_norm": 18.741228103637695, "learning_rate": 9.894557823129253e-06, "loss": 40.4496, "step": 5524 }, { "epoch": 131.54925373134327, "grad_norm": 16.92427635192871, "learning_rate": 9.892857142857143e-06, "loss": 39.8989, "step": 5525 }, { "epoch": 131.5731343283582, "grad_norm": 18.248445510864258, "learning_rate": 9.891156462585036e-06, "loss": 39.7473, "step": 5526 }, { "epoch": 131.59701492537314, "grad_norm": 18.88313865661621, "learning_rate": 9.889455782312926e-06, "loss": 40.1553, "step": 5527 }, { "epoch": 131.62089552238805, "grad_norm": 19.88404655456543, "learning_rate": 9.887755102040816e-06, "loss": 40.9204, "step": 5528 }, { "epoch": 131.644776119403, "grad_norm": 17.367191314697266, "learning_rate": 9.886054421768708e-06, "loss": 39.9127, "step": 5529 }, { "epoch": 131.6686567164179, "grad_norm": 18.826900482177734, "learning_rate": 9.884353741496599e-06, "loss": 40.3812, "step": 5530 }, { "epoch": 131.69253731343284, "grad_norm": 16.4368953704834, "learning_rate": 9.882653061224491e-06, "loss": 40.536, "step": 5531 }, { "epoch": 131.71641791044777, "grad_norm": 17.32594108581543, "learning_rate": 9.880952380952381e-06, "loss": 41.1266, "step": 5532 }, { "epoch": 131.74029850746268, "grad_norm": 25.614294052124023, "learning_rate": 9.879251700680272e-06, "loss": 40.5684, "step": 5533 }, { "epoch": 131.76417910447762, "grad_norm": 18.330366134643555, "learning_rate": 9.877551020408164e-06, "loss": 40.9372, "step": 5534 }, { "epoch": 131.78805970149253, "grad_norm": 17.743070602416992, "learning_rate": 9.875850340136054e-06, "loss": 40.3854, "step": 5535 }, { "epoch": 131.81194029850747, "grad_norm": 20.566457748413086, "learning_rate": 9.874149659863946e-06, "loss": 40.7082, "step": 5536 }, { "epoch": 131.83582089552237, "grad_norm": 24.459489822387695, "learning_rate": 9.872448979591838e-06, "loss": 41.2391, "step": 5537 }, { "epoch": 131.8597014925373, "grad_norm": 15.706886291503906, "learning_rate": 9.870748299319729e-06, "loss": 39.2229, "step": 5538 }, { "epoch": 131.88358208955225, "grad_norm": 18.37516975402832, "learning_rate": 9.869047619047621e-06, "loss": 39.3149, "step": 5539 }, { "epoch": 131.90746268656716, "grad_norm": 18.098176956176758, "learning_rate": 9.867346938775511e-06, "loss": 39.4227, "step": 5540 }, { "epoch": 131.9313432835821, "grad_norm": 14.898340225219727, "learning_rate": 9.865646258503402e-06, "loss": 39.4192, "step": 5541 }, { "epoch": 131.955223880597, "grad_norm": 21.42721176147461, "learning_rate": 9.863945578231294e-06, "loss": 40.879, "step": 5542 }, { "epoch": 131.97910447761194, "grad_norm": 19.07784652709961, "learning_rate": 9.862244897959184e-06, "loss": 40.1433, "step": 5543 }, { "epoch": 132.0, "grad_norm": 16.04290199279785, "learning_rate": 9.860544217687076e-06, "loss": 34.6443, "step": 5544 }, { "epoch": 132.02388059701494, "grad_norm": 17.641210556030273, "learning_rate": 9.858843537414967e-06, "loss": 40.6522, "step": 5545 }, { "epoch": 132.04776119402985, "grad_norm": 22.368637084960938, "learning_rate": 9.857142857142859e-06, "loss": 41.3016, "step": 5546 }, { "epoch": 132.07164179104478, "grad_norm": 19.166706085205078, "learning_rate": 9.85544217687075e-06, "loss": 40.4856, "step": 5547 }, { "epoch": 132.0955223880597, "grad_norm": 20.525333404541016, "learning_rate": 9.85374149659864e-06, "loss": 41.073, "step": 5548 }, { "epoch": 132.11940298507463, "grad_norm": 19.90342140197754, "learning_rate": 9.852040816326532e-06, "loss": 39.3624, "step": 5549 }, { "epoch": 132.14328358208957, "grad_norm": 15.503653526306152, "learning_rate": 9.850340136054422e-06, "loss": 41.8662, "step": 5550 }, { "epoch": 132.16716417910447, "grad_norm": 15.871750831604004, "learning_rate": 9.848639455782314e-06, "loss": 39.9852, "step": 5551 }, { "epoch": 132.1910447761194, "grad_norm": 25.539695739746094, "learning_rate": 9.846938775510205e-06, "loss": 38.6591, "step": 5552 }, { "epoch": 132.21492537313432, "grad_norm": 20.75281524658203, "learning_rate": 9.845238095238097e-06, "loss": 40.6497, "step": 5553 }, { "epoch": 132.23880597014926, "grad_norm": 17.729764938354492, "learning_rate": 9.843537414965987e-06, "loss": 39.8891, "step": 5554 }, { "epoch": 132.26268656716417, "grad_norm": 16.33675765991211, "learning_rate": 9.841836734693878e-06, "loss": 41.0467, "step": 5555 }, { "epoch": 132.2865671641791, "grad_norm": 16.76603126525879, "learning_rate": 9.84013605442177e-06, "loss": 41.4239, "step": 5556 }, { "epoch": 132.31044776119404, "grad_norm": 20.384475708007812, "learning_rate": 9.83843537414966e-06, "loss": 40.4335, "step": 5557 }, { "epoch": 132.33432835820895, "grad_norm": 16.604625701904297, "learning_rate": 9.836734693877552e-06, "loss": 40.5462, "step": 5558 }, { "epoch": 132.3582089552239, "grad_norm": 19.941377639770508, "learning_rate": 9.835034013605444e-06, "loss": 41.7404, "step": 5559 }, { "epoch": 132.3820895522388, "grad_norm": 15.349913597106934, "learning_rate": 9.833333333333333e-06, "loss": 38.5686, "step": 5560 }, { "epoch": 132.40597014925373, "grad_norm": 21.03326988220215, "learning_rate": 9.831632653061225e-06, "loss": 41.3306, "step": 5561 }, { "epoch": 132.42985074626867, "grad_norm": 18.98489761352539, "learning_rate": 9.829931972789115e-06, "loss": 39.5212, "step": 5562 }, { "epoch": 132.45373134328358, "grad_norm": 15.476447105407715, "learning_rate": 9.828231292517008e-06, "loss": 39.31, "step": 5563 }, { "epoch": 132.47761194029852, "grad_norm": 17.050857543945312, "learning_rate": 9.8265306122449e-06, "loss": 40.1559, "step": 5564 }, { "epoch": 132.50149253731342, "grad_norm": 17.630809783935547, "learning_rate": 9.82482993197279e-06, "loss": 39.8047, "step": 5565 }, { "epoch": 132.52537313432836, "grad_norm": NaN, "learning_rate": 9.823129251700682e-06, "loss": 54.2096, "step": 5566 }, { "epoch": 132.54925373134327, "grad_norm": 18.909269332885742, "learning_rate": 9.823129251700682e-06, "loss": 40.1987, "step": 5567 }, { "epoch": 132.5731343283582, "grad_norm": 20.534330368041992, "learning_rate": 9.821428571428573e-06, "loss": 40.7122, "step": 5568 }, { "epoch": 132.59701492537314, "grad_norm": 16.048946380615234, "learning_rate": 9.819727891156463e-06, "loss": 40.2584, "step": 5569 }, { "epoch": 132.62089552238805, "grad_norm": 14.615914344787598, "learning_rate": 9.818027210884355e-06, "loss": 40.1147, "step": 5570 }, { "epoch": 132.644776119403, "grad_norm": 16.347827911376953, "learning_rate": 9.816326530612245e-06, "loss": 41.0445, "step": 5571 }, { "epoch": 132.6686567164179, "grad_norm": 20.02432632446289, "learning_rate": 9.814625850340137e-06, "loss": 40.1337, "step": 5572 }, { "epoch": 132.69253731343284, "grad_norm": 18.087976455688477, "learning_rate": 9.812925170068028e-06, "loss": 39.9454, "step": 5573 }, { "epoch": 132.71641791044777, "grad_norm": 16.806800842285156, "learning_rate": 9.81122448979592e-06, "loss": 40.7469, "step": 5574 }, { "epoch": 132.74029850746268, "grad_norm": 14.957366943359375, "learning_rate": 9.80952380952381e-06, "loss": 41.9708, "step": 5575 }, { "epoch": 132.76417910447762, "grad_norm": 15.429438591003418, "learning_rate": 9.8078231292517e-06, "loss": 40.5727, "step": 5576 }, { "epoch": 132.78805970149253, "grad_norm": 18.437835693359375, "learning_rate": 9.806122448979593e-06, "loss": 39.3392, "step": 5577 }, { "epoch": 132.81194029850747, "grad_norm": 23.49526023864746, "learning_rate": 9.804421768707483e-06, "loss": 40.8007, "step": 5578 }, { "epoch": 132.83582089552237, "grad_norm": 15.580110549926758, "learning_rate": 9.802721088435375e-06, "loss": 40.2113, "step": 5579 }, { "epoch": 132.8597014925373, "grad_norm": 13.494383811950684, "learning_rate": 9.801020408163266e-06, "loss": 39.5501, "step": 5580 }, { "epoch": 132.88358208955225, "grad_norm": 14.148122787475586, "learning_rate": 9.799319727891158e-06, "loss": 39.5385, "step": 5581 }, { "epoch": 132.90746268656716, "grad_norm": 14.981057167053223, "learning_rate": 9.797619047619048e-06, "loss": 40.1832, "step": 5582 }, { "epoch": 132.9313432835821, "grad_norm": 17.651594161987305, "learning_rate": 9.795918367346939e-06, "loss": 40.8822, "step": 5583 }, { "epoch": 132.955223880597, "grad_norm": 23.53675079345703, "learning_rate": 9.79421768707483e-06, "loss": 40.4374, "step": 5584 }, { "epoch": 132.97910447761194, "grad_norm": 14.797532081604004, "learning_rate": 9.792517006802721e-06, "loss": 40.3035, "step": 5585 }, { "epoch": 133.0, "grad_norm": 19.286834716796875, "learning_rate": 9.790816326530613e-06, "loss": 35.0022, "step": 5586 }, { "epoch": 133.02388059701494, "grad_norm": 25.947200775146484, "learning_rate": 9.789115646258505e-06, "loss": 40.0884, "step": 5587 }, { "epoch": 133.04776119402985, "grad_norm": 17.286386489868164, "learning_rate": 9.787414965986394e-06, "loss": 40.6761, "step": 5588 }, { "epoch": 133.07164179104478, "grad_norm": 16.327795028686523, "learning_rate": 9.785714285714286e-06, "loss": 39.5775, "step": 5589 }, { "epoch": 133.0955223880597, "grad_norm": 25.301265716552734, "learning_rate": 9.784013605442178e-06, "loss": 39.6754, "step": 5590 }, { "epoch": 133.11940298507463, "grad_norm": 18.68819236755371, "learning_rate": 9.782312925170069e-06, "loss": 40.444, "step": 5591 }, { "epoch": 133.14328358208957, "grad_norm": 16.839736938476562, "learning_rate": 9.78061224489796e-06, "loss": 39.586, "step": 5592 }, { "epoch": 133.16716417910447, "grad_norm": 27.723005294799805, "learning_rate": 9.778911564625851e-06, "loss": 40.631, "step": 5593 }, { "epoch": 133.1910447761194, "grad_norm": 16.834030151367188, "learning_rate": 9.777210884353743e-06, "loss": 39.9121, "step": 5594 }, { "epoch": 133.21492537313432, "grad_norm": 16.289016723632812, "learning_rate": 9.775510204081634e-06, "loss": 39.8342, "step": 5595 }, { "epoch": 133.23880597014926, "grad_norm": 23.45367431640625, "learning_rate": 9.773809523809524e-06, "loss": 39.8418, "step": 5596 }, { "epoch": 133.26268656716417, "grad_norm": 18.50150489807129, "learning_rate": 9.772108843537416e-06, "loss": 40.8921, "step": 5597 }, { "epoch": 133.2865671641791, "grad_norm": 15.655564308166504, "learning_rate": 9.770408163265307e-06, "loss": 39.9306, "step": 5598 }, { "epoch": 133.31044776119404, "grad_norm": 23.770095825195312, "learning_rate": 9.768707482993199e-06, "loss": 40.686, "step": 5599 }, { "epoch": 133.33432835820895, "grad_norm": 21.083984375, "learning_rate": 9.767006802721089e-06, "loss": 40.5774, "step": 5600 }, { "epoch": 133.3582089552239, "grad_norm": 14.010787010192871, "learning_rate": 9.765306122448981e-06, "loss": 40.3888, "step": 5601 }, { "epoch": 133.3820895522388, "grad_norm": 29.777660369873047, "learning_rate": 9.763605442176872e-06, "loss": 41.4408, "step": 5602 }, { "epoch": 133.40597014925373, "grad_norm": 19.067794799804688, "learning_rate": 9.761904761904762e-06, "loss": 40.1208, "step": 5603 }, { "epoch": 133.42985074626867, "grad_norm": 30.848791122436523, "learning_rate": 9.760204081632654e-06, "loss": 40.7094, "step": 5604 }, { "epoch": 133.45373134328358, "grad_norm": 29.024898529052734, "learning_rate": 9.758503401360544e-06, "loss": 40.7004, "step": 5605 }, { "epoch": 133.47761194029852, "grad_norm": 22.88898468017578, "learning_rate": 9.756802721088437e-06, "loss": 40.257, "step": 5606 }, { "epoch": 133.50149253731342, "grad_norm": 39.7208137512207, "learning_rate": 9.755102040816327e-06, "loss": 38.6114, "step": 5607 }, { "epoch": 133.52537313432836, "grad_norm": 30.543888092041016, "learning_rate": 9.753401360544217e-06, "loss": 40.1867, "step": 5608 }, { "epoch": 133.54925373134327, "grad_norm": 36.19719314575195, "learning_rate": 9.75170068027211e-06, "loss": 40.5948, "step": 5609 }, { "epoch": 133.5731343283582, "grad_norm": 32.90020751953125, "learning_rate": 9.75e-06, "loss": 39.8139, "step": 5610 }, { "epoch": 133.59701492537314, "grad_norm": 34.50712585449219, "learning_rate": 9.748299319727892e-06, "loss": 40.1155, "step": 5611 }, { "epoch": 133.62089552238805, "grad_norm": 32.24649429321289, "learning_rate": 9.746598639455784e-06, "loss": 40.149, "step": 5612 }, { "epoch": 133.644776119403, "grad_norm": 35.7637939453125, "learning_rate": 9.744897959183674e-06, "loss": 39.7303, "step": 5613 }, { "epoch": 133.6686567164179, "grad_norm": 31.09421157836914, "learning_rate": 9.743197278911567e-06, "loss": 41.0925, "step": 5614 }, { "epoch": 133.69253731343284, "grad_norm": 37.82075881958008, "learning_rate": 9.741496598639457e-06, "loss": 39.9909, "step": 5615 }, { "epoch": 133.71641791044777, "grad_norm": 33.92351150512695, "learning_rate": 9.739795918367347e-06, "loss": 40.0986, "step": 5616 }, { "epoch": 133.74029850746268, "grad_norm": 29.645198822021484, "learning_rate": 9.73809523809524e-06, "loss": 41.5591, "step": 5617 }, { "epoch": 133.76417910447762, "grad_norm": 24.506332397460938, "learning_rate": 9.73639455782313e-06, "loss": 41.3366, "step": 5618 }, { "epoch": 133.78805970149253, "grad_norm": 38.3758544921875, "learning_rate": 9.734693877551022e-06, "loss": 41.016, "step": 5619 }, { "epoch": 133.81194029850747, "grad_norm": 33.210044860839844, "learning_rate": 9.732993197278912e-06, "loss": 40.9384, "step": 5620 }, { "epoch": 133.83582089552237, "grad_norm": 33.01791000366211, "learning_rate": 9.731292517006804e-06, "loss": 39.6658, "step": 5621 }, { "epoch": 133.8597014925373, "grad_norm": 34.2905158996582, "learning_rate": 9.729591836734695e-06, "loss": 40.4843, "step": 5622 }, { "epoch": 133.88358208955225, "grad_norm": 29.771053314208984, "learning_rate": 9.727891156462585e-06, "loss": 40.2978, "step": 5623 }, { "epoch": 133.90746268656716, "grad_norm": 30.07183837890625, "learning_rate": 9.726190476190477e-06, "loss": 40.2479, "step": 5624 }, { "epoch": 133.9313432835821, "grad_norm": 30.720661163330078, "learning_rate": 9.724489795918368e-06, "loss": 39.5252, "step": 5625 }, { "epoch": 133.955223880597, "grad_norm": 27.56161117553711, "learning_rate": 9.72278911564626e-06, "loss": 40.4758, "step": 5626 }, { "epoch": 133.97910447761194, "grad_norm": 32.74715805053711, "learning_rate": 9.72108843537415e-06, "loss": 40.6321, "step": 5627 }, { "epoch": 134.0, "grad_norm": 25.854846954345703, "learning_rate": 9.719387755102042e-06, "loss": 34.2593, "step": 5628 }, { "epoch": 134.02388059701494, "grad_norm": 33.82636642456055, "learning_rate": 9.717687074829933e-06, "loss": 40.2388, "step": 5629 }, { "epoch": 134.04776119402985, "grad_norm": 29.441238403320312, "learning_rate": 9.715986394557823e-06, "loss": 40.5805, "step": 5630 }, { "epoch": 134.07164179104478, "grad_norm": 29.590694427490234, "learning_rate": 9.714285714285715e-06, "loss": 38.7185, "step": 5631 }, { "epoch": 134.0955223880597, "grad_norm": 26.878095626831055, "learning_rate": 9.712585034013606e-06, "loss": 41.1294, "step": 5632 }, { "epoch": 134.11940298507463, "grad_norm": 31.240013122558594, "learning_rate": 9.710884353741498e-06, "loss": 40.0814, "step": 5633 }, { "epoch": 134.14328358208957, "grad_norm": 27.573955535888672, "learning_rate": 9.70918367346939e-06, "loss": 40.6451, "step": 5634 }, { "epoch": 134.16716417910447, "grad_norm": 35.54013442993164, "learning_rate": 9.707482993197278e-06, "loss": 41.3382, "step": 5635 }, { "epoch": 134.1910447761194, "grad_norm": 33.757408142089844, "learning_rate": 9.70578231292517e-06, "loss": 39.4768, "step": 5636 }, { "epoch": 134.21492537313432, "grad_norm": 29.37469482421875, "learning_rate": 9.704081632653061e-06, "loss": 39.8421, "step": 5637 }, { "epoch": 134.23880597014926, "grad_norm": 29.495834350585938, "learning_rate": 9.702380952380953e-06, "loss": 39.2846, "step": 5638 }, { "epoch": 134.26268656716417, "grad_norm": 28.723642349243164, "learning_rate": 9.700680272108845e-06, "loss": 39.4364, "step": 5639 }, { "epoch": 134.2865671641791, "grad_norm": 25.51703453063965, "learning_rate": 9.698979591836736e-06, "loss": 39.4578, "step": 5640 }, { "epoch": 134.31044776119404, "grad_norm": 34.16410446166992, "learning_rate": 9.697278911564628e-06, "loss": 40.5937, "step": 5641 }, { "epoch": 134.33432835820895, "grad_norm": 30.546810150146484, "learning_rate": 9.695578231292518e-06, "loss": 39.912, "step": 5642 }, { "epoch": 134.3582089552239, "grad_norm": 30.73379898071289, "learning_rate": 9.693877551020408e-06, "loss": 41.5471, "step": 5643 }, { "epoch": 134.3820895522388, "grad_norm": 30.759567260742188, "learning_rate": 9.6921768707483e-06, "loss": 40.3315, "step": 5644 }, { "epoch": 134.40597014925373, "grad_norm": 28.02313995361328, "learning_rate": 9.690476190476191e-06, "loss": 40.2851, "step": 5645 }, { "epoch": 134.42985074626867, "grad_norm": 24.580036163330078, "learning_rate": 9.688775510204083e-06, "loss": 40.9942, "step": 5646 }, { "epoch": 134.45373134328358, "grad_norm": 32.100738525390625, "learning_rate": 9.687074829931973e-06, "loss": 40.2184, "step": 5647 }, { "epoch": 134.47761194029852, "grad_norm": 30.24114418029785, "learning_rate": 9.685374149659866e-06, "loss": 40.3371, "step": 5648 }, { "epoch": 134.50149253731342, "grad_norm": 32.3997917175293, "learning_rate": 9.683673469387756e-06, "loss": 40.7586, "step": 5649 }, { "epoch": 134.52537313432836, "grad_norm": 25.58622169494629, "learning_rate": 9.681972789115646e-06, "loss": 40.1238, "step": 5650 }, { "epoch": 134.54925373134327, "grad_norm": 32.82097244262695, "learning_rate": 9.680272108843538e-06, "loss": 40.6563, "step": 5651 }, { "epoch": 134.5731343283582, "grad_norm": 27.216670989990234, "learning_rate": 9.678571428571429e-06, "loss": 38.6664, "step": 5652 }, { "epoch": 134.59701492537314, "grad_norm": 30.91448211669922, "learning_rate": 9.676870748299321e-06, "loss": 40.0405, "step": 5653 }, { "epoch": 134.62089552238805, "grad_norm": 27.467674255371094, "learning_rate": 9.675170068027211e-06, "loss": 40.8484, "step": 5654 }, { "epoch": 134.644776119403, "grad_norm": 33.313507080078125, "learning_rate": 9.673469387755103e-06, "loss": 40.5139, "step": 5655 }, { "epoch": 134.6686567164179, "grad_norm": 28.826663970947266, "learning_rate": 9.671768707482994e-06, "loss": 39.9436, "step": 5656 }, { "epoch": 134.69253731343284, "grad_norm": 31.69590950012207, "learning_rate": 9.670068027210884e-06, "loss": 40.458, "step": 5657 }, { "epoch": 134.71641791044777, "grad_norm": 24.371248245239258, "learning_rate": 9.668367346938776e-06, "loss": 40.4455, "step": 5658 }, { "epoch": 134.74029850746268, "grad_norm": 31.334495544433594, "learning_rate": 9.666666666666667e-06, "loss": 40.7902, "step": 5659 }, { "epoch": 134.76417910447762, "grad_norm": 27.586498260498047, "learning_rate": 9.664965986394559e-06, "loss": 40.5867, "step": 5660 }, { "epoch": 134.78805970149253, "grad_norm": 28.80315399169922, "learning_rate": 9.663265306122451e-06, "loss": 39.4688, "step": 5661 }, { "epoch": 134.81194029850747, "grad_norm": 24.875734329223633, "learning_rate": 9.66156462585034e-06, "loss": 39.2296, "step": 5662 }, { "epoch": 134.83582089552237, "grad_norm": 26.77202033996582, "learning_rate": 9.659863945578232e-06, "loss": 41.5271, "step": 5663 }, { "epoch": 134.8597014925373, "grad_norm": 21.632478713989258, "learning_rate": 9.658163265306124e-06, "loss": 39.7494, "step": 5664 }, { "epoch": 134.88358208955225, "grad_norm": 33.85261154174805, "learning_rate": 9.656462585034014e-06, "loss": 39.4471, "step": 5665 }, { "epoch": 134.90746268656716, "grad_norm": 27.42376708984375, "learning_rate": 9.654761904761906e-06, "loss": 40.2511, "step": 5666 }, { "epoch": 134.9313432835821, "grad_norm": 29.52701187133789, "learning_rate": 9.653061224489797e-06, "loss": 39.9535, "step": 5667 }, { "epoch": 134.955223880597, "grad_norm": 25.98667335510254, "learning_rate": 9.651360544217689e-06, "loss": 40.6712, "step": 5668 }, { "epoch": 134.97910447761194, "grad_norm": 26.950590133666992, "learning_rate": 9.64965986394558e-06, "loss": 40.4322, "step": 5669 }, { "epoch": 135.0, "grad_norm": 18.281841278076172, "learning_rate": 9.64795918367347e-06, "loss": 34.9402, "step": 5670 }, { "epoch": 135.02388059701494, "grad_norm": 30.714963912963867, "learning_rate": 9.646258503401362e-06, "loss": 40.2777, "step": 5671 }, { "epoch": 135.04776119402985, "grad_norm": 21.559858322143555, "learning_rate": 9.644557823129252e-06, "loss": 39.8327, "step": 5672 }, { "epoch": 135.07164179104478, "grad_norm": 27.76194953918457, "learning_rate": 9.642857142857144e-06, "loss": 40.0835, "step": 5673 }, { "epoch": 135.0955223880597, "grad_norm": 22.750877380371094, "learning_rate": 9.641156462585035e-06, "loss": 40.3429, "step": 5674 }, { "epoch": 135.11940298507463, "grad_norm": 28.511995315551758, "learning_rate": 9.639455782312927e-06, "loss": 39.3794, "step": 5675 }, { "epoch": 135.14328358208957, "grad_norm": 21.04129409790039, "learning_rate": 9.637755102040817e-06, "loss": 41.34, "step": 5676 }, { "epoch": 135.16716417910447, "grad_norm": 27.8126277923584, "learning_rate": 9.636054421768707e-06, "loss": 40.3671, "step": 5677 }, { "epoch": 135.1910447761194, "grad_norm": 23.500349044799805, "learning_rate": 9.6343537414966e-06, "loss": 40.235, "step": 5678 }, { "epoch": 135.21492537313432, "grad_norm": 25.186744689941406, "learning_rate": 9.63265306122449e-06, "loss": 40.7387, "step": 5679 }, { "epoch": 135.23880597014926, "grad_norm": 21.36899185180664, "learning_rate": 9.630952380952382e-06, "loss": 40.0717, "step": 5680 }, { "epoch": 135.26268656716417, "grad_norm": 23.584760665893555, "learning_rate": 9.629251700680272e-06, "loss": 40.1511, "step": 5681 }, { "epoch": 135.2865671641791, "grad_norm": 22.20633316040039, "learning_rate": 9.627551020408165e-06, "loss": 40.0655, "step": 5682 }, { "epoch": 135.31044776119404, "grad_norm": 19.99517822265625, "learning_rate": 9.625850340136055e-06, "loss": 39.8154, "step": 5683 }, { "epoch": 135.33432835820895, "grad_norm": 22.59499168395996, "learning_rate": 9.624149659863945e-06, "loss": 40.6277, "step": 5684 }, { "epoch": 135.3582089552239, "grad_norm": 17.33830451965332, "learning_rate": 9.622448979591837e-06, "loss": 39.4982, "step": 5685 }, { "epoch": 135.3820895522388, "grad_norm": 22.377470016479492, "learning_rate": 9.62074829931973e-06, "loss": 39.4303, "step": 5686 }, { "epoch": 135.40597014925373, "grad_norm": 22.994571685791016, "learning_rate": 9.61904761904762e-06, "loss": 38.643, "step": 5687 }, { "epoch": 135.42985074626867, "grad_norm": 17.39454460144043, "learning_rate": 9.617346938775512e-06, "loss": 41.7366, "step": 5688 }, { "epoch": 135.45373134328358, "grad_norm": 24.441268920898438, "learning_rate": 9.6156462585034e-06, "loss": 41.1602, "step": 5689 }, { "epoch": 135.47761194029852, "grad_norm": 16.182247161865234, "learning_rate": 9.613945578231293e-06, "loss": 40.4013, "step": 5690 }, { "epoch": 135.50149253731342, "grad_norm": 23.803049087524414, "learning_rate": 9.612244897959185e-06, "loss": 41.3252, "step": 5691 }, { "epoch": 135.52537313432836, "grad_norm": 20.565837860107422, "learning_rate": 9.610544217687075e-06, "loss": 40.3434, "step": 5692 }, { "epoch": 135.54925373134327, "grad_norm": 26.256967544555664, "learning_rate": 9.608843537414967e-06, "loss": 40.2281, "step": 5693 }, { "epoch": 135.5731343283582, "grad_norm": 18.350553512573242, "learning_rate": 9.607142857142858e-06, "loss": 39.1361, "step": 5694 }, { "epoch": 135.59701492537314, "grad_norm": 25.684616088867188, "learning_rate": 9.60544217687075e-06, "loss": 39.7602, "step": 5695 }, { "epoch": 135.62089552238805, "grad_norm": 22.026763916015625, "learning_rate": 9.60374149659864e-06, "loss": 40.2298, "step": 5696 }, { "epoch": 135.644776119403, "grad_norm": 15.483604431152344, "learning_rate": 9.60204081632653e-06, "loss": 39.8388, "step": 5697 }, { "epoch": 135.6686567164179, "grad_norm": 21.13356590270996, "learning_rate": 9.600340136054423e-06, "loss": 39.5239, "step": 5698 }, { "epoch": 135.69253731343284, "grad_norm": 17.695802688598633, "learning_rate": 9.598639455782313e-06, "loss": 40.336, "step": 5699 }, { "epoch": 135.71641791044777, "grad_norm": 16.947023391723633, "learning_rate": 9.596938775510205e-06, "loss": 39.7942, "step": 5700 }, { "epoch": 135.74029850746268, "grad_norm": 18.580827713012695, "learning_rate": 9.595238095238096e-06, "loss": 41.1081, "step": 5701 }, { "epoch": 135.76417910447762, "grad_norm": 19.310028076171875, "learning_rate": 9.593537414965988e-06, "loss": 38.773, "step": 5702 }, { "epoch": 135.78805970149253, "grad_norm": 17.71697235107422, "learning_rate": 9.591836734693878e-06, "loss": 41.1084, "step": 5703 }, { "epoch": 135.81194029850747, "grad_norm": 19.53215217590332, "learning_rate": 9.590136054421769e-06, "loss": 40.7152, "step": 5704 }, { "epoch": 135.83582089552237, "grad_norm": 26.050701141357422, "learning_rate": 9.58843537414966e-06, "loss": 41.2326, "step": 5705 }, { "epoch": 135.8597014925373, "grad_norm": 21.59418296813965, "learning_rate": 9.586734693877551e-06, "loss": 39.7008, "step": 5706 }, { "epoch": 135.88358208955225, "grad_norm": 17.44019889831543, "learning_rate": 9.585034013605443e-06, "loss": 40.0913, "step": 5707 }, { "epoch": 135.90746268656716, "grad_norm": 22.628219604492188, "learning_rate": 9.583333333333335e-06, "loss": 41.0258, "step": 5708 }, { "epoch": 135.9313432835821, "grad_norm": 18.4293155670166, "learning_rate": 9.581632653061226e-06, "loss": 39.4461, "step": 5709 }, { "epoch": 135.955223880597, "grad_norm": 17.186227798461914, "learning_rate": 9.579931972789116e-06, "loss": 38.9269, "step": 5710 }, { "epoch": 135.97910447761194, "grad_norm": 20.301193237304688, "learning_rate": 9.578231292517007e-06, "loss": 39.9266, "step": 5711 }, { "epoch": 136.0, "grad_norm": 18.15862464904785, "learning_rate": 9.576530612244899e-06, "loss": 35.705, "step": 5712 }, { "epoch": 136.02388059701494, "grad_norm": 17.27276611328125, "learning_rate": 9.57482993197279e-06, "loss": 39.9615, "step": 5713 }, { "epoch": 136.04776119402985, "grad_norm": 16.116933822631836, "learning_rate": 9.573129251700681e-06, "loss": 40.3388, "step": 5714 }, { "epoch": 136.07164179104478, "grad_norm": 14.106700897216797, "learning_rate": 9.571428571428573e-06, "loss": 39.8286, "step": 5715 }, { "epoch": 136.0955223880597, "grad_norm": 18.357019424438477, "learning_rate": 9.569727891156464e-06, "loss": 40.6918, "step": 5716 }, { "epoch": 136.11940298507463, "grad_norm": 16.41695213317871, "learning_rate": 9.568027210884354e-06, "loss": 40.1238, "step": 5717 }, { "epoch": 136.14328358208957, "grad_norm": 15.24857234954834, "learning_rate": 9.566326530612246e-06, "loss": 39.5314, "step": 5718 }, { "epoch": 136.16716417910447, "grad_norm": 21.097612380981445, "learning_rate": 9.564625850340137e-06, "loss": 39.4418, "step": 5719 }, { "epoch": 136.1910447761194, "grad_norm": 15.658564567565918, "learning_rate": 9.562925170068029e-06, "loss": 40.4354, "step": 5720 }, { "epoch": 136.21492537313432, "grad_norm": 18.364137649536133, "learning_rate": 9.561224489795919e-06, "loss": 39.4063, "step": 5721 }, { "epoch": 136.23880597014926, "grad_norm": 16.437915802001953, "learning_rate": 9.559523809523811e-06, "loss": 39.2412, "step": 5722 }, { "epoch": 136.26268656716417, "grad_norm": 18.161527633666992, "learning_rate": 9.557823129251701e-06, "loss": 40.1167, "step": 5723 }, { "epoch": 136.2865671641791, "grad_norm": 19.824352264404297, "learning_rate": 9.556122448979592e-06, "loss": 39.5653, "step": 5724 }, { "epoch": 136.31044776119404, "grad_norm": 16.736989974975586, "learning_rate": 9.554421768707484e-06, "loss": 39.4445, "step": 5725 }, { "epoch": 136.33432835820895, "grad_norm": NaN, "learning_rate": 9.552721088435374e-06, "loss": 40.8717, "step": 5726 }, { "epoch": 136.3582089552239, "grad_norm": 16.963516235351562, "learning_rate": 9.552721088435374e-06, "loss": 40.3213, "step": 5727 }, { "epoch": 136.3820895522388, "grad_norm": 18.735271453857422, "learning_rate": 9.551020408163266e-06, "loss": 40.8078, "step": 5728 }, { "epoch": 136.40597014925373, "grad_norm": 19.308032989501953, "learning_rate": 9.549319727891157e-06, "loss": 39.9691, "step": 5729 }, { "epoch": 136.42985074626867, "grad_norm": 14.293987274169922, "learning_rate": 9.547619047619049e-06, "loss": 39.0772, "step": 5730 }, { "epoch": 136.45373134328358, "grad_norm": 21.123519897460938, "learning_rate": 9.54591836734694e-06, "loss": 40.995, "step": 5731 }, { "epoch": 136.47761194029852, "grad_norm": 16.979511260986328, "learning_rate": 9.54421768707483e-06, "loss": 41.6872, "step": 5732 }, { "epoch": 136.50149253731342, "grad_norm": 20.042757034301758, "learning_rate": 9.542517006802722e-06, "loss": 40.1547, "step": 5733 }, { "epoch": 136.52537313432836, "grad_norm": 19.689138412475586, "learning_rate": 9.540816326530612e-06, "loss": 40.4422, "step": 5734 }, { "epoch": 136.54925373134327, "grad_norm": 19.830251693725586, "learning_rate": 9.539115646258504e-06, "loss": 38.6685, "step": 5735 }, { "epoch": 136.5731343283582, "grad_norm": 19.68994903564453, "learning_rate": 9.537414965986396e-06, "loss": 40.3769, "step": 5736 }, { "epoch": 136.59701492537314, "grad_norm": 19.520610809326172, "learning_rate": 9.535714285714287e-06, "loss": 39.8802, "step": 5737 }, { "epoch": 136.62089552238805, "grad_norm": 20.209075927734375, "learning_rate": 9.534013605442177e-06, "loss": 40.5337, "step": 5738 }, { "epoch": 136.644776119403, "grad_norm": 18.009183883666992, "learning_rate": 9.53231292517007e-06, "loss": 40.5237, "step": 5739 }, { "epoch": 136.6686567164179, "grad_norm": 17.618444442749023, "learning_rate": 9.53061224489796e-06, "loss": 39.4263, "step": 5740 }, { "epoch": 136.69253731343284, "grad_norm": 17.066255569458008, "learning_rate": 9.528911564625852e-06, "loss": 39.0451, "step": 5741 }, { "epoch": 136.71641791044777, "grad_norm": 16.11752700805664, "learning_rate": 9.527210884353742e-06, "loss": 40.5889, "step": 5742 }, { "epoch": 136.74029850746268, "grad_norm": 24.23548126220703, "learning_rate": 9.525510204081634e-06, "loss": 40.1915, "step": 5743 }, { "epoch": 136.76417910447762, "grad_norm": 17.77320671081543, "learning_rate": 9.523809523809525e-06, "loss": 40.4259, "step": 5744 }, { "epoch": 136.78805970149253, "grad_norm": 18.783700942993164, "learning_rate": 9.522108843537415e-06, "loss": 40.112, "step": 5745 }, { "epoch": 136.81194029850747, "grad_norm": 19.52975845336914, "learning_rate": 9.520408163265307e-06, "loss": 40.2694, "step": 5746 }, { "epoch": 136.83582089552237, "grad_norm": 22.467615127563477, "learning_rate": 9.518707482993198e-06, "loss": 41.4122, "step": 5747 }, { "epoch": 136.8597014925373, "grad_norm": 16.6851806640625, "learning_rate": 9.51700680272109e-06, "loss": 40.2696, "step": 5748 }, { "epoch": 136.88358208955225, "grad_norm": 19.272367477416992, "learning_rate": 9.51530612244898e-06, "loss": 40.4809, "step": 5749 }, { "epoch": 136.90746268656716, "grad_norm": 25.64748764038086, "learning_rate": 9.513605442176872e-06, "loss": 40.3818, "step": 5750 }, { "epoch": 136.9313432835821, "grad_norm": 17.339828491210938, "learning_rate": 9.511904761904763e-06, "loss": 40.3506, "step": 5751 }, { "epoch": 136.955223880597, "grad_norm": 23.17976188659668, "learning_rate": 9.510204081632653e-06, "loss": 39.2061, "step": 5752 }, { "epoch": 136.97910447761194, "grad_norm": 23.762033462524414, "learning_rate": 9.508503401360545e-06, "loss": 40.4341, "step": 5753 }, { "epoch": 137.0, "grad_norm": 14.065231323242188, "learning_rate": 9.506802721088436e-06, "loss": 35.9299, "step": 5754 }, { "epoch": 137.02388059701494, "grad_norm": 22.709367752075195, "learning_rate": 9.505102040816328e-06, "loss": 39.375, "step": 5755 }, { "epoch": 137.04776119402985, "grad_norm": 20.099899291992188, "learning_rate": 9.503401360544218e-06, "loss": 39.1875, "step": 5756 }, { "epoch": 137.07164179104478, "grad_norm": 16.251981735229492, "learning_rate": 9.50170068027211e-06, "loss": 39.7174, "step": 5757 }, { "epoch": 137.0955223880597, "grad_norm": 17.096813201904297, "learning_rate": 9.5e-06, "loss": 39.1641, "step": 5758 }, { "epoch": 137.11940298507463, "grad_norm": 21.969449996948242, "learning_rate": 9.498299319727891e-06, "loss": 39.6796, "step": 5759 }, { "epoch": 137.14328358208957, "grad_norm": 13.48315715789795, "learning_rate": 9.496598639455783e-06, "loss": 40.1312, "step": 5760 }, { "epoch": 137.16716417910447, "grad_norm": 15.142317771911621, "learning_rate": 9.494897959183675e-06, "loss": 39.0918, "step": 5761 }, { "epoch": 137.1910447761194, "grad_norm": 15.083260536193848, "learning_rate": 9.493197278911566e-06, "loss": 40.3378, "step": 5762 }, { "epoch": 137.21492537313432, "grad_norm": 16.5947208404541, "learning_rate": 9.491496598639458e-06, "loss": 40.0254, "step": 5763 }, { "epoch": 137.23880597014926, "grad_norm": 17.31525421142578, "learning_rate": 9.489795918367348e-06, "loss": 39.7925, "step": 5764 }, { "epoch": 137.26268656716417, "grad_norm": 13.33224105834961, "learning_rate": 9.488095238095238e-06, "loss": 39.1608, "step": 5765 }, { "epoch": 137.2865671641791, "grad_norm": 18.62505340576172, "learning_rate": 9.48639455782313e-06, "loss": 39.6955, "step": 5766 }, { "epoch": 137.31044776119404, "grad_norm": 20.526426315307617, "learning_rate": 9.484693877551021e-06, "loss": 40.8692, "step": 5767 }, { "epoch": 137.33432835820895, "grad_norm": 17.54509162902832, "learning_rate": 9.482993197278913e-06, "loss": 38.9902, "step": 5768 }, { "epoch": 137.3582089552239, "grad_norm": 13.5675048828125, "learning_rate": 9.481292517006803e-06, "loss": 40.2917, "step": 5769 }, { "epoch": 137.3820895522388, "grad_norm": 17.16435432434082, "learning_rate": 9.479591836734695e-06, "loss": 39.8777, "step": 5770 }, { "epoch": 137.40597014925373, "grad_norm": NaN, "learning_rate": 9.477891156462586e-06, "loss": 40.3914, "step": 5771 }, { "epoch": 137.42985074626867, "grad_norm": 18.361515045166016, "learning_rate": 9.477891156462586e-06, "loss": 42.1308, "step": 5772 }, { "epoch": 137.45373134328358, "grad_norm": 15.623734474182129, "learning_rate": 9.476190476190476e-06, "loss": 41.3761, "step": 5773 }, { "epoch": 137.47761194029852, "grad_norm": 16.020898818969727, "learning_rate": 9.474489795918368e-06, "loss": 40.9852, "step": 5774 }, { "epoch": 137.50149253731342, "grad_norm": 24.223079681396484, "learning_rate": 9.472789115646259e-06, "loss": 40.3601, "step": 5775 }, { "epoch": 137.52537313432836, "grad_norm": 16.226585388183594, "learning_rate": 9.471088435374151e-06, "loss": 39.25, "step": 5776 }, { "epoch": 137.54925373134327, "grad_norm": 14.546438217163086, "learning_rate": 9.469387755102041e-06, "loss": 41.5317, "step": 5777 }, { "epoch": 137.5731343283582, "grad_norm": 25.475976943969727, "learning_rate": 9.467687074829933e-06, "loss": 40.7448, "step": 5778 }, { "epoch": 137.59701492537314, "grad_norm": 21.050052642822266, "learning_rate": 9.465986394557824e-06, "loss": 39.494, "step": 5779 }, { "epoch": 137.62089552238805, "grad_norm": 14.88813591003418, "learning_rate": 9.464285714285714e-06, "loss": 40.073, "step": 5780 }, { "epoch": 137.644776119403, "grad_norm": 21.426273345947266, "learning_rate": 9.462585034013606e-06, "loss": 39.7685, "step": 5781 }, { "epoch": 137.6686567164179, "grad_norm": 21.00870704650879, "learning_rate": 9.460884353741497e-06, "loss": 39.4509, "step": 5782 }, { "epoch": 137.69253731343284, "grad_norm": 14.929703712463379, "learning_rate": 9.459183673469389e-06, "loss": 41.4336, "step": 5783 }, { "epoch": 137.71641791044777, "grad_norm": 13.802526473999023, "learning_rate": 9.457482993197281e-06, "loss": 40.6038, "step": 5784 }, { "epoch": 137.74029850746268, "grad_norm": 25.661685943603516, "learning_rate": 9.455782312925171e-06, "loss": 39.2058, "step": 5785 }, { "epoch": 137.76417910447762, "grad_norm": 17.845937728881836, "learning_rate": 9.454081632653062e-06, "loss": 41.1643, "step": 5786 }, { "epoch": 137.78805970149253, "grad_norm": 25.97015953063965, "learning_rate": 9.452380952380952e-06, "loss": 39.7219, "step": 5787 }, { "epoch": 137.81194029850747, "grad_norm": 17.875333786010742, "learning_rate": 9.450680272108844e-06, "loss": 39.7798, "step": 5788 }, { "epoch": 137.83582089552237, "grad_norm": 18.28219223022461, "learning_rate": 9.448979591836736e-06, "loss": 39.3965, "step": 5789 }, { "epoch": 137.8597014925373, "grad_norm": 19.815677642822266, "learning_rate": 9.447278911564627e-06, "loss": 40.5489, "step": 5790 }, { "epoch": 137.88358208955225, "grad_norm": 20.447330474853516, "learning_rate": 9.445578231292519e-06, "loss": 39.633, "step": 5791 }, { "epoch": 137.90746268656716, "grad_norm": 16.50349998474121, "learning_rate": 9.44387755102041e-06, "loss": 39.9416, "step": 5792 }, { "epoch": 137.9313432835821, "grad_norm": 22.41202735900879, "learning_rate": 9.4421768707483e-06, "loss": 41.0672, "step": 5793 }, { "epoch": 137.955223880597, "grad_norm": 17.746328353881836, "learning_rate": 9.440476190476192e-06, "loss": 39.8027, "step": 5794 }, { "epoch": 137.97910447761194, "grad_norm": 18.95381736755371, "learning_rate": 9.438775510204082e-06, "loss": 40.2487, "step": 5795 }, { "epoch": 138.0, "grad_norm": 14.501996994018555, "learning_rate": 9.437074829931974e-06, "loss": 35.2047, "step": 5796 }, { "epoch": 138.02388059701494, "grad_norm": 23.209070205688477, "learning_rate": 9.435374149659865e-06, "loss": 40.6912, "step": 5797 }, { "epoch": 138.04776119402985, "grad_norm": 19.782623291015625, "learning_rate": 9.433673469387757e-06, "loss": 40.41, "step": 5798 }, { "epoch": 138.07164179104478, "grad_norm": 18.50634002685547, "learning_rate": 9.431972789115647e-06, "loss": 40.1066, "step": 5799 }, { "epoch": 138.0955223880597, "grad_norm": 19.37914276123047, "learning_rate": 9.430272108843537e-06, "loss": 39.8479, "step": 5800 }, { "epoch": 138.11940298507463, "grad_norm": NaN, "learning_rate": 9.42857142857143e-06, "loss": 69.6561, "step": 5801 }, { "epoch": 138.14328358208957, "grad_norm": 20.234079360961914, "learning_rate": 9.42857142857143e-06, "loss": 40.7243, "step": 5802 }, { "epoch": 138.16716417910447, "grad_norm": 19.689483642578125, "learning_rate": 9.42687074829932e-06, "loss": 40.2885, "step": 5803 }, { "epoch": 138.1910447761194, "grad_norm": 15.311651229858398, "learning_rate": 9.425170068027212e-06, "loss": 40.0564, "step": 5804 }, { "epoch": 138.21492537313432, "grad_norm": 22.144147872924805, "learning_rate": 9.423469387755102e-06, "loss": 40.0064, "step": 5805 }, { "epoch": 138.23880597014926, "grad_norm": 19.106332778930664, "learning_rate": 9.421768707482995e-06, "loss": 38.9603, "step": 5806 }, { "epoch": 138.26268656716417, "grad_norm": 17.888164520263672, "learning_rate": 9.420068027210885e-06, "loss": 38.8051, "step": 5807 }, { "epoch": 138.2865671641791, "grad_norm": NaN, "learning_rate": 9.418367346938775e-06, "loss": 34.5894, "step": 5808 }, { "epoch": 138.31044776119404, "grad_norm": 16.295089721679688, "learning_rate": 9.418367346938775e-06, "loss": 39.8359, "step": 5809 }, { "epoch": 138.33432835820895, "grad_norm": 17.898618698120117, "learning_rate": 9.416666666666667e-06, "loss": 40.9419, "step": 5810 }, { "epoch": 138.3582089552239, "grad_norm": 15.549861907958984, "learning_rate": 9.414965986394558e-06, "loss": 38.8705, "step": 5811 }, { "epoch": 138.3820895522388, "grad_norm": 21.414033889770508, "learning_rate": 9.41326530612245e-06, "loss": 40.366, "step": 5812 }, { "epoch": 138.40597014925373, "grad_norm": 18.34477996826172, "learning_rate": 9.411564625850342e-06, "loss": 41.358, "step": 5813 }, { "epoch": 138.42985074626867, "grad_norm": 18.43037223815918, "learning_rate": 9.409863945578232e-06, "loss": 41.0623, "step": 5814 }, { "epoch": 138.45373134328358, "grad_norm": 22.278278350830078, "learning_rate": 9.408163265306123e-06, "loss": 39.3361, "step": 5815 }, { "epoch": 138.47761194029852, "grad_norm": NaN, "learning_rate": 9.406462585034015e-06, "loss": 39.6796, "step": 5816 }, { "epoch": 138.50149253731342, "grad_norm": 18.300764083862305, "learning_rate": 9.406462585034015e-06, "loss": 40.625, "step": 5817 }, { "epoch": 138.52537313432836, "grad_norm": 16.382335662841797, "learning_rate": 9.404761904761905e-06, "loss": 40.4843, "step": 5818 }, { "epoch": 138.54925373134327, "grad_norm": 20.629667282104492, "learning_rate": 9.403061224489797e-06, "loss": 39.1476, "step": 5819 }, { "epoch": 138.5731343283582, "grad_norm": 25.73557472229004, "learning_rate": 9.401360544217688e-06, "loss": 41.4348, "step": 5820 }, { "epoch": 138.59701492537314, "grad_norm": 15.648715019226074, "learning_rate": 9.39965986394558e-06, "loss": 40.4888, "step": 5821 }, { "epoch": 138.62089552238805, "grad_norm": 16.803377151489258, "learning_rate": 9.39795918367347e-06, "loss": 40.5578, "step": 5822 }, { "epoch": 138.644776119403, "grad_norm": 29.228322982788086, "learning_rate": 9.39625850340136e-06, "loss": 40.6632, "step": 5823 }, { "epoch": 138.6686567164179, "grad_norm": 15.427154541015625, "learning_rate": 9.394557823129253e-06, "loss": 39.1214, "step": 5824 }, { "epoch": 138.69253731343284, "grad_norm": 28.359830856323242, "learning_rate": 9.392857142857143e-06, "loss": 40.3437, "step": 5825 }, { "epoch": 138.71641791044777, "grad_norm": 18.356201171875, "learning_rate": 9.391156462585035e-06, "loss": 40.6458, "step": 5826 }, { "epoch": 138.74029850746268, "grad_norm": 19.94028091430664, "learning_rate": 9.389455782312926e-06, "loss": 39.7405, "step": 5827 }, { "epoch": 138.76417910447762, "grad_norm": 27.602651596069336, "learning_rate": 9.387755102040818e-06, "loss": 40.1602, "step": 5828 }, { "epoch": 138.78805970149253, "grad_norm": 16.400421142578125, "learning_rate": 9.386054421768708e-06, "loss": 38.7246, "step": 5829 }, { "epoch": 138.81194029850747, "grad_norm": 26.579187393188477, "learning_rate": 9.384353741496599e-06, "loss": 38.5539, "step": 5830 }, { "epoch": 138.83582089552237, "grad_norm": 21.284912109375, "learning_rate": 9.38265306122449e-06, "loss": 40.3989, "step": 5831 }, { "epoch": 138.8597014925373, "grad_norm": 23.772157669067383, "learning_rate": 9.380952380952381e-06, "loss": 39.4185, "step": 5832 }, { "epoch": 138.88358208955225, "grad_norm": 29.053791046142578, "learning_rate": 9.379251700680273e-06, "loss": 41.1253, "step": 5833 }, { "epoch": 138.90746268656716, "grad_norm": 16.733402252197266, "learning_rate": 9.377551020408164e-06, "loss": 40.1286, "step": 5834 }, { "epoch": 138.9313432835821, "grad_norm": 36.72946548461914, "learning_rate": 9.375850340136056e-06, "loss": 38.9963, "step": 5835 }, { "epoch": 138.955223880597, "grad_norm": 26.51390838623047, "learning_rate": 9.374149659863946e-06, "loss": 40.031, "step": 5836 }, { "epoch": 138.97910447761194, "grad_norm": 31.663070678710938, "learning_rate": 9.372448979591836e-06, "loss": 40.099, "step": 5837 }, { "epoch": 139.0, "grad_norm": 21.624252319335938, "learning_rate": 9.370748299319729e-06, "loss": 35.3932, "step": 5838 }, { "epoch": 139.02388059701494, "grad_norm": 33.74135971069336, "learning_rate": 9.36904761904762e-06, "loss": 40.1791, "step": 5839 }, { "epoch": 139.04776119402985, "grad_norm": 21.488868713378906, "learning_rate": 9.367346938775511e-06, "loss": 38.745, "step": 5840 }, { "epoch": 139.07164179104478, "grad_norm": 35.68408203125, "learning_rate": 9.365646258503403e-06, "loss": 40.4261, "step": 5841 }, { "epoch": 139.0955223880597, "grad_norm": 27.531938552856445, "learning_rate": 9.363945578231294e-06, "loss": 39.8502, "step": 5842 }, { "epoch": 139.11940298507463, "grad_norm": 41.84492874145508, "learning_rate": 9.362244897959184e-06, "loss": 40.0804, "step": 5843 }, { "epoch": 139.14328358208957, "grad_norm": 34.03583526611328, "learning_rate": 9.360544217687076e-06, "loss": 40.4201, "step": 5844 }, { "epoch": 139.16716417910447, "grad_norm": 27.924837112426758, "learning_rate": 9.358843537414966e-06, "loss": 40.006, "step": 5845 }, { "epoch": 139.1910447761194, "grad_norm": 29.39901351928711, "learning_rate": 9.357142857142859e-06, "loss": 39.8178, "step": 5846 }, { "epoch": 139.21492537313432, "grad_norm": 31.170534133911133, "learning_rate": 9.355442176870749e-06, "loss": 39.8539, "step": 5847 }, { "epoch": 139.23880597014926, "grad_norm": 24.115842819213867, "learning_rate": 9.353741496598641e-06, "loss": 39.5716, "step": 5848 }, { "epoch": 139.26268656716417, "grad_norm": 30.86311149597168, "learning_rate": 9.352040816326531e-06, "loss": 39.3648, "step": 5849 }, { "epoch": 139.2865671641791, "grad_norm": 23.781522750854492, "learning_rate": 9.350340136054422e-06, "loss": 39.8786, "step": 5850 }, { "epoch": 139.31044776119404, "grad_norm": 38.31922149658203, "learning_rate": 9.348639455782314e-06, "loss": 39.9693, "step": 5851 }, { "epoch": 139.33432835820895, "grad_norm": 26.093704223632812, "learning_rate": 9.346938775510204e-06, "loss": 41.1107, "step": 5852 }, { "epoch": 139.3582089552239, "grad_norm": 35.27671813964844, "learning_rate": 9.345238095238096e-06, "loss": 39.3532, "step": 5853 }, { "epoch": 139.3820895522388, "grad_norm": 28.302349090576172, "learning_rate": 9.343537414965987e-06, "loss": 40.0611, "step": 5854 }, { "epoch": 139.40597014925373, "grad_norm": 34.33140182495117, "learning_rate": 9.341836734693879e-06, "loss": 39.3752, "step": 5855 }, { "epoch": 139.42985074626867, "grad_norm": 30.547216415405273, "learning_rate": 9.34013605442177e-06, "loss": 41.2377, "step": 5856 }, { "epoch": 139.45373134328358, "grad_norm": 30.49541664123535, "learning_rate": 9.33843537414966e-06, "loss": 39.2959, "step": 5857 }, { "epoch": 139.47761194029852, "grad_norm": 25.64105796813965, "learning_rate": 9.336734693877552e-06, "loss": 41.0909, "step": 5858 }, { "epoch": 139.50149253731342, "grad_norm": NaN, "learning_rate": 9.335034013605442e-06, "loss": 60.6154, "step": 5859 }, { "epoch": 139.52537313432836, "grad_norm": 35.060829162597656, "learning_rate": 9.335034013605442e-06, "loss": 39.9802, "step": 5860 }, { "epoch": 139.54925373134327, "grad_norm": 27.54969024658203, "learning_rate": 9.333333333333334e-06, "loss": 39.3133, "step": 5861 }, { "epoch": 139.5731343283582, "grad_norm": 38.9669075012207, "learning_rate": 9.331632653061225e-06, "loss": 41.312, "step": 5862 }, { "epoch": 139.59701492537314, "grad_norm": 34.280067443847656, "learning_rate": 9.329931972789117e-06, "loss": 39.4449, "step": 5863 }, { "epoch": 139.62089552238805, "grad_norm": 29.73419189453125, "learning_rate": 9.328231292517007e-06, "loss": 40.2213, "step": 5864 }, { "epoch": 139.644776119403, "grad_norm": 24.6690673828125, "learning_rate": 9.326530612244898e-06, "loss": 40.6231, "step": 5865 }, { "epoch": 139.6686567164179, "grad_norm": 35.420101165771484, "learning_rate": 9.32482993197279e-06, "loss": 39.6659, "step": 5866 }, { "epoch": 139.69253731343284, "grad_norm": 32.64005661010742, "learning_rate": 9.323129251700682e-06, "loss": 39.3794, "step": 5867 }, { "epoch": 139.71641791044777, "grad_norm": 30.59541893005371, "learning_rate": 9.321428571428572e-06, "loss": 40.0863, "step": 5868 }, { "epoch": 139.74029850746268, "grad_norm": 28.199207305908203, "learning_rate": 9.319727891156464e-06, "loss": 39.7205, "step": 5869 }, { "epoch": 139.76417910447762, "grad_norm": 28.04796600341797, "learning_rate": 9.318027210884355e-06, "loss": 39.0113, "step": 5870 }, { "epoch": 139.78805970149253, "grad_norm": 24.012332916259766, "learning_rate": 9.316326530612245e-06, "loss": 40.5084, "step": 5871 }, { "epoch": 139.81194029850747, "grad_norm": 34.23363494873047, "learning_rate": 9.314625850340137e-06, "loss": 39.5083, "step": 5872 }, { "epoch": 139.83582089552237, "grad_norm": 29.06350326538086, "learning_rate": 9.312925170068028e-06, "loss": 39.3723, "step": 5873 }, { "epoch": 139.8597014925373, "grad_norm": 28.527681350708008, "learning_rate": 9.31122448979592e-06, "loss": 40.6419, "step": 5874 }, { "epoch": 139.88358208955225, "grad_norm": 29.12566375732422, "learning_rate": 9.30952380952381e-06, "loss": 40.6302, "step": 5875 }, { "epoch": 139.90746268656716, "grad_norm": 31.133377075195312, "learning_rate": 9.307823129251702e-06, "loss": 40.5931, "step": 5876 }, { "epoch": 139.9313432835821, "grad_norm": 24.83881378173828, "learning_rate": 9.306122448979593e-06, "loss": 39.411, "step": 5877 }, { "epoch": 139.955223880597, "grad_norm": 31.597652435302734, "learning_rate": 9.304421768707483e-06, "loss": 39.0108, "step": 5878 }, { "epoch": 139.97910447761194, "grad_norm": 29.325529098510742, "learning_rate": 9.302721088435375e-06, "loss": 39.4128, "step": 5879 }, { "epoch": 140.0, "grad_norm": 25.26357078552246, "learning_rate": 9.301020408163265e-06, "loss": 35.0399, "step": 5880 }, { "epoch": 140.0, "step": 5880, "total_flos": 2.890707963725509e+17, "train_loss": 2.877911633861308, "train_runtime": 12847.333, "train_samples_per_second": 58.322, "train_steps_per_second": 0.458 }, { "epoch": 140.02388059701494, "grad_norm": 2.1956427097320557, "learning_rate": 1e-05, "loss": 39.3237, "step": 5881 }, { "epoch": 140.04776119402985, "grad_norm": 2.145946502685547, "learning_rate": 9.998511904761904e-06, "loss": 39.4125, "step": 5882 }, { "epoch": 140.07164179104478, "grad_norm": 2.0076515674591064, "learning_rate": 9.99702380952381e-06, "loss": 39.6506, "step": 5883 }, { "epoch": 140.0955223880597, "grad_norm": 1.7847540378570557, "learning_rate": 9.995535714285715e-06, "loss": 38.8137, "step": 5884 }, { "epoch": 140.11940298507463, "grad_norm": 1.6407201290130615, "learning_rate": 9.99404761904762e-06, "loss": 38.0156, "step": 5885 }, { "epoch": 140.14328358208957, "grad_norm": 1.5394906997680664, "learning_rate": 9.992559523809524e-06, "loss": 39.1583, "step": 5886 }, { "epoch": 140.16716417910447, "grad_norm": 1.445509672164917, "learning_rate": 9.99107142857143e-06, "loss": 39.4676, "step": 5887 }, { "epoch": 140.1910447761194, "grad_norm": 1.413928508758545, "learning_rate": 9.989583333333333e-06, "loss": 39.4213, "step": 5888 }, { "epoch": 140.21492537313432, "grad_norm": 1.4625073671340942, "learning_rate": 9.988095238095239e-06, "loss": 39.9539, "step": 5889 }, { "epoch": 140.23880597014926, "grad_norm": 1.5224840641021729, "learning_rate": 9.986607142857142e-06, "loss": 41.0345, "step": 5890 }, { "epoch": 140.26268656716417, "grad_norm": 1.5515761375427246, "learning_rate": 9.985119047619048e-06, "loss": 39.4485, "step": 5891 }, { "epoch": 140.2865671641791, "grad_norm": 1.4581992626190186, "learning_rate": 9.983630952380953e-06, "loss": 40.1383, "step": 5892 }, { "epoch": 140.31044776119404, "grad_norm": 1.493991732597351, "learning_rate": 9.982142857142858e-06, "loss": 39.6906, "step": 5893 }, { "epoch": 140.33432835820895, "grad_norm": 1.5185251235961914, "learning_rate": 9.980654761904762e-06, "loss": 39.6373, "step": 5894 }, { "epoch": 140.3582089552239, "grad_norm": 1.532470703125, "learning_rate": 9.979166666666668e-06, "loss": 40.1425, "step": 5895 }, { "epoch": 140.3820895522388, "grad_norm": 1.42241370677948, "learning_rate": 9.977678571428571e-06, "loss": 40.8013, "step": 5896 }, { "epoch": 140.40597014925373, "grad_norm": 1.435707449913025, "learning_rate": 9.976190476190477e-06, "loss": 39.1784, "step": 5897 }, { "epoch": 140.42985074626867, "grad_norm": 1.6245596408843994, "learning_rate": 9.974702380952382e-06, "loss": 39.457, "step": 5898 }, { "epoch": 140.45373134328358, "grad_norm": 1.447860836982727, "learning_rate": 9.973214285714287e-06, "loss": 40.8529, "step": 5899 }, { "epoch": 140.47761194029852, "grad_norm": 1.4031907320022583, "learning_rate": 9.971726190476191e-06, "loss": 40.7314, "step": 5900 }, { "epoch": 140.50149253731342, "grad_norm": 1.374687671661377, "learning_rate": 9.970238095238096e-06, "loss": 40.2643, "step": 5901 }, { "epoch": 140.52537313432836, "grad_norm": 1.4455846548080444, "learning_rate": 9.96875e-06, "loss": 40.0036, "step": 5902 }, { "epoch": 140.54925373134327, "grad_norm": 1.3764142990112305, "learning_rate": 9.967261904761905e-06, "loss": 40.4805, "step": 5903 }, { "epoch": 140.5731343283582, "grad_norm": 1.375475525856018, "learning_rate": 9.965773809523809e-06, "loss": 39.891, "step": 5904 }, { "epoch": 140.59701492537314, "grad_norm": 1.4794659614562988, "learning_rate": 9.964285714285714e-06, "loss": 40.6726, "step": 5905 }, { "epoch": 140.62089552238805, "grad_norm": 1.5239068269729614, "learning_rate": 9.96279761904762e-06, "loss": 40.6068, "step": 5906 }, { "epoch": 140.644776119403, "grad_norm": 1.4253668785095215, "learning_rate": 9.961309523809525e-06, "loss": 40.7579, "step": 5907 }, { "epoch": 140.6686567164179, "grad_norm": 1.3987441062927246, "learning_rate": 9.959821428571429e-06, "loss": 40.453, "step": 5908 }, { "epoch": 140.69253731343284, "grad_norm": 1.4262009859085083, "learning_rate": 9.958333333333334e-06, "loss": 40.6588, "step": 5909 }, { "epoch": 140.71641791044777, "grad_norm": 1.409367322921753, "learning_rate": 9.956845238095238e-06, "loss": 39.33, "step": 5910 }, { "epoch": 140.74029850746268, "grad_norm": 1.4272910356521606, "learning_rate": 9.955357142857143e-06, "loss": 39.3937, "step": 5911 }, { "epoch": 140.76417910447762, "grad_norm": 1.4336936473846436, "learning_rate": 9.953869047619049e-06, "loss": 40.6611, "step": 5912 }, { "epoch": 140.78805970149253, "grad_norm": 1.361899971961975, "learning_rate": 9.952380952380954e-06, "loss": 39.887, "step": 5913 }, { "epoch": 140.81194029850747, "grad_norm": 1.3805387020111084, "learning_rate": 9.950892857142858e-06, "loss": 39.9583, "step": 5914 }, { "epoch": 140.83582089552237, "grad_norm": 1.4234700202941895, "learning_rate": 9.949404761904763e-06, "loss": 39.7577, "step": 5915 }, { "epoch": 140.8597014925373, "grad_norm": 1.4284934997558594, "learning_rate": 9.947916666666667e-06, "loss": 39.5982, "step": 5916 }, { "epoch": 140.88358208955225, "grad_norm": 1.4097250699996948, "learning_rate": 9.946428571428572e-06, "loss": 40.799, "step": 5917 }, { "epoch": 140.90746268656716, "grad_norm": 1.4488868713378906, "learning_rate": 9.944940476190476e-06, "loss": 39.4747, "step": 5918 }, { "epoch": 140.9313432835821, "grad_norm": 1.3826762437820435, "learning_rate": 9.943452380952381e-06, "loss": 38.6073, "step": 5919 }, { "epoch": 140.955223880597, "grad_norm": 1.4141124486923218, "learning_rate": 9.941964285714287e-06, "loss": 39.8121, "step": 5920 }, { "epoch": 140.97910447761194, "grad_norm": 1.4182112216949463, "learning_rate": 9.940476190476192e-06, "loss": 39.3799, "step": 5921 }, { "epoch": 141.0, "grad_norm": NaN, "learning_rate": 9.938988095238096e-06, "loss": 45.2733, "step": 5922 }, { "epoch": 141.02388059701494, "grad_norm": 1.3648548126220703, "learning_rate": 9.938988095238096e-06, "loss": 39.5509, "step": 5923 }, { "epoch": 141.04776119402985, "grad_norm": 1.3872442245483398, "learning_rate": 9.937500000000001e-06, "loss": 39.3372, "step": 5924 }, { "epoch": 141.07164179104478, "grad_norm": 1.3625893592834473, "learning_rate": 9.936011904761905e-06, "loss": 39.1771, "step": 5925 }, { "epoch": 141.0955223880597, "grad_norm": 1.4325307607650757, "learning_rate": 9.93452380952381e-06, "loss": 38.8989, "step": 5926 }, { "epoch": 141.11940298507463, "grad_norm": 1.4066414833068848, "learning_rate": 9.933035714285715e-06, "loss": 38.9658, "step": 5927 }, { "epoch": 141.14328358208957, "grad_norm": 1.4074149131774902, "learning_rate": 9.93154761904762e-06, "loss": 39.6492, "step": 5928 }, { "epoch": 141.16716417910447, "grad_norm": 1.390856146812439, "learning_rate": 9.930059523809524e-06, "loss": 40.8488, "step": 5929 }, { "epoch": 141.1910447761194, "grad_norm": 1.3865633010864258, "learning_rate": 9.92857142857143e-06, "loss": 40.0843, "step": 5930 }, { "epoch": 141.21492537313432, "grad_norm": 1.3783724308013916, "learning_rate": 9.927083333333334e-06, "loss": 40.0549, "step": 5931 }, { "epoch": 141.23880597014926, "grad_norm": 1.419382929801941, "learning_rate": 9.925595238095239e-06, "loss": 40.1145, "step": 5932 }, { "epoch": 141.26268656716417, "grad_norm": 1.379937767982483, "learning_rate": 9.924107142857143e-06, "loss": 40.5286, "step": 5933 }, { "epoch": 141.2865671641791, "grad_norm": 1.3764898777008057, "learning_rate": 9.922619047619048e-06, "loss": 41.1253, "step": 5934 }, { "epoch": 141.31044776119404, "grad_norm": 1.3856515884399414, "learning_rate": 9.921130952380953e-06, "loss": 40.3682, "step": 5935 }, { "epoch": 141.33432835820895, "grad_norm": 1.4042420387268066, "learning_rate": 9.919642857142859e-06, "loss": 40.1638, "step": 5936 }, { "epoch": 141.3582089552239, "grad_norm": 1.4129002094268799, "learning_rate": 9.918154761904762e-06, "loss": 39.9503, "step": 5937 }, { "epoch": 141.3820895522388, "grad_norm": 1.4068022966384888, "learning_rate": 9.916666666666668e-06, "loss": 40.203, "step": 5938 }, { "epoch": 141.40597014925373, "grad_norm": 1.3902758359909058, "learning_rate": 9.915178571428571e-06, "loss": 39.2127, "step": 5939 }, { "epoch": 141.42985074626867, "grad_norm": 1.3683074712753296, "learning_rate": 9.913690476190477e-06, "loss": 40.1236, "step": 5940 }, { "epoch": 141.45373134328358, "grad_norm": 1.3739783763885498, "learning_rate": 9.912202380952382e-06, "loss": 39.7844, "step": 5941 }, { "epoch": 141.47761194029852, "grad_norm": 1.3771177530288696, "learning_rate": 9.910714285714288e-06, "loss": 39.3311, "step": 5942 }, { "epoch": 141.50149253731342, "grad_norm": 1.3981363773345947, "learning_rate": 9.909226190476191e-06, "loss": 39.2049, "step": 5943 }, { "epoch": 141.52537313432836, "grad_norm": 1.350259780883789, "learning_rate": 9.907738095238097e-06, "loss": 40.6457, "step": 5944 }, { "epoch": 141.54925373134327, "grad_norm": 2.0622353553771973, "learning_rate": 9.90625e-06, "loss": 39.7696, "step": 5945 }, { "epoch": 141.5731343283582, "grad_norm": 1.3534224033355713, "learning_rate": 9.904761904761906e-06, "loss": 40.4064, "step": 5946 }, { "epoch": 141.59701492537314, "grad_norm": 1.4330462217330933, "learning_rate": 9.90327380952381e-06, "loss": 39.4263, "step": 5947 }, { "epoch": 141.62089552238805, "grad_norm": 1.4924534559249878, "learning_rate": 9.901785714285715e-06, "loss": 39.6182, "step": 5948 }, { "epoch": 141.644776119403, "grad_norm": 1.3821979761123657, "learning_rate": 9.90029761904762e-06, "loss": 40.2637, "step": 5949 }, { "epoch": 141.6686567164179, "grad_norm": 1.3730716705322266, "learning_rate": 9.898809523809525e-06, "loss": 39.4017, "step": 5950 }, { "epoch": 141.69253731343284, "grad_norm": 1.3731303215026855, "learning_rate": 9.897321428571429e-06, "loss": 39.1627, "step": 5951 }, { "epoch": 141.71641791044777, "grad_norm": 1.3594568967819214, "learning_rate": 9.895833333333334e-06, "loss": 39.5755, "step": 5952 }, { "epoch": 141.74029850746268, "grad_norm": 1.4027025699615479, "learning_rate": 9.894345238095238e-06, "loss": 38.8803, "step": 5953 }, { "epoch": 141.76417910447762, "grad_norm": 1.40875244140625, "learning_rate": 9.892857142857143e-06, "loss": 39.6903, "step": 5954 }, { "epoch": 141.78805970149253, "grad_norm": 1.3927994966506958, "learning_rate": 9.891369047619049e-06, "loss": 40.8048, "step": 5955 }, { "epoch": 141.81194029850747, "grad_norm": 1.4039700031280518, "learning_rate": 9.889880952380954e-06, "loss": 39.6393, "step": 5956 }, { "epoch": 141.83582089552237, "grad_norm": 1.3763232231140137, "learning_rate": 9.888392857142858e-06, "loss": 39.9408, "step": 5957 }, { "epoch": 141.8597014925373, "grad_norm": 1.404998779296875, "learning_rate": 9.886904761904763e-06, "loss": 39.1475, "step": 5958 }, { "epoch": 141.88358208955225, "grad_norm": 1.4189761877059937, "learning_rate": 9.885416666666667e-06, "loss": 40.6848, "step": 5959 }, { "epoch": 141.90746268656716, "grad_norm": 1.4213534593582153, "learning_rate": 9.883928571428572e-06, "loss": 39.2496, "step": 5960 }, { "epoch": 141.9313432835821, "grad_norm": 1.3752295970916748, "learning_rate": 9.882440476190478e-06, "loss": 40.6287, "step": 5961 }, { "epoch": 141.955223880597, "grad_norm": 1.5009838342666626, "learning_rate": 9.880952380952381e-06, "loss": 40.0882, "step": 5962 }, { "epoch": 141.97910447761194, "grad_norm": 1.3895057439804077, "learning_rate": 9.879464285714287e-06, "loss": 40.1355, "step": 5963 }, { "epoch": 142.0, "grad_norm": 1.4038840532302856, "learning_rate": 9.877976190476192e-06, "loss": 34.5323, "step": 5964 }, { "epoch": 142.02388059701494, "grad_norm": 1.3578976392745972, "learning_rate": 9.876488095238096e-06, "loss": 40.7048, "step": 5965 }, { "epoch": 142.04776119402985, "grad_norm": 1.4314500093460083, "learning_rate": 9.875000000000001e-06, "loss": 39.6964, "step": 5966 }, { "epoch": 142.07164179104478, "grad_norm": 1.377135157585144, "learning_rate": 9.873511904761905e-06, "loss": 39.9149, "step": 5967 }, { "epoch": 142.0955223880597, "grad_norm": 1.34178626537323, "learning_rate": 9.87202380952381e-06, "loss": 39.8173, "step": 5968 }, { "epoch": 142.11940298507463, "grad_norm": 1.3858922719955444, "learning_rate": 9.870535714285716e-06, "loss": 39.7895, "step": 5969 }, { "epoch": 142.14328358208957, "grad_norm": 1.4754875898361206, "learning_rate": 9.869047619047621e-06, "loss": 39.0416, "step": 5970 }, { "epoch": 142.16716417910447, "grad_norm": 1.3947091102600098, "learning_rate": 9.867559523809525e-06, "loss": 39.6741, "step": 5971 }, { "epoch": 142.1910447761194, "grad_norm": 1.4490790367126465, "learning_rate": 9.86607142857143e-06, "loss": 39.5923, "step": 5972 }, { "epoch": 142.21492537313432, "grad_norm": 1.3713910579681396, "learning_rate": 9.864583333333334e-06, "loss": 40.3603, "step": 5973 }, { "epoch": 142.23880597014926, "grad_norm": 1.4266058206558228, "learning_rate": 9.863095238095239e-06, "loss": 40.1188, "step": 5974 }, { "epoch": 142.26268656716417, "grad_norm": 1.3901604413986206, "learning_rate": 9.861607142857144e-06, "loss": 38.5814, "step": 5975 }, { "epoch": 142.2865671641791, "grad_norm": 1.3343604803085327, "learning_rate": 9.860119047619048e-06, "loss": 39.6169, "step": 5976 }, { "epoch": 142.31044776119404, "grad_norm": 1.4000598192214966, "learning_rate": 9.858630952380953e-06, "loss": 39.2677, "step": 5977 }, { "epoch": 142.33432835820895, "grad_norm": 1.360597848892212, "learning_rate": 9.857142857142859e-06, "loss": 41.1602, "step": 5978 }, { "epoch": 142.3582089552239, "grad_norm": 1.3995593786239624, "learning_rate": 9.855654761904763e-06, "loss": 40.3632, "step": 5979 }, { "epoch": 142.3820895522388, "grad_norm": 1.396146297454834, "learning_rate": 9.854166666666668e-06, "loss": 38.5094, "step": 5980 }, { "epoch": 142.40597014925373, "grad_norm": 1.3890860080718994, "learning_rate": 9.852678571428572e-06, "loss": 40.0796, "step": 5981 }, { "epoch": 142.42985074626867, "grad_norm": 1.4069410562515259, "learning_rate": 9.851190476190477e-06, "loss": 40.1833, "step": 5982 }, { "epoch": 142.45373134328358, "grad_norm": 1.380134105682373, "learning_rate": 9.849702380952382e-06, "loss": 39.3675, "step": 5983 }, { "epoch": 142.47761194029852, "grad_norm": 1.3769725561141968, "learning_rate": 9.848214285714288e-06, "loss": 40.0246, "step": 5984 }, { "epoch": 142.50149253731342, "grad_norm": 1.4535309076309204, "learning_rate": 9.846726190476191e-06, "loss": 40.4016, "step": 5985 }, { "epoch": 142.52537313432836, "grad_norm": 1.3841209411621094, "learning_rate": 9.845238095238097e-06, "loss": 39.7619, "step": 5986 }, { "epoch": 142.54925373134327, "grad_norm": 1.4177577495574951, "learning_rate": 9.84375e-06, "loss": 41.2222, "step": 5987 }, { "epoch": 142.5731343283582, "grad_norm": 1.4472057819366455, "learning_rate": 9.842261904761906e-06, "loss": 39.2836, "step": 5988 }, { "epoch": 142.59701492537314, "grad_norm": 1.3923413753509521, "learning_rate": 9.840773809523811e-06, "loss": 39.2379, "step": 5989 }, { "epoch": 142.62089552238805, "grad_norm": 1.3854180574417114, "learning_rate": 9.839285714285715e-06, "loss": 39.7138, "step": 5990 }, { "epoch": 142.644776119403, "grad_norm": 1.420424222946167, "learning_rate": 9.83779761904762e-06, "loss": 39.9484, "step": 5991 }, { "epoch": 142.6686567164179, "grad_norm": 1.3684422969818115, "learning_rate": 9.836309523809524e-06, "loss": 40.5253, "step": 5992 }, { "epoch": 142.69253731343284, "grad_norm": 1.3718334436416626, "learning_rate": 9.83482142857143e-06, "loss": 39.6221, "step": 5993 }, { "epoch": 142.71641791044777, "grad_norm": 1.4342639446258545, "learning_rate": 9.833333333333333e-06, "loss": 38.8138, "step": 5994 }, { "epoch": 142.74029850746268, "grad_norm": 1.4213155508041382, "learning_rate": 9.831845238095238e-06, "loss": 39.4317, "step": 5995 }, { "epoch": 142.76417910447762, "grad_norm": 1.3667598962783813, "learning_rate": 9.830357142857144e-06, "loss": 40.657, "step": 5996 }, { "epoch": 142.78805970149253, "grad_norm": 1.3903329372406006, "learning_rate": 9.828869047619049e-06, "loss": 41.2072, "step": 5997 }, { "epoch": 142.81194029850747, "grad_norm": 1.4049357175827026, "learning_rate": 9.827380952380953e-06, "loss": 40.4166, "step": 5998 }, { "epoch": 142.83582089552237, "grad_norm": 1.3844646215438843, "learning_rate": 9.825892857142858e-06, "loss": 39.295, "step": 5999 }, { "epoch": 142.8597014925373, "grad_norm": 1.4348889589309692, "learning_rate": 9.824404761904762e-06, "loss": 39.5918, "step": 6000 }, { "epoch": 142.88358208955225, "grad_norm": 1.4351601600646973, "learning_rate": 9.822916666666667e-06, "loss": 40.0885, "step": 6001 }, { "epoch": 142.90746268656716, "grad_norm": 1.3810704946517944, "learning_rate": 9.821428571428573e-06, "loss": 38.9639, "step": 6002 }, { "epoch": 142.9313432835821, "grad_norm": 1.3953403234481812, "learning_rate": 9.819940476190478e-06, "loss": 39.3367, "step": 6003 }, { "epoch": 142.955223880597, "grad_norm": 1.443451166152954, "learning_rate": 9.818452380952382e-06, "loss": 39.9607, "step": 6004 }, { "epoch": 142.97910447761194, "grad_norm": 1.379336953163147, "learning_rate": 9.816964285714287e-06, "loss": 38.9749, "step": 6005 }, { "epoch": 143.0, "grad_norm": 1.3852283954620361, "learning_rate": 9.81547619047619e-06, "loss": 35.8944, "step": 6006 }, { "epoch": 143.02388059701494, "grad_norm": 1.3471453189849854, "learning_rate": 9.813988095238096e-06, "loss": 40.7985, "step": 6007 }, { "epoch": 143.04776119402985, "grad_norm": 1.4004846811294556, "learning_rate": 9.8125e-06, "loss": 39.6182, "step": 6008 }, { "epoch": 143.07164179104478, "grad_norm": 1.3630460500717163, "learning_rate": 9.811011904761905e-06, "loss": 40.6632, "step": 6009 }, { "epoch": 143.0955223880597, "grad_norm": 1.3970879316329956, "learning_rate": 9.80952380952381e-06, "loss": 40.6313, "step": 6010 }, { "epoch": 143.11940298507463, "grad_norm": 1.3826817274093628, "learning_rate": 9.808035714285716e-06, "loss": 39.7652, "step": 6011 }, { "epoch": 143.14328358208957, "grad_norm": 1.3927830457687378, "learning_rate": 9.80654761904762e-06, "loss": 39.6597, "step": 6012 }, { "epoch": 143.16716417910447, "grad_norm": 1.3665093183517456, "learning_rate": 9.805059523809525e-06, "loss": 38.9194, "step": 6013 }, { "epoch": 143.1910447761194, "grad_norm": 1.407300591468811, "learning_rate": 9.803571428571428e-06, "loss": 37.8587, "step": 6014 }, { "epoch": 143.21492537313432, "grad_norm": 1.41067636013031, "learning_rate": 9.802083333333334e-06, "loss": 39.7461, "step": 6015 }, { "epoch": 143.23880597014926, "grad_norm": 1.4113224744796753, "learning_rate": 9.80059523809524e-06, "loss": 40.6357, "step": 6016 }, { "epoch": 143.26268656716417, "grad_norm": 1.4048856496810913, "learning_rate": 9.799107142857145e-06, "loss": 39.9219, "step": 6017 }, { "epoch": 143.2865671641791, "grad_norm": 1.4577566385269165, "learning_rate": 9.797619047619048e-06, "loss": 40.3134, "step": 6018 }, { "epoch": 143.31044776119404, "grad_norm": 1.417279839515686, "learning_rate": 9.796130952380954e-06, "loss": 39.6904, "step": 6019 }, { "epoch": 143.33432835820895, "grad_norm": 1.3892061710357666, "learning_rate": 9.794642857142857e-06, "loss": 39.7603, "step": 6020 }, { "epoch": 143.3582089552239, "grad_norm": 1.436864972114563, "learning_rate": 9.793154761904763e-06, "loss": 41.2942, "step": 6021 }, { "epoch": 143.3820895522388, "grad_norm": 1.7995113134384155, "learning_rate": 9.791666666666666e-06, "loss": 40.7254, "step": 6022 }, { "epoch": 143.40597014925373, "grad_norm": 1.380489706993103, "learning_rate": 9.790178571428572e-06, "loss": 39.5229, "step": 6023 }, { "epoch": 143.42985074626867, "grad_norm": 1.3832708597183228, "learning_rate": 9.788690476190477e-06, "loss": 38.813, "step": 6024 }, { "epoch": 143.45373134328358, "grad_norm": 1.3795461654663086, "learning_rate": 9.787202380952382e-06, "loss": 38.8605, "step": 6025 }, { "epoch": 143.47761194029852, "grad_norm": 1.382491946220398, "learning_rate": 9.785714285714286e-06, "loss": 40.9531, "step": 6026 }, { "epoch": 143.50149253731342, "grad_norm": 1.49003267288208, "learning_rate": 9.784226190476192e-06, "loss": 40.1792, "step": 6027 }, { "epoch": 143.52537313432836, "grad_norm": 1.3664973974227905, "learning_rate": 9.782738095238095e-06, "loss": 39.9905, "step": 6028 }, { "epoch": 143.54925373134327, "grad_norm": 1.3992332220077515, "learning_rate": 9.78125e-06, "loss": 39.5054, "step": 6029 }, { "epoch": 143.5731343283582, "grad_norm": 1.4198120832443237, "learning_rate": 9.779761904761906e-06, "loss": 39.0915, "step": 6030 }, { "epoch": 143.59701492537314, "grad_norm": 1.4307273626327515, "learning_rate": 9.778273809523811e-06, "loss": 39.2144, "step": 6031 }, { "epoch": 143.62089552238805, "grad_norm": 1.3632030487060547, "learning_rate": 9.776785714285715e-06, "loss": 40.7892, "step": 6032 }, { "epoch": 143.644776119403, "grad_norm": 1.4021145105361938, "learning_rate": 9.77529761904762e-06, "loss": 39.6195, "step": 6033 }, { "epoch": 143.6686567164179, "grad_norm": 1.4163109064102173, "learning_rate": 9.773809523809524e-06, "loss": 40.103, "step": 6034 }, { "epoch": 143.69253731343284, "grad_norm": 1.4031790494918823, "learning_rate": 9.77232142857143e-06, "loss": 40.0806, "step": 6035 }, { "epoch": 143.71641791044777, "grad_norm": 1.5377390384674072, "learning_rate": 9.770833333333333e-06, "loss": 40.9214, "step": 6036 }, { "epoch": 143.74029850746268, "grad_norm": 1.3926348686218262, "learning_rate": 9.769345238095238e-06, "loss": 40.3404, "step": 6037 }, { "epoch": 143.76417910447762, "grad_norm": 1.435250163078308, "learning_rate": 9.767857142857144e-06, "loss": 40.8794, "step": 6038 }, { "epoch": 143.78805970149253, "grad_norm": 1.4155412912368774, "learning_rate": 9.76636904761905e-06, "loss": 39.3568, "step": 6039 }, { "epoch": 143.81194029850747, "grad_norm": 1.3966821432113647, "learning_rate": 9.764880952380953e-06, "loss": 39.6443, "step": 6040 }, { "epoch": 143.83582089552237, "grad_norm": 1.3798326253890991, "learning_rate": 9.763392857142858e-06, "loss": 38.8876, "step": 6041 }, { "epoch": 143.8597014925373, "grad_norm": 1.4059666395187378, "learning_rate": 9.761904761904762e-06, "loss": 38.3494, "step": 6042 }, { "epoch": 143.88358208955225, "grad_norm": 1.4073784351348877, "learning_rate": 9.760416666666667e-06, "loss": 40.2028, "step": 6043 }, { "epoch": 143.90746268656716, "grad_norm": 1.44059157371521, "learning_rate": 9.758928571428573e-06, "loss": 38.6974, "step": 6044 }, { "epoch": 143.9313432835821, "grad_norm": 1.4106340408325195, "learning_rate": 9.757440476190478e-06, "loss": 40.9646, "step": 6045 }, { "epoch": 143.955223880597, "grad_norm": 1.354474425315857, "learning_rate": 9.755952380952382e-06, "loss": 37.9019, "step": 6046 }, { "epoch": 143.97910447761194, "grad_norm": 1.3941643238067627, "learning_rate": 9.754464285714287e-06, "loss": 39.7041, "step": 6047 }, { "epoch": 144.0, "grad_norm": 1.4012821912765503, "learning_rate": 9.75297619047619e-06, "loss": 35.0156, "step": 6048 }, { "epoch": 144.02388059701494, "grad_norm": 1.4515129327774048, "learning_rate": 9.751488095238096e-06, "loss": 39.1394, "step": 6049 }, { "epoch": 144.04776119402985, "grad_norm": 1.4204045534133911, "learning_rate": 9.75e-06, "loss": 40.4347, "step": 6050 }, { "epoch": 144.07164179104478, "grad_norm": 1.4348249435424805, "learning_rate": 9.748511904761905e-06, "loss": 39.606, "step": 6051 }, { "epoch": 144.0955223880597, "grad_norm": 1.403592586517334, "learning_rate": 9.74702380952381e-06, "loss": 40.8889, "step": 6052 }, { "epoch": 144.11940298507463, "grad_norm": 1.4368479251861572, "learning_rate": 9.745535714285716e-06, "loss": 37.6744, "step": 6053 }, { "epoch": 144.14328358208957, "grad_norm": 1.3880430459976196, "learning_rate": 9.74404761904762e-06, "loss": 40.532, "step": 6054 }, { "epoch": 144.16716417910447, "grad_norm": 1.411418080329895, "learning_rate": 9.742559523809525e-06, "loss": 39.8998, "step": 6055 }, { "epoch": 144.1910447761194, "grad_norm": 1.4041367769241333, "learning_rate": 9.741071428571429e-06, "loss": 40.0883, "step": 6056 }, { "epoch": 144.21492537313432, "grad_norm": 1.4015413522720337, "learning_rate": 9.739583333333334e-06, "loss": 39.9906, "step": 6057 }, { "epoch": 144.23880597014926, "grad_norm": 1.4331082105636597, "learning_rate": 9.73809523809524e-06, "loss": 40.5134, "step": 6058 }, { "epoch": 144.26268656716417, "grad_norm": 1.4271198511123657, "learning_rate": 9.736607142857145e-06, "loss": 39.1911, "step": 6059 }, { "epoch": 144.2865671641791, "grad_norm": 1.3693294525146484, "learning_rate": 9.735119047619048e-06, "loss": 40.7628, "step": 6060 }, { "epoch": 144.31044776119404, "grad_norm": 1.398993730545044, "learning_rate": 9.733630952380954e-06, "loss": 40.4236, "step": 6061 }, { "epoch": 144.33432835820895, "grad_norm": 1.4403541088104248, "learning_rate": 9.732142857142858e-06, "loss": 38.5328, "step": 6062 }, { "epoch": 144.3582089552239, "grad_norm": 1.3752585649490356, "learning_rate": 9.730654761904763e-06, "loss": 40.1457, "step": 6063 }, { "epoch": 144.3820895522388, "grad_norm": 1.3909778594970703, "learning_rate": 9.729166666666667e-06, "loss": 38.3859, "step": 6064 }, { "epoch": 144.40597014925373, "grad_norm": 1.38352632522583, "learning_rate": 9.727678571428572e-06, "loss": 39.4806, "step": 6065 }, { "epoch": 144.42985074626867, "grad_norm": 1.4205783605575562, "learning_rate": 9.726190476190477e-06, "loss": 40.7107, "step": 6066 }, { "epoch": 144.45373134328358, "grad_norm": 1.3417353630065918, "learning_rate": 9.724702380952383e-06, "loss": 40.3032, "step": 6067 }, { "epoch": 144.47761194029852, "grad_norm": 1.390173316001892, "learning_rate": 9.723214285714286e-06, "loss": 40.0918, "step": 6068 }, { "epoch": 144.50149253731342, "grad_norm": 1.3733923435211182, "learning_rate": 9.721726190476192e-06, "loss": 39.9388, "step": 6069 }, { "epoch": 144.52537313432836, "grad_norm": 1.408699631690979, "learning_rate": 9.720238095238095e-06, "loss": 38.4551, "step": 6070 }, { "epoch": 144.54925373134327, "grad_norm": 1.3843560218811035, "learning_rate": 9.71875e-06, "loss": 39.4648, "step": 6071 }, { "epoch": 144.5731343283582, "grad_norm": 1.3717235326766968, "learning_rate": 9.717261904761906e-06, "loss": 40.0075, "step": 6072 }, { "epoch": 144.59701492537314, "grad_norm": 1.3695107698440552, "learning_rate": 9.715773809523812e-06, "loss": 41.4901, "step": 6073 }, { "epoch": 144.62089552238805, "grad_norm": 1.4248261451721191, "learning_rate": 9.714285714285715e-06, "loss": 40.0792, "step": 6074 }, { "epoch": 144.644776119403, "grad_norm": 1.3698327541351318, "learning_rate": 9.71279761904762e-06, "loss": 38.8968, "step": 6075 }, { "epoch": 144.6686567164179, "grad_norm": 1.351628065109253, "learning_rate": 9.711309523809524e-06, "loss": 39.9651, "step": 6076 }, { "epoch": 144.69253731343284, "grad_norm": 1.4898093938827515, "learning_rate": 9.70982142857143e-06, "loss": 39.1547, "step": 6077 }, { "epoch": 144.71641791044777, "grad_norm": 1.3827260732650757, "learning_rate": 9.708333333333333e-06, "loss": 38.3707, "step": 6078 }, { "epoch": 144.74029850746268, "grad_norm": 1.4313433170318604, "learning_rate": 9.706845238095239e-06, "loss": 40.463, "step": 6079 }, { "epoch": 144.76417910447762, "grad_norm": 1.4004284143447876, "learning_rate": 9.705357142857144e-06, "loss": 40.6709, "step": 6080 }, { "epoch": 144.78805970149253, "grad_norm": 1.4038856029510498, "learning_rate": 9.70386904761905e-06, "loss": 40.3661, "step": 6081 }, { "epoch": 144.81194029850747, "grad_norm": 1.3778727054595947, "learning_rate": 9.702380952380953e-06, "loss": 40.1146, "step": 6082 }, { "epoch": 144.83582089552237, "grad_norm": 1.454242467880249, "learning_rate": 9.700892857142858e-06, "loss": 39.2078, "step": 6083 }, { "epoch": 144.8597014925373, "grad_norm": 1.5347250699996948, "learning_rate": 9.699404761904762e-06, "loss": 40.4021, "step": 6084 }, { "epoch": 144.88358208955225, "grad_norm": 1.4426525831222534, "learning_rate": 9.697916666666667e-06, "loss": 39.1542, "step": 6085 }, { "epoch": 144.90746268656716, "grad_norm": 1.3978263139724731, "learning_rate": 9.696428571428573e-06, "loss": 40.2953, "step": 6086 }, { "epoch": 144.9313432835821, "grad_norm": 1.396606683731079, "learning_rate": 9.694940476190478e-06, "loss": 40.3671, "step": 6087 }, { "epoch": 144.955223880597, "grad_norm": 1.4006145000457764, "learning_rate": 9.693452380952382e-06, "loss": 39.5413, "step": 6088 }, { "epoch": 144.97910447761194, "grad_norm": 1.3811942338943481, "learning_rate": 9.691964285714287e-06, "loss": 40.3324, "step": 6089 }, { "epoch": 145.0, "grad_norm": 1.4124764204025269, "learning_rate": 9.690476190476191e-06, "loss": 33.489, "step": 6090 }, { "epoch": 145.02388059701494, "grad_norm": 1.4196065664291382, "learning_rate": 9.688988095238096e-06, "loss": 39.907, "step": 6091 }, { "epoch": 145.04776119402985, "grad_norm": 1.3632993698120117, "learning_rate": 9.6875e-06, "loss": 39.8741, "step": 6092 }, { "epoch": 145.07164179104478, "grad_norm": 1.405542254447937, "learning_rate": 9.686011904761905e-06, "loss": 39.0949, "step": 6093 }, { "epoch": 145.0955223880597, "grad_norm": 1.386886715888977, "learning_rate": 9.68452380952381e-06, "loss": 39.4713, "step": 6094 }, { "epoch": 145.11940298507463, "grad_norm": 1.379988670349121, "learning_rate": 9.683035714285714e-06, "loss": 39.3421, "step": 6095 }, { "epoch": 145.14328358208957, "grad_norm": 1.3504042625427246, "learning_rate": 9.68154761904762e-06, "loss": 40.1228, "step": 6096 }, { "epoch": 145.16716417910447, "grad_norm": 1.3774610757827759, "learning_rate": 9.680059523809523e-06, "loss": 38.9503, "step": 6097 }, { "epoch": 145.1910447761194, "grad_norm": 1.3615082502365112, "learning_rate": 9.678571428571429e-06, "loss": 39.443, "step": 6098 }, { "epoch": 145.21492537313432, "grad_norm": 1.3816218376159668, "learning_rate": 9.677083333333334e-06, "loss": 39.7324, "step": 6099 }, { "epoch": 145.23880597014926, "grad_norm": 1.4039734601974487, "learning_rate": 9.67559523809524e-06, "loss": 38.5656, "step": 6100 }, { "epoch": 145.26268656716417, "grad_norm": 1.3444157838821411, "learning_rate": 9.674107142857143e-06, "loss": 41.0232, "step": 6101 }, { "epoch": 145.2865671641791, "grad_norm": 1.3713635206222534, "learning_rate": 9.672619047619049e-06, "loss": 38.8538, "step": 6102 }, { "epoch": 145.31044776119404, "grad_norm": 1.3671987056732178, "learning_rate": 9.671130952380952e-06, "loss": 40.9108, "step": 6103 }, { "epoch": 145.33432835820895, "grad_norm": 1.3972434997558594, "learning_rate": 9.669642857142858e-06, "loss": 38.0189, "step": 6104 }, { "epoch": 145.3582089552239, "grad_norm": 1.404048204421997, "learning_rate": 9.668154761904763e-06, "loss": 40.6688, "step": 6105 }, { "epoch": 145.3820895522388, "grad_norm": 1.4243193864822388, "learning_rate": 9.666666666666667e-06, "loss": 40.6117, "step": 6106 }, { "epoch": 145.40597014925373, "grad_norm": 1.3722033500671387, "learning_rate": 9.665178571428572e-06, "loss": 40.2802, "step": 6107 }, { "epoch": 145.42985074626867, "grad_norm": 1.3741326332092285, "learning_rate": 9.663690476190477e-06, "loss": 39.8646, "step": 6108 }, { "epoch": 145.45373134328358, "grad_norm": 1.387630581855774, "learning_rate": 9.662202380952381e-06, "loss": 39.8667, "step": 6109 }, { "epoch": 145.47761194029852, "grad_norm": 1.396082878112793, "learning_rate": 9.660714285714287e-06, "loss": 40.5533, "step": 6110 }, { "epoch": 145.50149253731342, "grad_norm": 1.3907204866409302, "learning_rate": 9.65922619047619e-06, "loss": 39.8672, "step": 6111 }, { "epoch": 145.52537313432836, "grad_norm": 1.3973242044448853, "learning_rate": 9.657738095238096e-06, "loss": 39.5084, "step": 6112 }, { "epoch": 145.54925373134327, "grad_norm": 1.4265002012252808, "learning_rate": 9.656250000000001e-06, "loss": 38.1441, "step": 6113 }, { "epoch": 145.5731343283582, "grad_norm": 1.418884515762329, "learning_rate": 9.654761904761906e-06, "loss": 40.3509, "step": 6114 }, { "epoch": 145.59701492537314, "grad_norm": 1.3906280994415283, "learning_rate": 9.65327380952381e-06, "loss": 40.5187, "step": 6115 }, { "epoch": 145.62089552238805, "grad_norm": 1.412100911140442, "learning_rate": 9.651785714285715e-06, "loss": 39.7119, "step": 6116 }, { "epoch": 145.644776119403, "grad_norm": 1.358572244644165, "learning_rate": 9.650297619047619e-06, "loss": 40.6092, "step": 6117 }, { "epoch": 145.6686567164179, "grad_norm": 1.377974510192871, "learning_rate": 9.648809523809524e-06, "loss": 40.8244, "step": 6118 }, { "epoch": 145.69253731343284, "grad_norm": 1.4051403999328613, "learning_rate": 9.64732142857143e-06, "loss": 39.45, "step": 6119 }, { "epoch": 145.71641791044777, "grad_norm": 1.4033808708190918, "learning_rate": 9.645833333333333e-06, "loss": 38.9148, "step": 6120 }, { "epoch": 145.74029850746268, "grad_norm": 1.375597357749939, "learning_rate": 9.644345238095239e-06, "loss": 39.1358, "step": 6121 }, { "epoch": 145.76417910447762, "grad_norm": 1.4282183647155762, "learning_rate": 9.642857142857144e-06, "loss": 39.0551, "step": 6122 }, { "epoch": 145.78805970149253, "grad_norm": 1.3668177127838135, "learning_rate": 9.641369047619048e-06, "loss": 40.6016, "step": 6123 }, { "epoch": 145.81194029850747, "grad_norm": 1.408289909362793, "learning_rate": 9.639880952380953e-06, "loss": 41.7018, "step": 6124 }, { "epoch": 145.83582089552237, "grad_norm": 1.4099043607711792, "learning_rate": 9.638392857142857e-06, "loss": 38.5335, "step": 6125 }, { "epoch": 145.8597014925373, "grad_norm": 1.387242317199707, "learning_rate": 9.636904761904762e-06, "loss": 38.9853, "step": 6126 }, { "epoch": 145.88358208955225, "grad_norm": 1.3849056959152222, "learning_rate": 9.635416666666668e-06, "loss": 39.814, "step": 6127 }, { "epoch": 145.90746268656716, "grad_norm": 1.4107396602630615, "learning_rate": 9.633928571428573e-06, "loss": 40.0784, "step": 6128 }, { "epoch": 145.9313432835821, "grad_norm": 1.4152942895889282, "learning_rate": 9.632440476190477e-06, "loss": 40.4958, "step": 6129 }, { "epoch": 145.955223880597, "grad_norm": 1.3740997314453125, "learning_rate": 9.630952380952382e-06, "loss": 40.6781, "step": 6130 }, { "epoch": 145.97910447761194, "grad_norm": 1.4034487009048462, "learning_rate": 9.629464285714286e-06, "loss": 40.4586, "step": 6131 }, { "epoch": 146.0, "grad_norm": 1.421081304550171, "learning_rate": 9.627976190476191e-06, "loss": 34.3527, "step": 6132 }, { "epoch": 146.02388059701494, "grad_norm": 1.3823906183242798, "learning_rate": 9.626488095238096e-06, "loss": 40.4704, "step": 6133 }, { "epoch": 146.04776119402985, "grad_norm": 1.3911796808242798, "learning_rate": 9.625e-06, "loss": 40.7769, "step": 6134 }, { "epoch": 146.07164179104478, "grad_norm": 1.3679624795913696, "learning_rate": 9.623511904761906e-06, "loss": 39.7024, "step": 6135 }, { "epoch": 146.0955223880597, "grad_norm": 1.4060527086257935, "learning_rate": 9.622023809523811e-06, "loss": 38.9122, "step": 6136 }, { "epoch": 146.11940298507463, "grad_norm": 1.436442494392395, "learning_rate": 9.620535714285715e-06, "loss": 38.1405, "step": 6137 }, { "epoch": 146.14328358208957, "grad_norm": 2.494845151901245, "learning_rate": 9.61904761904762e-06, "loss": 39.4463, "step": 6138 }, { "epoch": 146.16716417910447, "grad_norm": 1.3978315591812134, "learning_rate": 9.617559523809524e-06, "loss": 40.1895, "step": 6139 }, { "epoch": 146.1910447761194, "grad_norm": 1.3785191774368286, "learning_rate": 9.616071428571429e-06, "loss": 40.2124, "step": 6140 }, { "epoch": 146.21492537313432, "grad_norm": 1.3803222179412842, "learning_rate": 9.614583333333334e-06, "loss": 39.8608, "step": 6141 }, { "epoch": 146.23880597014926, "grad_norm": 1.3612922430038452, "learning_rate": 9.61309523809524e-06, "loss": 38.4356, "step": 6142 }, { "epoch": 146.26268656716417, "grad_norm": 1.3717597723007202, "learning_rate": 9.611607142857143e-06, "loss": 39.7964, "step": 6143 }, { "epoch": 146.2865671641791, "grad_norm": 1.3576524257659912, "learning_rate": 9.610119047619049e-06, "loss": 41.3296, "step": 6144 }, { "epoch": 146.31044776119404, "grad_norm": 1.3804219961166382, "learning_rate": 9.608630952380952e-06, "loss": 38.6378, "step": 6145 }, { "epoch": 146.33432835820895, "grad_norm": 1.435280680656433, "learning_rate": 9.607142857142858e-06, "loss": 39.6146, "step": 6146 }, { "epoch": 146.3582089552239, "grad_norm": 1.3810113668441772, "learning_rate": 9.605654761904763e-06, "loss": 40.2568, "step": 6147 }, { "epoch": 146.3820895522388, "grad_norm": 1.3783928155899048, "learning_rate": 9.604166666666669e-06, "loss": 40.2634, "step": 6148 }, { "epoch": 146.40597014925373, "grad_norm": 1.380218744277954, "learning_rate": 9.602678571428572e-06, "loss": 39.7878, "step": 6149 }, { "epoch": 146.42985074626867, "grad_norm": 1.4120827913284302, "learning_rate": 9.601190476190478e-06, "loss": 40.5112, "step": 6150 }, { "epoch": 146.45373134328358, "grad_norm": 1.3981833457946777, "learning_rate": 9.599702380952381e-06, "loss": 39.7239, "step": 6151 }, { "epoch": 146.47761194029852, "grad_norm": 1.385880708694458, "learning_rate": 9.598214285714287e-06, "loss": 39.6368, "step": 6152 }, { "epoch": 146.50149253731342, "grad_norm": 1.3885592222213745, "learning_rate": 9.59672619047619e-06, "loss": 40.8386, "step": 6153 }, { "epoch": 146.52537313432836, "grad_norm": 1.3480048179626465, "learning_rate": 9.595238095238096e-06, "loss": 40.67, "step": 6154 }, { "epoch": 146.54925373134327, "grad_norm": 1.4307184219360352, "learning_rate": 9.593750000000001e-06, "loss": 36.4912, "step": 6155 }, { "epoch": 146.5731343283582, "grad_norm": 1.3704806566238403, "learning_rate": 9.592261904761906e-06, "loss": 38.8165, "step": 6156 }, { "epoch": 146.59701492537314, "grad_norm": 1.4112454652786255, "learning_rate": 9.59077380952381e-06, "loss": 39.8013, "step": 6157 }, { "epoch": 146.62089552238805, "grad_norm": 1.440160870552063, "learning_rate": 9.589285714285716e-06, "loss": 39.3009, "step": 6158 }, { "epoch": 146.644776119403, "grad_norm": 1.3870078325271606, "learning_rate": 9.58779761904762e-06, "loss": 40.2936, "step": 6159 }, { "epoch": 146.6686567164179, "grad_norm": 1.3851828575134277, "learning_rate": 9.586309523809525e-06, "loss": 40.2708, "step": 6160 }, { "epoch": 146.69253731343284, "grad_norm": 1.4020555019378662, "learning_rate": 9.58482142857143e-06, "loss": 40.2866, "step": 6161 }, { "epoch": 146.71641791044777, "grad_norm": 1.3600176572799683, "learning_rate": 9.583333333333335e-06, "loss": 41.0169, "step": 6162 }, { "epoch": 146.74029850746268, "grad_norm": 1.3988215923309326, "learning_rate": 9.581845238095239e-06, "loss": 39.5894, "step": 6163 }, { "epoch": 146.76417910447762, "grad_norm": 1.4320552349090576, "learning_rate": 9.580357142857144e-06, "loss": 40.1592, "step": 6164 }, { "epoch": 146.78805970149253, "grad_norm": 1.3647886514663696, "learning_rate": 9.578869047619048e-06, "loss": 39.3692, "step": 6165 }, { "epoch": 146.81194029850747, "grad_norm": 1.3675220012664795, "learning_rate": 9.577380952380953e-06, "loss": 39.9496, "step": 6166 }, { "epoch": 146.83582089552237, "grad_norm": 1.3815337419509888, "learning_rate": 9.575892857142857e-06, "loss": 39.5509, "step": 6167 }, { "epoch": 146.8597014925373, "grad_norm": 1.3913989067077637, "learning_rate": 9.574404761904762e-06, "loss": 39.3451, "step": 6168 }, { "epoch": 146.88358208955225, "grad_norm": 1.3485398292541504, "learning_rate": 9.572916666666668e-06, "loss": 41.1508, "step": 6169 }, { "epoch": 146.90746268656716, "grad_norm": 1.3577455282211304, "learning_rate": 9.571428571428573e-06, "loss": 39.1247, "step": 6170 }, { "epoch": 146.9313432835821, "grad_norm": 1.380012035369873, "learning_rate": 9.569940476190477e-06, "loss": 39.6708, "step": 6171 }, { "epoch": 146.955223880597, "grad_norm": 1.389444351196289, "learning_rate": 9.568452380952382e-06, "loss": 40.3143, "step": 6172 }, { "epoch": 146.97910447761194, "grad_norm": 1.401086688041687, "learning_rate": 9.566964285714286e-06, "loss": 39.7322, "step": 6173 }, { "epoch": 147.0, "grad_norm": 1.3507823944091797, "learning_rate": 9.565476190476191e-06, "loss": 35.4695, "step": 6174 }, { "epoch": 147.02388059701494, "grad_norm": 1.4110801219940186, "learning_rate": 9.563988095238097e-06, "loss": 38.5371, "step": 6175 }, { "epoch": 147.04776119402985, "grad_norm": 1.3999789953231812, "learning_rate": 9.562500000000002e-06, "loss": 40.3585, "step": 6176 }, { "epoch": 147.07164179104478, "grad_norm": 1.3827368021011353, "learning_rate": 9.561011904761906e-06, "loss": 40.9978, "step": 6177 }, { "epoch": 147.0955223880597, "grad_norm": 1.395845651626587, "learning_rate": 9.559523809523811e-06, "loss": 39.6119, "step": 6178 }, { "epoch": 147.11940298507463, "grad_norm": 1.4209219217300415, "learning_rate": 9.558035714285715e-06, "loss": 39.5315, "step": 6179 }, { "epoch": 147.14328358208957, "grad_norm": 1.3754463195800781, "learning_rate": 9.55654761904762e-06, "loss": 38.5198, "step": 6180 }, { "epoch": 147.16716417910447, "grad_norm": 1.4001225233078003, "learning_rate": 9.555059523809524e-06, "loss": 38.8625, "step": 6181 }, { "epoch": 147.1910447761194, "grad_norm": 1.3903918266296387, "learning_rate": 9.55357142857143e-06, "loss": 39.5597, "step": 6182 }, { "epoch": 147.21492537313432, "grad_norm": 1.3848377466201782, "learning_rate": 9.552083333333335e-06, "loss": 39.8615, "step": 6183 }, { "epoch": 147.23880597014926, "grad_norm": 1.4153218269348145, "learning_rate": 9.55059523809524e-06, "loss": 40.2626, "step": 6184 }, { "epoch": 147.26268656716417, "grad_norm": 1.4158318042755127, "learning_rate": 9.549107142857144e-06, "loss": 40.0232, "step": 6185 }, { "epoch": 147.2865671641791, "grad_norm": 1.4177613258361816, "learning_rate": 9.547619047619049e-06, "loss": 39.2718, "step": 6186 }, { "epoch": 147.31044776119404, "grad_norm": 1.3769272565841675, "learning_rate": 9.546130952380953e-06, "loss": 39.9061, "step": 6187 }, { "epoch": 147.33432835820895, "grad_norm": 1.411452054977417, "learning_rate": 9.544642857142858e-06, "loss": 39.8874, "step": 6188 }, { "epoch": 147.3582089552239, "grad_norm": 1.3373011350631714, "learning_rate": 9.543154761904763e-06, "loss": 40.2973, "step": 6189 }, { "epoch": 147.3820895522388, "grad_norm": 1.3932435512542725, "learning_rate": 9.541666666666669e-06, "loss": 39.4701, "step": 6190 }, { "epoch": 147.40597014925373, "grad_norm": 1.3593153953552246, "learning_rate": 9.540178571428572e-06, "loss": 40.5483, "step": 6191 }, { "epoch": 147.42985074626867, "grad_norm": 1.4109282493591309, "learning_rate": 9.538690476190478e-06, "loss": 39.5907, "step": 6192 }, { "epoch": 147.45373134328358, "grad_norm": 1.383887767791748, "learning_rate": 9.537202380952381e-06, "loss": 39.8616, "step": 6193 }, { "epoch": 147.47761194029852, "grad_norm": 1.3812227249145508, "learning_rate": 9.535714285714287e-06, "loss": 39.5224, "step": 6194 }, { "epoch": 147.50149253731342, "grad_norm": 1.3809988498687744, "learning_rate": 9.53422619047619e-06, "loss": 38.4316, "step": 6195 }, { "epoch": 147.52537313432836, "grad_norm": 1.3987489938735962, "learning_rate": 9.532738095238096e-06, "loss": 39.1178, "step": 6196 }, { "epoch": 147.54925373134327, "grad_norm": 1.3705860376358032, "learning_rate": 9.531250000000001e-06, "loss": 40.4306, "step": 6197 }, { "epoch": 147.5731343283582, "grad_norm": 1.3769049644470215, "learning_rate": 9.529761904761905e-06, "loss": 39.7344, "step": 6198 }, { "epoch": 147.59701492537314, "grad_norm": 1.392330527305603, "learning_rate": 9.52827380952381e-06, "loss": 38.9769, "step": 6199 }, { "epoch": 147.62089552238805, "grad_norm": 1.417297601699829, "learning_rate": 9.526785714285714e-06, "loss": 39.6881, "step": 6200 }, { "epoch": 147.644776119403, "grad_norm": 1.3460383415222168, "learning_rate": 9.52529761904762e-06, "loss": 40.2628, "step": 6201 }, { "epoch": 147.6686567164179, "grad_norm": 1.3962376117706299, "learning_rate": 9.523809523809525e-06, "loss": 40.9397, "step": 6202 }, { "epoch": 147.69253731343284, "grad_norm": 1.3744621276855469, "learning_rate": 9.52232142857143e-06, "loss": 40.5956, "step": 6203 }, { "epoch": 147.71641791044777, "grad_norm": 1.4034968614578247, "learning_rate": 9.520833333333334e-06, "loss": 40.4766, "step": 6204 }, { "epoch": 147.74029850746268, "grad_norm": 1.3716857433319092, "learning_rate": 9.51934523809524e-06, "loss": 40.3413, "step": 6205 }, { "epoch": 147.76417910447762, "grad_norm": 1.4156872034072876, "learning_rate": 9.517857142857143e-06, "loss": 39.7188, "step": 6206 }, { "epoch": 147.78805970149253, "grad_norm": 1.3838863372802734, "learning_rate": 9.516369047619048e-06, "loss": 38.812, "step": 6207 }, { "epoch": 147.81194029850747, "grad_norm": 1.3827248811721802, "learning_rate": 9.514880952380952e-06, "loss": 40.1732, "step": 6208 }, { "epoch": 147.83582089552237, "grad_norm": 1.419411540031433, "learning_rate": 9.513392857142857e-06, "loss": 40.6362, "step": 6209 }, { "epoch": 147.8597014925373, "grad_norm": 1.390061378479004, "learning_rate": 9.511904761904763e-06, "loss": 38.5234, "step": 6210 }, { "epoch": 147.88358208955225, "grad_norm": 1.4754542112350464, "learning_rate": 9.510416666666668e-06, "loss": 40.5257, "step": 6211 }, { "epoch": 147.90746268656716, "grad_norm": 1.435465693473816, "learning_rate": 9.508928571428572e-06, "loss": 39.25, "step": 6212 }, { "epoch": 147.9313432835821, "grad_norm": 1.395843744277954, "learning_rate": 9.507440476190477e-06, "loss": 39.9478, "step": 6213 }, { "epoch": 147.955223880597, "grad_norm": 1.56545090675354, "learning_rate": 9.50595238095238e-06, "loss": 39.4628, "step": 6214 }, { "epoch": 147.97910447761194, "grad_norm": 1.3377258777618408, "learning_rate": 9.504464285714286e-06, "loss": 40.6798, "step": 6215 }, { "epoch": 148.0, "grad_norm": 1.4153507947921753, "learning_rate": 9.502976190476191e-06, "loss": 34.95, "step": 6216 }, { "epoch": 148.02388059701494, "grad_norm": 1.3980624675750732, "learning_rate": 9.501488095238097e-06, "loss": 38.2628, "step": 6217 }, { "epoch": 148.04776119402985, "grad_norm": 1.4658145904541016, "learning_rate": 9.5e-06, "loss": 41.1433, "step": 6218 }, { "epoch": 148.07164179104478, "grad_norm": 1.3758001327514648, "learning_rate": 9.498511904761906e-06, "loss": 38.9271, "step": 6219 }, { "epoch": 148.0955223880597, "grad_norm": 1.3887008428573608, "learning_rate": 9.49702380952381e-06, "loss": 40.767, "step": 6220 }, { "epoch": 148.11940298507463, "grad_norm": 1.3442296981811523, "learning_rate": 9.495535714285715e-06, "loss": 39.6697, "step": 6221 }, { "epoch": 148.14328358208957, "grad_norm": 1.395132303237915, "learning_rate": 9.494047619047619e-06, "loss": 40.1523, "step": 6222 }, { "epoch": 148.16716417910447, "grad_norm": 1.3781839609146118, "learning_rate": 9.492559523809524e-06, "loss": 39.2856, "step": 6223 }, { "epoch": 148.1910447761194, "grad_norm": 1.4388372898101807, "learning_rate": 9.49107142857143e-06, "loss": 40.5247, "step": 6224 }, { "epoch": 148.21492537313432, "grad_norm": 1.387631893157959, "learning_rate": 9.489583333333335e-06, "loss": 39.5943, "step": 6225 }, { "epoch": 148.23880597014926, "grad_norm": 1.435713291168213, "learning_rate": 9.488095238095238e-06, "loss": 40.8741, "step": 6226 }, { "epoch": 148.26268656716417, "grad_norm": 1.4194412231445312, "learning_rate": 9.486607142857144e-06, "loss": 40.6202, "step": 6227 }, { "epoch": 148.2865671641791, "grad_norm": 1.4196182489395142, "learning_rate": 9.485119047619047e-06, "loss": 40.172, "step": 6228 }, { "epoch": 148.31044776119404, "grad_norm": 1.405199408531189, "learning_rate": 9.483630952380953e-06, "loss": 39.6757, "step": 6229 }, { "epoch": 148.33432835820895, "grad_norm": 1.3666656017303467, "learning_rate": 9.482142857142858e-06, "loss": 40.4574, "step": 6230 }, { "epoch": 148.3582089552239, "grad_norm": 1.3975087404251099, "learning_rate": 9.480654761904764e-06, "loss": 39.2513, "step": 6231 }, { "epoch": 148.3820895522388, "grad_norm": 1.4405434131622314, "learning_rate": 9.479166666666667e-06, "loss": 39.9534, "step": 6232 }, { "epoch": 148.40597014925373, "grad_norm": 1.390606164932251, "learning_rate": 9.477678571428573e-06, "loss": 40.6604, "step": 6233 }, { "epoch": 148.42985074626867, "grad_norm": 1.4062482118606567, "learning_rate": 9.476190476190476e-06, "loss": 40.7886, "step": 6234 }, { "epoch": 148.45373134328358, "grad_norm": 1.3654539585113525, "learning_rate": 9.474702380952382e-06, "loss": 40.571, "step": 6235 }, { "epoch": 148.47761194029852, "grad_norm": 1.3653122186660767, "learning_rate": 9.473214285714285e-06, "loss": 39.5069, "step": 6236 }, { "epoch": 148.50149253731342, "grad_norm": 1.372803807258606, "learning_rate": 9.47172619047619e-06, "loss": 39.2034, "step": 6237 }, { "epoch": 148.52537313432836, "grad_norm": 1.4139281511306763, "learning_rate": 9.470238095238096e-06, "loss": 39.3125, "step": 6238 }, { "epoch": 148.54925373134327, "grad_norm": 1.8621400594711304, "learning_rate": 9.468750000000001e-06, "loss": 38.932, "step": 6239 }, { "epoch": 148.5731343283582, "grad_norm": 1.4033026695251465, "learning_rate": 9.467261904761905e-06, "loss": 39.5037, "step": 6240 }, { "epoch": 148.59701492537314, "grad_norm": 1.4204497337341309, "learning_rate": 9.46577380952381e-06, "loss": 38.6693, "step": 6241 }, { "epoch": 148.62089552238805, "grad_norm": 1.390781283378601, "learning_rate": 9.464285714285714e-06, "loss": 40.1227, "step": 6242 }, { "epoch": 148.644776119403, "grad_norm": 1.419391393661499, "learning_rate": 9.46279761904762e-06, "loss": 41.3518, "step": 6243 }, { "epoch": 148.6686567164179, "grad_norm": 1.3940855264663696, "learning_rate": 9.461309523809525e-06, "loss": 38.4497, "step": 6244 }, { "epoch": 148.69253731343284, "grad_norm": 1.4030182361602783, "learning_rate": 9.45982142857143e-06, "loss": 39.6964, "step": 6245 }, { "epoch": 148.71641791044777, "grad_norm": 1.3940331935882568, "learning_rate": 9.458333333333334e-06, "loss": 39.5557, "step": 6246 }, { "epoch": 148.74029850746268, "grad_norm": 1.42631196975708, "learning_rate": 9.45684523809524e-06, "loss": 39.2479, "step": 6247 }, { "epoch": 148.76417910447762, "grad_norm": 1.4061552286148071, "learning_rate": 9.455357142857143e-06, "loss": 40.1229, "step": 6248 }, { "epoch": 148.78805970149253, "grad_norm": 1.3835855722427368, "learning_rate": 9.453869047619048e-06, "loss": 38.6602, "step": 6249 }, { "epoch": 148.81194029850747, "grad_norm": 1.3981024026870728, "learning_rate": 9.452380952380952e-06, "loss": 40.3921, "step": 6250 }, { "epoch": 148.83582089552237, "grad_norm": 1.405151605606079, "learning_rate": 9.450892857142857e-06, "loss": 39.1115, "step": 6251 }, { "epoch": 148.8597014925373, "grad_norm": 1.5286908149719238, "learning_rate": 9.449404761904763e-06, "loss": 39.3679, "step": 6252 }, { "epoch": 148.88358208955225, "grad_norm": 1.3890769481658936, "learning_rate": 9.447916666666668e-06, "loss": 38.4899, "step": 6253 }, { "epoch": 148.90746268656716, "grad_norm": 1.4001574516296387, "learning_rate": 9.446428571428572e-06, "loss": 38.7165, "step": 6254 }, { "epoch": 148.9313432835821, "grad_norm": 1.4038162231445312, "learning_rate": 9.444940476190477e-06, "loss": 40.7954, "step": 6255 }, { "epoch": 148.955223880597, "grad_norm": 1.3911023139953613, "learning_rate": 9.443452380952381e-06, "loss": 40.4265, "step": 6256 }, { "epoch": 148.97910447761194, "grad_norm": 1.3797988891601562, "learning_rate": 9.441964285714286e-06, "loss": 39.0267, "step": 6257 }, { "epoch": 149.0, "grad_norm": 1.388864278793335, "learning_rate": 9.440476190476192e-06, "loss": 36.4949, "step": 6258 }, { "epoch": 149.02388059701494, "grad_norm": 1.3843498229980469, "learning_rate": 9.438988095238097e-06, "loss": 39.4893, "step": 6259 }, { "epoch": 149.04776119402985, "grad_norm": 1.3840149641036987, "learning_rate": 9.4375e-06, "loss": 38.6996, "step": 6260 }, { "epoch": 149.07164179104478, "grad_norm": 1.423953890800476, "learning_rate": 9.436011904761906e-06, "loss": 40.8741, "step": 6261 }, { "epoch": 149.0955223880597, "grad_norm": 1.3947354555130005, "learning_rate": 9.43452380952381e-06, "loss": 39.5506, "step": 6262 }, { "epoch": 149.11940298507463, "grad_norm": 1.4040168523788452, "learning_rate": 9.433035714285715e-06, "loss": 40.6498, "step": 6263 }, { "epoch": 149.14328358208957, "grad_norm": 1.3917887210845947, "learning_rate": 9.431547619047619e-06, "loss": 40.0438, "step": 6264 }, { "epoch": 149.16716417910447, "grad_norm": 1.3801087141036987, "learning_rate": 9.430059523809524e-06, "loss": 38.2411, "step": 6265 }, { "epoch": 149.1910447761194, "grad_norm": 1.3866209983825684, "learning_rate": 9.42857142857143e-06, "loss": 39.3784, "step": 6266 }, { "epoch": 149.21492537313432, "grad_norm": 1.4055087566375732, "learning_rate": 9.427083333333335e-06, "loss": 39.1291, "step": 6267 }, { "epoch": 149.23880597014926, "grad_norm": 1.4009150266647339, "learning_rate": 9.425595238095239e-06, "loss": 39.7463, "step": 6268 }, { "epoch": 149.26268656716417, "grad_norm": 1.385659098625183, "learning_rate": 9.424107142857144e-06, "loss": 39.2429, "step": 6269 }, { "epoch": 149.2865671641791, "grad_norm": 1.4452263116836548, "learning_rate": 9.422619047619048e-06, "loss": 39.277, "step": 6270 }, { "epoch": 149.31044776119404, "grad_norm": 1.3954790830612183, "learning_rate": 9.421130952380953e-06, "loss": 40.1614, "step": 6271 }, { "epoch": 149.33432835820895, "grad_norm": 1.3871071338653564, "learning_rate": 9.419642857142858e-06, "loss": 39.5067, "step": 6272 }, { "epoch": 149.3582089552239, "grad_norm": 1.4189677238464355, "learning_rate": 9.418154761904764e-06, "loss": 39.8683, "step": 6273 }, { "epoch": 149.3820895522388, "grad_norm": 1.4426833391189575, "learning_rate": 9.416666666666667e-06, "loss": 41.4632, "step": 6274 }, { "epoch": 149.40597014925373, "grad_norm": 1.3889070749282837, "learning_rate": 9.415178571428573e-06, "loss": 38.5586, "step": 6275 }, { "epoch": 149.42985074626867, "grad_norm": 1.3770334720611572, "learning_rate": 9.413690476190476e-06, "loss": 39.0292, "step": 6276 }, { "epoch": 149.45373134328358, "grad_norm": 1.3983923196792603, "learning_rate": 9.412202380952382e-06, "loss": 39.4876, "step": 6277 }, { "epoch": 149.47761194029852, "grad_norm": 1.3512320518493652, "learning_rate": 9.410714285714286e-06, "loss": 39.418, "step": 6278 }, { "epoch": 149.50149253731342, "grad_norm": 1.378698468208313, "learning_rate": 9.409226190476191e-06, "loss": 40.9337, "step": 6279 }, { "epoch": 149.52537313432836, "grad_norm": 1.4419883489608765, "learning_rate": 9.407738095238096e-06, "loss": 39.0473, "step": 6280 }, { "epoch": 149.54925373134327, "grad_norm": NaN, "learning_rate": 9.406250000000002e-06, "loss": 51.134, "step": 6281 }, { "epoch": 149.5731343283582, "grad_norm": 1.390204906463623, "learning_rate": 9.406250000000002e-06, "loss": 39.2525, "step": 6282 }, { "epoch": 149.59701492537314, "grad_norm": 1.3576239347457886, "learning_rate": 9.404761904761905e-06, "loss": 40.7061, "step": 6283 }, { "epoch": 149.62089552238805, "grad_norm": 1.4027773141860962, "learning_rate": 9.40327380952381e-06, "loss": 39.3923, "step": 6284 }, { "epoch": 149.644776119403, "grad_norm": 1.3997001647949219, "learning_rate": 9.401785714285714e-06, "loss": 40.3627, "step": 6285 }, { "epoch": 149.6686567164179, "grad_norm": 1.3861162662506104, "learning_rate": 9.40029761904762e-06, "loss": 39.5758, "step": 6286 }, { "epoch": 149.69253731343284, "grad_norm": 1.42082941532135, "learning_rate": 9.398809523809525e-06, "loss": 40.5435, "step": 6287 }, { "epoch": 149.71641791044777, "grad_norm": 1.409765601158142, "learning_rate": 9.39732142857143e-06, "loss": 39.4394, "step": 6288 }, { "epoch": 149.74029850746268, "grad_norm": 1.349176287651062, "learning_rate": 9.395833333333334e-06, "loss": 40.8785, "step": 6289 }, { "epoch": 149.76417910447762, "grad_norm": 1.389966607093811, "learning_rate": 9.39434523809524e-06, "loss": 40.4457, "step": 6290 }, { "epoch": 149.78805970149253, "grad_norm": 1.3934004306793213, "learning_rate": 9.392857142857143e-06, "loss": 38.7531, "step": 6291 }, { "epoch": 149.81194029850747, "grad_norm": 1.3827059268951416, "learning_rate": 9.391369047619049e-06, "loss": 40.1348, "step": 6292 }, { "epoch": 149.83582089552237, "grad_norm": 1.529671311378479, "learning_rate": 9.389880952380954e-06, "loss": 39.2554, "step": 6293 }, { "epoch": 149.8597014925373, "grad_norm": 1.4637616872787476, "learning_rate": 9.388392857142858e-06, "loss": 39.8067, "step": 6294 }, { "epoch": 149.88358208955225, "grad_norm": 1.4756371974945068, "learning_rate": 9.386904761904763e-06, "loss": 40.3342, "step": 6295 }, { "epoch": 149.90746268656716, "grad_norm": 1.387015700340271, "learning_rate": 9.385416666666668e-06, "loss": 41.2026, "step": 6296 }, { "epoch": 149.9313432835821, "grad_norm": 1.3916720151901245, "learning_rate": 9.383928571428572e-06, "loss": 38.9471, "step": 6297 }, { "epoch": 149.955223880597, "grad_norm": 1.3800398111343384, "learning_rate": 9.382440476190477e-06, "loss": 39.2654, "step": 6298 }, { "epoch": 149.97910447761194, "grad_norm": 1.378159761428833, "learning_rate": 9.380952380952381e-06, "loss": 40.231, "step": 6299 }, { "epoch": 150.0, "grad_norm": 1.3771966695785522, "learning_rate": 9.379464285714286e-06, "loss": 34.5004, "step": 6300 }, { "epoch": 150.02388059701494, "grad_norm": NaN, "learning_rate": 9.377976190476192e-06, "loss": 45.6536, "step": 6301 }, { "epoch": 150.04776119402985, "grad_norm": 1.3695188760757446, "learning_rate": 9.377976190476192e-06, "loss": 40.2638, "step": 6302 }, { "epoch": 150.07164179104478, "grad_norm": 1.3854070901870728, "learning_rate": 9.376488095238097e-06, "loss": 40.3868, "step": 6303 }, { "epoch": 150.0955223880597, "grad_norm": 1.3703415393829346, "learning_rate": 9.375000000000001e-06, "loss": 40.1432, "step": 6304 }, { "epoch": 150.11940298507463, "grad_norm": 1.3807013034820557, "learning_rate": 9.373511904761905e-06, "loss": 38.7899, "step": 6305 }, { "epoch": 150.14328358208957, "grad_norm": 1.407753586769104, "learning_rate": 9.37202380952381e-06, "loss": 39.7269, "step": 6306 }, { "epoch": 150.16716417910447, "grad_norm": 1.3865894079208374, "learning_rate": 9.370535714285715e-06, "loss": 40.1437, "step": 6307 }, { "epoch": 150.1910447761194, "grad_norm": 1.390550136566162, "learning_rate": 9.36904761904762e-06, "loss": 40.0804, "step": 6308 }, { "epoch": 150.21492537313432, "grad_norm": 1.3641462326049805, "learning_rate": 9.367559523809524e-06, "loss": 41.2124, "step": 6309 }, { "epoch": 150.23880597014926, "grad_norm": 1.3757225275039673, "learning_rate": 9.36607142857143e-06, "loss": 39.2714, "step": 6310 }, { "epoch": 150.26268656716417, "grad_norm": 1.3771401643753052, "learning_rate": 9.364583333333333e-06, "loss": 39.6912, "step": 6311 }, { "epoch": 150.2865671641791, "grad_norm": 1.372065544128418, "learning_rate": 9.363095238095239e-06, "loss": 39.7919, "step": 6312 }, { "epoch": 150.31044776119404, "grad_norm": 1.3828628063201904, "learning_rate": 9.361607142857142e-06, "loss": 39.5513, "step": 6313 }, { "epoch": 150.33432835820895, "grad_norm": 1.3689607381820679, "learning_rate": 9.360119047619048e-06, "loss": 38.7469, "step": 6314 }, { "epoch": 150.3582089552239, "grad_norm": 1.3981634378433228, "learning_rate": 9.358630952380953e-06, "loss": 40.0202, "step": 6315 }, { "epoch": 150.3820895522388, "grad_norm": 1.4113973379135132, "learning_rate": 9.357142857142859e-06, "loss": 40.6408, "step": 6316 }, { "epoch": 150.40597014925373, "grad_norm": 1.4307994842529297, "learning_rate": 9.355654761904762e-06, "loss": 39.6128, "step": 6317 }, { "epoch": 150.42985074626867, "grad_norm": 1.4116977453231812, "learning_rate": 9.354166666666668e-06, "loss": 40.6825, "step": 6318 }, { "epoch": 150.45373134328358, "grad_norm": 1.379270315170288, "learning_rate": 9.352678571428571e-06, "loss": 40.2438, "step": 6319 }, { "epoch": 150.47761194029852, "grad_norm": 1.4404443502426147, "learning_rate": 9.351190476190477e-06, "loss": 40.0211, "step": 6320 }, { "epoch": 150.50149253731342, "grad_norm": 1.3757104873657227, "learning_rate": 9.349702380952382e-06, "loss": 39.886, "step": 6321 }, { "epoch": 150.52537313432836, "grad_norm": 1.4570364952087402, "learning_rate": 9.348214285714287e-06, "loss": 40.4738, "step": 6322 }, { "epoch": 150.54925373134327, "grad_norm": 1.3832143545150757, "learning_rate": 9.346726190476191e-06, "loss": 40.231, "step": 6323 }, { "epoch": 150.5731343283582, "grad_norm": 1.3976413011550903, "learning_rate": 9.345238095238096e-06, "loss": 40.6676, "step": 6324 }, { "epoch": 150.59701492537314, "grad_norm": 1.4183326959609985, "learning_rate": 9.34375e-06, "loss": 39.4872, "step": 6325 }, { "epoch": 150.62089552238805, "grad_norm": 1.3489983081817627, "learning_rate": 9.342261904761905e-06, "loss": 38.6416, "step": 6326 }, { "epoch": 150.644776119403, "grad_norm": 1.3650174140930176, "learning_rate": 9.34077380952381e-06, "loss": 38.6031, "step": 6327 }, { "epoch": 150.6686567164179, "grad_norm": 1.4183635711669922, "learning_rate": 9.339285714285715e-06, "loss": 39.6012, "step": 6328 }, { "epoch": 150.69253731343284, "grad_norm": 1.357742428779602, "learning_rate": 9.33779761904762e-06, "loss": 39.4966, "step": 6329 }, { "epoch": 150.71641791044777, "grad_norm": 1.3562610149383545, "learning_rate": 9.336309523809525e-06, "loss": 39.2398, "step": 6330 }, { "epoch": 150.74029850746268, "grad_norm": 1.4265776872634888, "learning_rate": 9.334821428571429e-06, "loss": 39.5934, "step": 6331 }, { "epoch": 150.76417910447762, "grad_norm": 1.5908664464950562, "learning_rate": 9.333333333333334e-06, "loss": 39.7534, "step": 6332 }, { "epoch": 150.78805970149253, "grad_norm": 2.0744941234588623, "learning_rate": 9.331845238095238e-06, "loss": 39.9742, "step": 6333 }, { "epoch": 150.81194029850747, "grad_norm": 1.3908894062042236, "learning_rate": 9.330357142857143e-06, "loss": 39.2042, "step": 6334 }, { "epoch": 150.83582089552237, "grad_norm": 1.376988410949707, "learning_rate": 9.328869047619049e-06, "loss": 39.7031, "step": 6335 }, { "epoch": 150.8597014925373, "grad_norm": 1.4004408121109009, "learning_rate": 9.327380952380954e-06, "loss": 40.3653, "step": 6336 }, { "epoch": 150.88358208955225, "grad_norm": 1.40264892578125, "learning_rate": 9.325892857142858e-06, "loss": 39.2493, "step": 6337 }, { "epoch": 150.90746268656716, "grad_norm": 1.3637710809707642, "learning_rate": 9.324404761904763e-06, "loss": 39.4667, "step": 6338 }, { "epoch": 150.9313432835821, "grad_norm": 1.3864154815673828, "learning_rate": 9.322916666666667e-06, "loss": 39.698, "step": 6339 }, { "epoch": 150.955223880597, "grad_norm": 1.4390044212341309, "learning_rate": 9.321428571428572e-06, "loss": 39.7374, "step": 6340 }, { "epoch": 150.97910447761194, "grad_norm": 1.471393346786499, "learning_rate": 9.319940476190476e-06, "loss": 38.8628, "step": 6341 }, { "epoch": 151.0, "grad_norm": 1.3921263217926025, "learning_rate": 9.318452380952381e-06, "loss": 34.0585, "step": 6342 }, { "epoch": 151.02388059701494, "grad_norm": 1.3751156330108643, "learning_rate": 9.316964285714287e-06, "loss": 41.7727, "step": 6343 }, { "epoch": 151.04776119402985, "grad_norm": 1.3640681505203247, "learning_rate": 9.315476190476192e-06, "loss": 40.2243, "step": 6344 }, { "epoch": 151.07164179104478, "grad_norm": 1.4246002435684204, "learning_rate": 9.313988095238096e-06, "loss": 39.3729, "step": 6345 }, { "epoch": 151.0955223880597, "grad_norm": 1.402680516242981, "learning_rate": 9.312500000000001e-06, "loss": 39.1293, "step": 6346 }, { "epoch": 151.11940298507463, "grad_norm": 1.4273099899291992, "learning_rate": 9.311011904761905e-06, "loss": 41.1189, "step": 6347 }, { "epoch": 151.14328358208957, "grad_norm": 1.4016077518463135, "learning_rate": 9.30952380952381e-06, "loss": 39.8152, "step": 6348 }, { "epoch": 151.16716417910447, "grad_norm": 1.3817402124404907, "learning_rate": 9.308035714285715e-06, "loss": 39.4155, "step": 6349 }, { "epoch": 151.1910447761194, "grad_norm": 1.3989639282226562, "learning_rate": 9.30654761904762e-06, "loss": 40.8659, "step": 6350 }, { "epoch": 151.21492537313432, "grad_norm": 1.4121732711791992, "learning_rate": 9.305059523809525e-06, "loss": 39.0516, "step": 6351 }, { "epoch": 151.23880597014926, "grad_norm": 1.3823583126068115, "learning_rate": 9.30357142857143e-06, "loss": 40.4779, "step": 6352 }, { "epoch": 151.26268656716417, "grad_norm": 1.404232382774353, "learning_rate": 9.302083333333334e-06, "loss": 39.0472, "step": 6353 }, { "epoch": 151.2865671641791, "grad_norm": 1.382708191871643, "learning_rate": 9.300595238095239e-06, "loss": 39.7164, "step": 6354 }, { "epoch": 151.31044776119404, "grad_norm": 1.4287365674972534, "learning_rate": 9.299107142857143e-06, "loss": 39.7866, "step": 6355 }, { "epoch": 151.33432835820895, "grad_norm": 1.453171730041504, "learning_rate": 9.297619047619048e-06, "loss": 39.7592, "step": 6356 }, { "epoch": 151.3582089552239, "grad_norm": 1.400652527809143, "learning_rate": 9.296130952380953e-06, "loss": 40.0508, "step": 6357 }, { "epoch": 151.3820895522388, "grad_norm": 1.3427455425262451, "learning_rate": 9.294642857142859e-06, "loss": 39.3438, "step": 6358 }, { "epoch": 151.40597014925373, "grad_norm": 1.3873279094696045, "learning_rate": 9.293154761904762e-06, "loss": 40.4371, "step": 6359 }, { "epoch": 151.42985074626867, "grad_norm": 1.3873059749603271, "learning_rate": 9.291666666666668e-06, "loss": 40.076, "step": 6360 }, { "epoch": 151.45373134328358, "grad_norm": 1.3974792957305908, "learning_rate": 9.290178571428571e-06, "loss": 39.5033, "step": 6361 }, { "epoch": 151.47761194029852, "grad_norm": 1.3707728385925293, "learning_rate": 9.288690476190477e-06, "loss": 40.1599, "step": 6362 }, { "epoch": 151.50149253731342, "grad_norm": 1.3713340759277344, "learning_rate": 9.287202380952382e-06, "loss": 39.3542, "step": 6363 }, { "epoch": 151.52537313432836, "grad_norm": 1.369905710220337, "learning_rate": 9.285714285714288e-06, "loss": 40.7217, "step": 6364 }, { "epoch": 151.54925373134327, "grad_norm": 1.381853699684143, "learning_rate": 9.284226190476191e-06, "loss": 40.0543, "step": 6365 }, { "epoch": 151.5731343283582, "grad_norm": 1.3604148626327515, "learning_rate": 9.282738095238097e-06, "loss": 40.0533, "step": 6366 }, { "epoch": 151.59701492537314, "grad_norm": 1.3774807453155518, "learning_rate": 9.28125e-06, "loss": 39.6794, "step": 6367 }, { "epoch": 151.62089552238805, "grad_norm": 1.3806037902832031, "learning_rate": 9.279761904761906e-06, "loss": 38.7777, "step": 6368 }, { "epoch": 151.644776119403, "grad_norm": 1.391127586364746, "learning_rate": 9.27827380952381e-06, "loss": 39.3546, "step": 6369 }, { "epoch": 151.6686567164179, "grad_norm": 1.3842363357543945, "learning_rate": 9.276785714285715e-06, "loss": 40.2828, "step": 6370 }, { "epoch": 151.69253731343284, "grad_norm": 1.409952163696289, "learning_rate": 9.27529761904762e-06, "loss": 38.9954, "step": 6371 }, { "epoch": 151.71641791044777, "grad_norm": 1.3755418062210083, "learning_rate": 9.273809523809525e-06, "loss": 40.9963, "step": 6372 }, { "epoch": 151.74029850746268, "grad_norm": 1.4096416234970093, "learning_rate": 9.272321428571429e-06, "loss": 40.0994, "step": 6373 }, { "epoch": 151.76417910447762, "grad_norm": 1.4157979488372803, "learning_rate": 9.270833333333334e-06, "loss": 40.2655, "step": 6374 }, { "epoch": 151.78805970149253, "grad_norm": 1.3744465112686157, "learning_rate": 9.269345238095238e-06, "loss": 38.6039, "step": 6375 }, { "epoch": 151.81194029850747, "grad_norm": 1.379152774810791, "learning_rate": 9.267857142857144e-06, "loss": 39.0094, "step": 6376 }, { "epoch": 151.83582089552237, "grad_norm": 1.3681912422180176, "learning_rate": 9.266369047619049e-06, "loss": 40.6443, "step": 6377 }, { "epoch": 151.8597014925373, "grad_norm": 1.4108628034591675, "learning_rate": 9.264880952380954e-06, "loss": 39.5755, "step": 6378 }, { "epoch": 151.88358208955225, "grad_norm": 1.3947722911834717, "learning_rate": 9.263392857142858e-06, "loss": 38.9625, "step": 6379 }, { "epoch": 151.90746268656716, "grad_norm": 1.390696406364441, "learning_rate": 9.261904761904763e-06, "loss": 38.6818, "step": 6380 }, { "epoch": 151.9313432835821, "grad_norm": 1.443642497062683, "learning_rate": 9.260416666666667e-06, "loss": 39.0256, "step": 6381 }, { "epoch": 151.955223880597, "grad_norm": 1.4250284433364868, "learning_rate": 9.258928571428572e-06, "loss": 39.4986, "step": 6382 }, { "epoch": 151.97910447761194, "grad_norm": 1.3986161947250366, "learning_rate": 9.257440476190476e-06, "loss": 39.2093, "step": 6383 }, { "epoch": 152.0, "grad_norm": 1.546371340751648, "learning_rate": 9.255952380952381e-06, "loss": 34.4161, "step": 6384 }, { "epoch": 152.02388059701494, "grad_norm": 1.4240819215774536, "learning_rate": 9.254464285714287e-06, "loss": 41.5147, "step": 6385 }, { "epoch": 152.04776119402985, "grad_norm": 1.3870187997817993, "learning_rate": 9.252976190476192e-06, "loss": 40.49, "step": 6386 }, { "epoch": 152.07164179104478, "grad_norm": 1.3978437185287476, "learning_rate": 9.251488095238096e-06, "loss": 39.2423, "step": 6387 }, { "epoch": 152.0955223880597, "grad_norm": 1.410504698753357, "learning_rate": 9.250000000000001e-06, "loss": 39.8659, "step": 6388 }, { "epoch": 152.11940298507463, "grad_norm": 1.3955955505371094, "learning_rate": 9.248511904761905e-06, "loss": 40.1097, "step": 6389 }, { "epoch": 152.14328358208957, "grad_norm": 1.403869390487671, "learning_rate": 9.24702380952381e-06, "loss": 40.028, "step": 6390 }, { "epoch": 152.16716417910447, "grad_norm": 1.3987133502960205, "learning_rate": 9.245535714285716e-06, "loss": 39.4359, "step": 6391 }, { "epoch": 152.1910447761194, "grad_norm": 1.4566138982772827, "learning_rate": 9.244047619047621e-06, "loss": 39.0895, "step": 6392 }, { "epoch": 152.21492537313432, "grad_norm": 1.3606864213943481, "learning_rate": 9.242559523809525e-06, "loss": 39.6857, "step": 6393 }, { "epoch": 152.23880597014926, "grad_norm": 1.392647624015808, "learning_rate": 9.24107142857143e-06, "loss": 40.1149, "step": 6394 }, { "epoch": 152.26268656716417, "grad_norm": 1.3806025981903076, "learning_rate": 9.239583333333334e-06, "loss": 40.4793, "step": 6395 }, { "epoch": 152.2865671641791, "grad_norm": 1.466804027557373, "learning_rate": 9.238095238095239e-06, "loss": 40.1904, "step": 6396 }, { "epoch": 152.31044776119404, "grad_norm": 1.359878659248352, "learning_rate": 9.236607142857143e-06, "loss": 40.0201, "step": 6397 }, { "epoch": 152.33432835820895, "grad_norm": 1.3926358222961426, "learning_rate": 9.235119047619048e-06, "loss": 40.1683, "step": 6398 }, { "epoch": 152.3582089552239, "grad_norm": 1.3726829290390015, "learning_rate": 9.233630952380954e-06, "loss": 41.0357, "step": 6399 }, { "epoch": 152.3820895522388, "grad_norm": 1.398148775100708, "learning_rate": 9.232142857142859e-06, "loss": 39.1385, "step": 6400 }, { "epoch": 152.40597014925373, "grad_norm": 1.4466731548309326, "learning_rate": 9.230654761904763e-06, "loss": 39.3234, "step": 6401 }, { "epoch": 152.42985074626867, "grad_norm": 1.3835557699203491, "learning_rate": 9.229166666666668e-06, "loss": 39.9192, "step": 6402 }, { "epoch": 152.45373134328358, "grad_norm": 1.3400582075119019, "learning_rate": 9.227678571428572e-06, "loss": 41.0465, "step": 6403 }, { "epoch": 152.47761194029852, "grad_norm": 1.4128707647323608, "learning_rate": 9.226190476190477e-06, "loss": 39.1818, "step": 6404 }, { "epoch": 152.50149253731342, "grad_norm": 1.3981877565383911, "learning_rate": 9.224702380952382e-06, "loss": 39.9853, "step": 6405 }, { "epoch": 152.52537313432836, "grad_norm": 1.4314457178115845, "learning_rate": 9.223214285714288e-06, "loss": 38.106, "step": 6406 }, { "epoch": 152.54925373134327, "grad_norm": 1.3912529945373535, "learning_rate": 9.221726190476191e-06, "loss": 40.0207, "step": 6407 }, { "epoch": 152.5731343283582, "grad_norm": 1.3966842889785767, "learning_rate": 9.220238095238097e-06, "loss": 39.5664, "step": 6408 }, { "epoch": 152.59701492537314, "grad_norm": 1.3606699705123901, "learning_rate": 9.21875e-06, "loss": 39.3699, "step": 6409 }, { "epoch": 152.62089552238805, "grad_norm": 1.3782236576080322, "learning_rate": 9.217261904761904e-06, "loss": 40.4815, "step": 6410 }, { "epoch": 152.644776119403, "grad_norm": 1.3639416694641113, "learning_rate": 9.21577380952381e-06, "loss": 39.8023, "step": 6411 }, { "epoch": 152.6686567164179, "grad_norm": 1.3619394302368164, "learning_rate": 9.214285714285715e-06, "loss": 39.8197, "step": 6412 }, { "epoch": 152.69253731343284, "grad_norm": 1.4065998792648315, "learning_rate": 9.21279761904762e-06, "loss": 39.7324, "step": 6413 }, { "epoch": 152.71641791044777, "grad_norm": 1.4362151622772217, "learning_rate": 9.211309523809524e-06, "loss": 39.3228, "step": 6414 }, { "epoch": 152.74029850746268, "grad_norm": 1.3565478324890137, "learning_rate": 9.20982142857143e-06, "loss": 40.3364, "step": 6415 }, { "epoch": 152.76417910447762, "grad_norm": 1.369446039199829, "learning_rate": 9.208333333333333e-06, "loss": 39.2374, "step": 6416 }, { "epoch": 152.78805970149253, "grad_norm": 1.3780157566070557, "learning_rate": 9.206845238095238e-06, "loss": 39.7181, "step": 6417 }, { "epoch": 152.81194029850747, "grad_norm": 1.4255468845367432, "learning_rate": 9.205357142857144e-06, "loss": 39.6631, "step": 6418 }, { "epoch": 152.83582089552237, "grad_norm": 1.404097080230713, "learning_rate": 9.203869047619049e-06, "loss": 38.2638, "step": 6419 }, { "epoch": 152.8597014925373, "grad_norm": 1.3840886354446411, "learning_rate": 9.202380952380953e-06, "loss": 38.5727, "step": 6420 }, { "epoch": 152.88358208955225, "grad_norm": 1.3577178716659546, "learning_rate": 9.200892857142858e-06, "loss": 40.3453, "step": 6421 }, { "epoch": 152.90746268656716, "grad_norm": 1.3405086994171143, "learning_rate": 9.199404761904762e-06, "loss": 38.9153, "step": 6422 }, { "epoch": 152.9313432835821, "grad_norm": 1.4428417682647705, "learning_rate": 9.197916666666667e-06, "loss": 40.2233, "step": 6423 }, { "epoch": 152.955223880597, "grad_norm": 1.3889491558074951, "learning_rate": 9.196428571428571e-06, "loss": 39.1494, "step": 6424 }, { "epoch": 152.97910447761194, "grad_norm": 1.412009358406067, "learning_rate": 9.194940476190476e-06, "loss": 39.6516, "step": 6425 }, { "epoch": 153.0, "grad_norm": 1.3773317337036133, "learning_rate": 9.193452380952382e-06, "loss": 34.967, "step": 6426 }, { "epoch": 153.02388059701494, "grad_norm": 1.3683569431304932, "learning_rate": 9.191964285714287e-06, "loss": 39.8537, "step": 6427 }, { "epoch": 153.04776119402985, "grad_norm": 1.3886674642562866, "learning_rate": 9.19047619047619e-06, "loss": 39.6767, "step": 6428 }, { "epoch": 153.07164179104478, "grad_norm": 1.4352631568908691, "learning_rate": 9.188988095238096e-06, "loss": 40.6388, "step": 6429 }, { "epoch": 153.0955223880597, "grad_norm": 1.3918040990829468, "learning_rate": 9.1875e-06, "loss": 39.4104, "step": 6430 }, { "epoch": 153.11940298507463, "grad_norm": 1.407638669013977, "learning_rate": 9.186011904761905e-06, "loss": 39.8866, "step": 6431 }, { "epoch": 153.14328358208957, "grad_norm": 1.3927514553070068, "learning_rate": 9.18452380952381e-06, "loss": 39.5617, "step": 6432 }, { "epoch": 153.16716417910447, "grad_norm": 1.3908931016921997, "learning_rate": 9.183035714285716e-06, "loss": 39.309, "step": 6433 }, { "epoch": 153.1910447761194, "grad_norm": 1.378460168838501, "learning_rate": 9.18154761904762e-06, "loss": 39.1732, "step": 6434 }, { "epoch": 153.21492537313432, "grad_norm": 1.3922839164733887, "learning_rate": 9.180059523809525e-06, "loss": 38.9656, "step": 6435 }, { "epoch": 153.23880597014926, "grad_norm": 1.3864825963974, "learning_rate": 9.178571428571429e-06, "loss": 39.6645, "step": 6436 }, { "epoch": 153.26268656716417, "grad_norm": 1.4109711647033691, "learning_rate": 9.177083333333334e-06, "loss": 39.3238, "step": 6437 }, { "epoch": 153.2865671641791, "grad_norm": 1.428055763244629, "learning_rate": 9.17559523809524e-06, "loss": 40.9578, "step": 6438 }, { "epoch": 153.31044776119404, "grad_norm": 1.3780357837677002, "learning_rate": 9.174107142857143e-06, "loss": 39.8133, "step": 6439 }, { "epoch": 153.33432835820895, "grad_norm": 1.4267994165420532, "learning_rate": 9.172619047619048e-06, "loss": 40.239, "step": 6440 }, { "epoch": 153.3582089552239, "grad_norm": 1.3629064559936523, "learning_rate": 9.171130952380954e-06, "loss": 40.9347, "step": 6441 }, { "epoch": 153.3820895522388, "grad_norm": 1.4021692276000977, "learning_rate": 9.169642857142857e-06, "loss": 39.0653, "step": 6442 }, { "epoch": 153.40597014925373, "grad_norm": 1.3850641250610352, "learning_rate": 9.168154761904763e-06, "loss": 39.6734, "step": 6443 }, { "epoch": 153.42985074626867, "grad_norm": 1.3612252473831177, "learning_rate": 9.166666666666666e-06, "loss": 40.0459, "step": 6444 }, { "epoch": 153.45373134328358, "grad_norm": 1.362465739250183, "learning_rate": 9.165178571428572e-06, "loss": 40.0656, "step": 6445 }, { "epoch": 153.47761194029852, "grad_norm": 1.3641369342803955, "learning_rate": 9.163690476190477e-06, "loss": 41.1302, "step": 6446 }, { "epoch": 153.50149253731342, "grad_norm": 1.3700331449508667, "learning_rate": 9.162202380952383e-06, "loss": 39.138, "step": 6447 }, { "epoch": 153.52537313432836, "grad_norm": 1.389829158782959, "learning_rate": 9.160714285714286e-06, "loss": 39.354, "step": 6448 }, { "epoch": 153.54925373134327, "grad_norm": 1.4773973226547241, "learning_rate": 9.159226190476192e-06, "loss": 40.5382, "step": 6449 }, { "epoch": 153.5731343283582, "grad_norm": 1.394035816192627, "learning_rate": 9.157738095238095e-06, "loss": 39.866, "step": 6450 }, { "epoch": 153.59701492537314, "grad_norm": 1.3912874460220337, "learning_rate": 9.15625e-06, "loss": 39.5964, "step": 6451 }, { "epoch": 153.62089552238805, "grad_norm": 1.4528566598892212, "learning_rate": 9.154761904761906e-06, "loss": 39.1674, "step": 6452 }, { "epoch": 153.644776119403, "grad_norm": 1.4267417192459106, "learning_rate": 9.15327380952381e-06, "loss": 40.4524, "step": 6453 }, { "epoch": 153.6686567164179, "grad_norm": 1.3650774955749512, "learning_rate": 9.151785714285715e-06, "loss": 39.8554, "step": 6454 }, { "epoch": 153.69253731343284, "grad_norm": 1.3964145183563232, "learning_rate": 9.15029761904762e-06, "loss": 39.4686, "step": 6455 }, { "epoch": 153.71641791044777, "grad_norm": 1.3960487842559814, "learning_rate": 9.148809523809524e-06, "loss": 39.7847, "step": 6456 }, { "epoch": 153.74029850746268, "grad_norm": 1.3443539142608643, "learning_rate": 9.14732142857143e-06, "loss": 39.9077, "step": 6457 }, { "epoch": 153.76417910447762, "grad_norm": 1.3818011283874512, "learning_rate": 9.145833333333333e-06, "loss": 39.6215, "step": 6458 }, { "epoch": 153.78805970149253, "grad_norm": 1.3401379585266113, "learning_rate": 9.144345238095239e-06, "loss": 38.5315, "step": 6459 }, { "epoch": 153.81194029850747, "grad_norm": 1.3872430324554443, "learning_rate": 9.142857142857144e-06, "loss": 40.4628, "step": 6460 }, { "epoch": 153.83582089552237, "grad_norm": 1.4128799438476562, "learning_rate": 9.14136904761905e-06, "loss": 40.2654, "step": 6461 }, { "epoch": 153.8597014925373, "grad_norm": 1.391183853149414, "learning_rate": 9.139880952380953e-06, "loss": 39.3088, "step": 6462 }, { "epoch": 153.88358208955225, "grad_norm": 1.3619953393936157, "learning_rate": 9.138392857142858e-06, "loss": 39.6545, "step": 6463 }, { "epoch": 153.90746268656716, "grad_norm": 1.4012306928634644, "learning_rate": 9.136904761904762e-06, "loss": 40.165, "step": 6464 }, { "epoch": 153.9313432835821, "grad_norm": 1.4181429147720337, "learning_rate": 9.135416666666667e-06, "loss": 39.069, "step": 6465 }, { "epoch": 153.955223880597, "grad_norm": 1.379982590675354, "learning_rate": 9.133928571428573e-06, "loss": 39.0314, "step": 6466 }, { "epoch": 153.97910447761194, "grad_norm": 1.3665764331817627, "learning_rate": 9.132440476190476e-06, "loss": 39.7033, "step": 6467 }, { "epoch": 154.0, "grad_norm": 1.4315698146820068, "learning_rate": 9.130952380952382e-06, "loss": 34.8645, "step": 6468 }, { "epoch": 154.02388059701494, "grad_norm": 1.3936690092086792, "learning_rate": 9.129464285714287e-06, "loss": 38.4743, "step": 6469 }, { "epoch": 154.04776119402985, "grad_norm": 1.4048714637756348, "learning_rate": 9.12797619047619e-06, "loss": 40.8435, "step": 6470 }, { "epoch": 154.07164179104478, "grad_norm": 1.362539529800415, "learning_rate": 9.126488095238096e-06, "loss": 39.0609, "step": 6471 }, { "epoch": 154.0955223880597, "grad_norm": 1.379938006401062, "learning_rate": 9.125e-06, "loss": 40.6994, "step": 6472 }, { "epoch": 154.11940298507463, "grad_norm": 1.383180022239685, "learning_rate": 9.123511904761905e-06, "loss": 40.166, "step": 6473 }, { "epoch": 154.14328358208957, "grad_norm": 1.430951476097107, "learning_rate": 9.12202380952381e-06, "loss": 38.6122, "step": 6474 }, { "epoch": 154.16716417910447, "grad_norm": 1.4056109189987183, "learning_rate": 9.120535714285716e-06, "loss": 40.9442, "step": 6475 }, { "epoch": 154.1910447761194, "grad_norm": 1.4289765357971191, "learning_rate": 9.11904761904762e-06, "loss": 40.9005, "step": 6476 }, { "epoch": 154.21492537313432, "grad_norm": 1.3859821557998657, "learning_rate": 9.117559523809525e-06, "loss": 39.9563, "step": 6477 }, { "epoch": 154.23880597014926, "grad_norm": 1.3723280429840088, "learning_rate": 9.116071428571429e-06, "loss": 38.6648, "step": 6478 }, { "epoch": 154.26268656716417, "grad_norm": 1.3848645687103271, "learning_rate": 9.114583333333334e-06, "loss": 40.0724, "step": 6479 }, { "epoch": 154.2865671641791, "grad_norm": 1.364415168762207, "learning_rate": 9.11309523809524e-06, "loss": 39.7055, "step": 6480 }, { "epoch": 154.31044776119404, "grad_norm": 1.3730833530426025, "learning_rate": 9.111607142857145e-06, "loss": 39.0251, "step": 6481 }, { "epoch": 154.33432835820895, "grad_norm": 1.3838951587677002, "learning_rate": 9.110119047619049e-06, "loss": 39.1042, "step": 6482 }, { "epoch": 154.3582089552239, "grad_norm": 1.370621681213379, "learning_rate": 9.108630952380954e-06, "loss": 40.2731, "step": 6483 }, { "epoch": 154.3820895522388, "grad_norm": 1.393970012664795, "learning_rate": 9.107142857142858e-06, "loss": 38.2718, "step": 6484 }, { "epoch": 154.40597014925373, "grad_norm": 1.388932228088379, "learning_rate": 9.105654761904763e-06, "loss": 39.526, "step": 6485 }, { "epoch": 154.42985074626867, "grad_norm": 1.3885396718978882, "learning_rate": 9.104166666666667e-06, "loss": 40.2888, "step": 6486 }, { "epoch": 154.45373134328358, "grad_norm": 1.3836418390274048, "learning_rate": 9.102678571428572e-06, "loss": 41.1275, "step": 6487 }, { "epoch": 154.47761194029852, "grad_norm": 1.403534173965454, "learning_rate": 9.101190476190477e-06, "loss": 37.3328, "step": 6488 }, { "epoch": 154.50149253731342, "grad_norm": 1.3783029317855835, "learning_rate": 9.099702380952383e-06, "loss": 38.9422, "step": 6489 }, { "epoch": 154.52537313432836, "grad_norm": 1.3582172393798828, "learning_rate": 9.098214285714286e-06, "loss": 40.8228, "step": 6490 }, { "epoch": 154.54925373134327, "grad_norm": 1.4226086139678955, "learning_rate": 9.096726190476192e-06, "loss": 39.6517, "step": 6491 }, { "epoch": 154.5731343283582, "grad_norm": 1.4253830909729004, "learning_rate": 9.095238095238095e-06, "loss": 39.49, "step": 6492 }, { "epoch": 154.59701492537314, "grad_norm": 1.3958879709243774, "learning_rate": 9.09375e-06, "loss": 40.8509, "step": 6493 }, { "epoch": 154.62089552238805, "grad_norm": 1.4041836261749268, "learning_rate": 9.092261904761906e-06, "loss": 39.9659, "step": 6494 }, { "epoch": 154.644776119403, "grad_norm": 1.430966854095459, "learning_rate": 9.090773809523812e-06, "loss": 40.2594, "step": 6495 }, { "epoch": 154.6686567164179, "grad_norm": 1.3784998655319214, "learning_rate": 9.089285714285715e-06, "loss": 39.9107, "step": 6496 }, { "epoch": 154.69253731343284, "grad_norm": 1.3662457466125488, "learning_rate": 9.08779761904762e-06, "loss": 40.5308, "step": 6497 }, { "epoch": 154.71641791044777, "grad_norm": 1.3649101257324219, "learning_rate": 9.086309523809524e-06, "loss": 39.4463, "step": 6498 }, { "epoch": 154.74029850746268, "grad_norm": 1.3538957834243774, "learning_rate": 9.08482142857143e-06, "loss": 38.2851, "step": 6499 }, { "epoch": 154.76417910447762, "grad_norm": 1.4066088199615479, "learning_rate": 9.083333333333333e-06, "loss": 39.967, "step": 6500 }, { "epoch": 154.78805970149253, "grad_norm": 1.4539544582366943, "learning_rate": 9.081845238095239e-06, "loss": 39.5729, "step": 6501 }, { "epoch": 154.81194029850747, "grad_norm": 1.3698664903640747, "learning_rate": 9.080357142857144e-06, "loss": 40.959, "step": 6502 }, { "epoch": 154.83582089552237, "grad_norm": 1.3509424924850464, "learning_rate": 9.07886904761905e-06, "loss": 39.6216, "step": 6503 }, { "epoch": 154.8597014925373, "grad_norm": 1.6152596473693848, "learning_rate": 9.077380952380953e-06, "loss": 38.6328, "step": 6504 }, { "epoch": 154.88358208955225, "grad_norm": 1.3484715223312378, "learning_rate": 9.075892857142858e-06, "loss": 41.3822, "step": 6505 }, { "epoch": 154.90746268656716, "grad_norm": 1.349065899848938, "learning_rate": 9.074404761904762e-06, "loss": 39.4797, "step": 6506 }, { "epoch": 154.9313432835821, "grad_norm": 1.4149739742279053, "learning_rate": 9.072916666666668e-06, "loss": 38.8328, "step": 6507 }, { "epoch": 154.955223880597, "grad_norm": 1.5287702083587646, "learning_rate": 9.071428571428573e-06, "loss": 40.2416, "step": 6508 }, { "epoch": 154.97910447761194, "grad_norm": 1.374598503112793, "learning_rate": 9.069940476190478e-06, "loss": 40.1355, "step": 6509 }, { "epoch": 155.0, "grad_norm": 1.3844939470291138, "learning_rate": 9.068452380952382e-06, "loss": 34.8529, "step": 6510 }, { "epoch": 155.02388059701494, "grad_norm": 1.3995031118392944, "learning_rate": 9.066964285714287e-06, "loss": 39.4393, "step": 6511 }, { "epoch": 155.04776119402985, "grad_norm": 1.3431791067123413, "learning_rate": 9.065476190476191e-06, "loss": 41.4351, "step": 6512 }, { "epoch": 155.07164179104478, "grad_norm": 1.3862271308898926, "learning_rate": 9.063988095238096e-06, "loss": 39.1006, "step": 6513 }, { "epoch": 155.0955223880597, "grad_norm": 1.4117140769958496, "learning_rate": 9.0625e-06, "loss": 39.5381, "step": 6514 }, { "epoch": 155.11940298507463, "grad_norm": 1.3927181959152222, "learning_rate": 9.061011904761905e-06, "loss": 40.2099, "step": 6515 }, { "epoch": 155.14328358208957, "grad_norm": 1.3548402786254883, "learning_rate": 9.05952380952381e-06, "loss": 40.8506, "step": 6516 }, { "epoch": 155.16716417910447, "grad_norm": 1.3761292695999146, "learning_rate": 9.058035714285714e-06, "loss": 40.3616, "step": 6517 }, { "epoch": 155.1910447761194, "grad_norm": 1.3971184492111206, "learning_rate": 9.05654761904762e-06, "loss": 38.9076, "step": 6518 }, { "epoch": 155.21492537313432, "grad_norm": 1.3836153745651245, "learning_rate": 9.055059523809524e-06, "loss": 39.578, "step": 6519 }, { "epoch": 155.23880597014926, "grad_norm": 1.3997776508331299, "learning_rate": 9.053571428571429e-06, "loss": 40.104, "step": 6520 }, { "epoch": 155.26268656716417, "grad_norm": 1.3330402374267578, "learning_rate": 9.052083333333334e-06, "loss": 38.7822, "step": 6521 }, { "epoch": 155.2865671641791, "grad_norm": 1.395068645477295, "learning_rate": 9.05059523809524e-06, "loss": 40.0802, "step": 6522 }, { "epoch": 155.31044776119404, "grad_norm": 1.39506196975708, "learning_rate": 9.049107142857143e-06, "loss": 40.4449, "step": 6523 }, { "epoch": 155.33432835820895, "grad_norm": 1.4050973653793335, "learning_rate": 9.047619047619049e-06, "loss": 39.1612, "step": 6524 }, { "epoch": 155.3582089552239, "grad_norm": 1.3784233331680298, "learning_rate": 9.046130952380952e-06, "loss": 40.7191, "step": 6525 }, { "epoch": 155.3820895522388, "grad_norm": 1.4047513008117676, "learning_rate": 9.044642857142858e-06, "loss": 39.7041, "step": 6526 }, { "epoch": 155.40597014925373, "grad_norm": 1.4064955711364746, "learning_rate": 9.043154761904761e-06, "loss": 38.7806, "step": 6527 }, { "epoch": 155.42985074626867, "grad_norm": 1.3668402433395386, "learning_rate": 9.041666666666667e-06, "loss": 39.0516, "step": 6528 }, { "epoch": 155.45373134328358, "grad_norm": 1.3921955823898315, "learning_rate": 9.040178571428572e-06, "loss": 39.636, "step": 6529 }, { "epoch": 155.47761194029852, "grad_norm": 1.3854788541793823, "learning_rate": 9.038690476190478e-06, "loss": 37.969, "step": 6530 }, { "epoch": 155.50149253731342, "grad_norm": NaN, "learning_rate": 9.037202380952381e-06, "loss": 64.5466, "step": 6531 }, { "epoch": 155.52537313432836, "grad_norm": 1.3921269178390503, "learning_rate": 9.037202380952381e-06, "loss": 39.3823, "step": 6532 }, { "epoch": 155.54925373134327, "grad_norm": 1.403752088546753, "learning_rate": 9.035714285714287e-06, "loss": 40.399, "step": 6533 }, { "epoch": 155.5731343283582, "grad_norm": 1.4598162174224854, "learning_rate": 9.03422619047619e-06, "loss": 40.4362, "step": 6534 }, { "epoch": 155.59701492537314, "grad_norm": 1.3955386877059937, "learning_rate": 9.032738095238096e-06, "loss": 40.6983, "step": 6535 }, { "epoch": 155.62089552238805, "grad_norm": 1.3871550559997559, "learning_rate": 9.031250000000001e-06, "loss": 40.2111, "step": 6536 }, { "epoch": 155.644776119403, "grad_norm": 1.4225163459777832, "learning_rate": 9.029761904761906e-06, "loss": 38.9755, "step": 6537 }, { "epoch": 155.6686567164179, "grad_norm": 1.4082552194595337, "learning_rate": 9.02827380952381e-06, "loss": 39.838, "step": 6538 }, { "epoch": 155.69253731343284, "grad_norm": 1.3752937316894531, "learning_rate": 9.026785714285715e-06, "loss": 39.703, "step": 6539 }, { "epoch": 155.71641791044777, "grad_norm": 1.3732014894485474, "learning_rate": 9.025297619047619e-06, "loss": 39.3335, "step": 6540 }, { "epoch": 155.74029850746268, "grad_norm": 1.3486356735229492, "learning_rate": 9.023809523809524e-06, "loss": 39.1286, "step": 6541 }, { "epoch": 155.76417910447762, "grad_norm": 1.485093355178833, "learning_rate": 9.022321428571428e-06, "loss": 38.8108, "step": 6542 }, { "epoch": 155.78805970149253, "grad_norm": 1.4288262128829956, "learning_rate": 9.020833333333334e-06, "loss": 40.6357, "step": 6543 }, { "epoch": 155.81194029850747, "grad_norm": 1.3610199689865112, "learning_rate": 9.019345238095239e-06, "loss": 41.3942, "step": 6544 }, { "epoch": 155.83582089552237, "grad_norm": 1.3782130479812622, "learning_rate": 9.017857142857144e-06, "loss": 38.9014, "step": 6545 }, { "epoch": 155.8597014925373, "grad_norm": 1.4015300273895264, "learning_rate": 9.016369047619048e-06, "loss": 39.0309, "step": 6546 }, { "epoch": 155.88358208955225, "grad_norm": 1.3882724046707153, "learning_rate": 9.014880952380953e-06, "loss": 40.7596, "step": 6547 }, { "epoch": 155.90746268656716, "grad_norm": 1.414153814315796, "learning_rate": 9.013392857142857e-06, "loss": 40.1431, "step": 6548 }, { "epoch": 155.9313432835821, "grad_norm": 1.3818789720535278, "learning_rate": 9.011904761904762e-06, "loss": 37.9539, "step": 6549 }, { "epoch": 155.955223880597, "grad_norm": 1.47318696975708, "learning_rate": 9.010416666666668e-06, "loss": 39.6276, "step": 6550 }, { "epoch": 155.97910447761194, "grad_norm": 1.4116003513336182, "learning_rate": 9.008928571428573e-06, "loss": 41.0022, "step": 6551 }, { "epoch": 156.0, "grad_norm": 1.3361613750457764, "learning_rate": 9.007440476190477e-06, "loss": 34.8038, "step": 6552 }, { "epoch": 156.02388059701494, "grad_norm": 1.3911172151565552, "learning_rate": 9.005952380952382e-06, "loss": 39.5718, "step": 6553 }, { "epoch": 156.04776119402985, "grad_norm": 1.3630069494247437, "learning_rate": 9.004464285714286e-06, "loss": 38.3647, "step": 6554 }, { "epoch": 156.07164179104478, "grad_norm": 1.3796380758285522, "learning_rate": 9.002976190476191e-06, "loss": 39.8741, "step": 6555 }, { "epoch": 156.0955223880597, "grad_norm": 1.356586217880249, "learning_rate": 9.001488095238095e-06, "loss": 39.3509, "step": 6556 }, { "epoch": 156.11940298507463, "grad_norm": 1.3985233306884766, "learning_rate": 9e-06, "loss": 40.2271, "step": 6557 }, { "epoch": 156.14328358208957, "grad_norm": NaN, "learning_rate": 8.998511904761906e-06, "loss": 54.3368, "step": 6558 }, { "epoch": 156.16716417910447, "grad_norm": 1.3986097574234009, "learning_rate": 8.998511904761906e-06, "loss": 39.7048, "step": 6559 }, { "epoch": 156.1910447761194, "grad_norm": 1.3575440645217896, "learning_rate": 8.997023809523811e-06, "loss": 38.4268, "step": 6560 }, { "epoch": 156.21492537313432, "grad_norm": 1.3850407600402832, "learning_rate": 8.995535714285715e-06, "loss": 39.3651, "step": 6561 }, { "epoch": 156.23880597014926, "grad_norm": 1.391190528869629, "learning_rate": 8.99404761904762e-06, "loss": 39.5647, "step": 6562 }, { "epoch": 156.26268656716417, "grad_norm": 1.444995641708374, "learning_rate": 8.992559523809524e-06, "loss": 38.2753, "step": 6563 }, { "epoch": 156.2865671641791, "grad_norm": 1.382914423942566, "learning_rate": 8.991071428571429e-06, "loss": 39.3636, "step": 6564 }, { "epoch": 156.31044776119404, "grad_norm": 1.405930519104004, "learning_rate": 8.989583333333334e-06, "loss": 38.7186, "step": 6565 }, { "epoch": 156.33432835820895, "grad_norm": 1.3927149772644043, "learning_rate": 8.98809523809524e-06, "loss": 39.3207, "step": 6566 }, { "epoch": 156.3582089552239, "grad_norm": 1.3928543329238892, "learning_rate": 8.986607142857143e-06, "loss": 38.216, "step": 6567 }, { "epoch": 156.3820895522388, "grad_norm": 1.3700975179672241, "learning_rate": 8.985119047619049e-06, "loss": 41.5253, "step": 6568 }, { "epoch": 156.40597014925373, "grad_norm": 1.3767778873443604, "learning_rate": 8.983630952380953e-06, "loss": 40.2835, "step": 6569 }, { "epoch": 156.42985074626867, "grad_norm": 1.3767149448394775, "learning_rate": 8.982142857142858e-06, "loss": 40.3433, "step": 6570 }, { "epoch": 156.45373134328358, "grad_norm": 1.4592711925506592, "learning_rate": 8.980654761904762e-06, "loss": 40.5664, "step": 6571 }, { "epoch": 156.47761194029852, "grad_norm": 1.4113961458206177, "learning_rate": 8.979166666666667e-06, "loss": 40.3956, "step": 6572 }, { "epoch": 156.50149253731342, "grad_norm": 1.390864372253418, "learning_rate": 8.977678571428572e-06, "loss": 39.6616, "step": 6573 }, { "epoch": 156.52537313432836, "grad_norm": 1.4462566375732422, "learning_rate": 8.976190476190478e-06, "loss": 39.2908, "step": 6574 }, { "epoch": 156.54925373134327, "grad_norm": 1.4051930904388428, "learning_rate": 8.974702380952381e-06, "loss": 40.3775, "step": 6575 }, { "epoch": 156.5731343283582, "grad_norm": 1.4046204090118408, "learning_rate": 8.973214285714287e-06, "loss": 39.7238, "step": 6576 }, { "epoch": 156.59701492537314, "grad_norm": 1.4431790113449097, "learning_rate": 8.97172619047619e-06, "loss": 40.6025, "step": 6577 }, { "epoch": 156.62089552238805, "grad_norm": 1.3649773597717285, "learning_rate": 8.970238095238096e-06, "loss": 38.949, "step": 6578 }, { "epoch": 156.644776119403, "grad_norm": 1.4179024696350098, "learning_rate": 8.968750000000001e-06, "loss": 40.5597, "step": 6579 }, { "epoch": 156.6686567164179, "grad_norm": 1.445203423500061, "learning_rate": 8.967261904761907e-06, "loss": 40.032, "step": 6580 }, { "epoch": 156.69253731343284, "grad_norm": 1.4274169206619263, "learning_rate": 8.96577380952381e-06, "loss": 40.4548, "step": 6581 }, { "epoch": 156.71641791044777, "grad_norm": 1.4170368909835815, "learning_rate": 8.964285714285716e-06, "loss": 39.7679, "step": 6582 }, { "epoch": 156.74029850746268, "grad_norm": 1.4345901012420654, "learning_rate": 8.96279761904762e-06, "loss": 40.158, "step": 6583 }, { "epoch": 156.76417910447762, "grad_norm": 1.377955675125122, "learning_rate": 8.961309523809525e-06, "loss": 39.4809, "step": 6584 }, { "epoch": 156.78805970149253, "grad_norm": 1.3850078582763672, "learning_rate": 8.959821428571428e-06, "loss": 40.2669, "step": 6585 }, { "epoch": 156.81194029850747, "grad_norm": 1.498397707939148, "learning_rate": 8.958333333333334e-06, "loss": 39.6399, "step": 6586 }, { "epoch": 156.83582089552237, "grad_norm": 1.3979178667068481, "learning_rate": 8.956845238095239e-06, "loss": 39.0786, "step": 6587 }, { "epoch": 156.8597014925373, "grad_norm": 1.4399276971817017, "learning_rate": 8.955357142857144e-06, "loss": 38.7572, "step": 6588 }, { "epoch": 156.88358208955225, "grad_norm": 1.3946410417556763, "learning_rate": 8.953869047619048e-06, "loss": 39.945, "step": 6589 }, { "epoch": 156.90746268656716, "grad_norm": 1.3992104530334473, "learning_rate": 8.952380952380953e-06, "loss": 40.6624, "step": 6590 }, { "epoch": 156.9313432835821, "grad_norm": 1.3764547109603882, "learning_rate": 8.950892857142857e-06, "loss": 39.6171, "step": 6591 }, { "epoch": 156.955223880597, "grad_norm": 1.42070472240448, "learning_rate": 8.949404761904763e-06, "loss": 39.901, "step": 6592 }, { "epoch": 156.97910447761194, "grad_norm": 1.4129507541656494, "learning_rate": 8.947916666666668e-06, "loss": 41.5559, "step": 6593 }, { "epoch": 157.0, "grad_norm": 1.3461110591888428, "learning_rate": 8.946428571428573e-06, "loss": 35.2175, "step": 6594 }, { "epoch": 157.02388059701494, "grad_norm": 1.3654001951217651, "learning_rate": 8.944940476190477e-06, "loss": 39.1103, "step": 6595 }, { "epoch": 157.04776119402985, "grad_norm": 1.4548580646514893, "learning_rate": 8.943452380952382e-06, "loss": 38.8319, "step": 6596 }, { "epoch": 157.07164179104478, "grad_norm": 1.3729093074798584, "learning_rate": 8.941964285714286e-06, "loss": 38.9272, "step": 6597 }, { "epoch": 157.0955223880597, "grad_norm": 1.3802340030670166, "learning_rate": 8.940476190476191e-06, "loss": 39.7874, "step": 6598 }, { "epoch": 157.11940298507463, "grad_norm": 1.4050244092941284, "learning_rate": 8.938988095238095e-06, "loss": 38.6457, "step": 6599 }, { "epoch": 157.14328358208957, "grad_norm": 1.4060397148132324, "learning_rate": 8.9375e-06, "loss": 40.7875, "step": 6600 }, { "epoch": 157.16716417910447, "grad_norm": 1.368646264076233, "learning_rate": 8.936011904761906e-06, "loss": 39.7986, "step": 6601 }, { "epoch": 157.1910447761194, "grad_norm": 1.3797630071640015, "learning_rate": 8.934523809523811e-06, "loss": 40.5993, "step": 6602 }, { "epoch": 157.21492537313432, "grad_norm": 1.4605674743652344, "learning_rate": 8.933035714285715e-06, "loss": 40.226, "step": 6603 }, { "epoch": 157.23880597014926, "grad_norm": 1.4018357992172241, "learning_rate": 8.93154761904762e-06, "loss": 38.9025, "step": 6604 }, { "epoch": 157.26268656716417, "grad_norm": 1.3937339782714844, "learning_rate": 8.930059523809524e-06, "loss": 40.6936, "step": 6605 }, { "epoch": 157.2865671641791, "grad_norm": 1.403732180595398, "learning_rate": 8.92857142857143e-06, "loss": 39.913, "step": 6606 }, { "epoch": 157.31044776119404, "grad_norm": 1.4184061288833618, "learning_rate": 8.927083333333335e-06, "loss": 40.1681, "step": 6607 }, { "epoch": 157.33432835820895, "grad_norm": 1.4007248878479004, "learning_rate": 8.92559523809524e-06, "loss": 39.9088, "step": 6608 }, { "epoch": 157.3582089552239, "grad_norm": 1.3997182846069336, "learning_rate": 8.924107142857144e-06, "loss": 39.9178, "step": 6609 }, { "epoch": 157.3820895522388, "grad_norm": 1.4176689386367798, "learning_rate": 8.922619047619049e-06, "loss": 38.8892, "step": 6610 }, { "epoch": 157.40597014925373, "grad_norm": 1.3784494400024414, "learning_rate": 8.921130952380953e-06, "loss": 41.1369, "step": 6611 }, { "epoch": 157.42985074626867, "grad_norm": 1.3731744289398193, "learning_rate": 8.919642857142858e-06, "loss": 40.3633, "step": 6612 }, { "epoch": 157.45373134328358, "grad_norm": 1.3814024925231934, "learning_rate": 8.918154761904762e-06, "loss": 41.6121, "step": 6613 }, { "epoch": 157.47761194029852, "grad_norm": NaN, "learning_rate": 8.916666666666667e-06, "loss": 44.0378, "step": 6614 }, { "epoch": 157.50149253731342, "grad_norm": 1.4214776754379272, "learning_rate": 8.916666666666667e-06, "loss": 39.9509, "step": 6615 }, { "epoch": 157.52537313432836, "grad_norm": 1.385748267173767, "learning_rate": 8.915178571428572e-06, "loss": 39.0341, "step": 6616 }, { "epoch": 157.54925373134327, "grad_norm": 1.3726961612701416, "learning_rate": 8.913690476190478e-06, "loss": 39.0484, "step": 6617 }, { "epoch": 157.5731343283582, "grad_norm": 1.3929901123046875, "learning_rate": 8.912202380952382e-06, "loss": 38.7875, "step": 6618 }, { "epoch": 157.59701492537314, "grad_norm": 1.7919487953186035, "learning_rate": 8.910714285714287e-06, "loss": 39.9361, "step": 6619 }, { "epoch": 157.62089552238805, "grad_norm": 1.3650434017181396, "learning_rate": 8.90922619047619e-06, "loss": 40.8012, "step": 6620 }, { "epoch": 157.644776119403, "grad_norm": 1.4527256488800049, "learning_rate": 8.907738095238096e-06, "loss": 39.7467, "step": 6621 }, { "epoch": 157.6686567164179, "grad_norm": 1.3788121938705444, "learning_rate": 8.906250000000001e-06, "loss": 39.5085, "step": 6622 }, { "epoch": 157.69253731343284, "grad_norm": 1.544628381729126, "learning_rate": 8.904761904761905e-06, "loss": 39.5441, "step": 6623 }, { "epoch": 157.71641791044777, "grad_norm": 1.381198763847351, "learning_rate": 8.90327380952381e-06, "loss": 39.1965, "step": 6624 }, { "epoch": 157.74029850746268, "grad_norm": 1.3838645219802856, "learning_rate": 8.901785714285714e-06, "loss": 40.3489, "step": 6625 }, { "epoch": 157.76417910447762, "grad_norm": 1.3680450916290283, "learning_rate": 8.90029761904762e-06, "loss": 39.3935, "step": 6626 }, { "epoch": 157.78805970149253, "grad_norm": 1.3854469060897827, "learning_rate": 8.898809523809525e-06, "loss": 39.7459, "step": 6627 }, { "epoch": 157.81194029850747, "grad_norm": 1.3747365474700928, "learning_rate": 8.89732142857143e-06, "loss": 39.9776, "step": 6628 }, { "epoch": 157.83582089552237, "grad_norm": 1.4013019800186157, "learning_rate": 8.895833333333334e-06, "loss": 39.51, "step": 6629 }, { "epoch": 157.8597014925373, "grad_norm": 1.4031602144241333, "learning_rate": 8.89434523809524e-06, "loss": 39.9198, "step": 6630 }, { "epoch": 157.88358208955225, "grad_norm": 1.3933470249176025, "learning_rate": 8.892857142857143e-06, "loss": 39.4802, "step": 6631 }, { "epoch": 157.90746268656716, "grad_norm": 1.40679132938385, "learning_rate": 8.891369047619048e-06, "loss": 40.0814, "step": 6632 }, { "epoch": 157.9313432835821, "grad_norm": 1.3847345113754272, "learning_rate": 8.889880952380952e-06, "loss": 40.2475, "step": 6633 }, { "epoch": 157.955223880597, "grad_norm": 1.3558169603347778, "learning_rate": 8.888392857142857e-06, "loss": 39.5539, "step": 6634 }, { "epoch": 157.97910447761194, "grad_norm": 1.3635287284851074, "learning_rate": 8.886904761904763e-06, "loss": 38.5697, "step": 6635 }, { "epoch": 158.0, "grad_norm": 1.3676224946975708, "learning_rate": 8.885416666666668e-06, "loss": 34.2145, "step": 6636 }, { "epoch": 158.02388059701494, "grad_norm": 1.3770579099655151, "learning_rate": 8.883928571428572e-06, "loss": 40.3716, "step": 6637 }, { "epoch": 158.04776119402985, "grad_norm": 1.3523234128952026, "learning_rate": 8.882440476190477e-06, "loss": 40.5202, "step": 6638 }, { "epoch": 158.07164179104478, "grad_norm": 1.3987711668014526, "learning_rate": 8.88095238095238e-06, "loss": 39.5899, "step": 6639 }, { "epoch": 158.0955223880597, "grad_norm": 1.3966926336288452, "learning_rate": 8.879464285714286e-06, "loss": 40.2236, "step": 6640 }, { "epoch": 158.11940298507463, "grad_norm": 1.4060138463974, "learning_rate": 8.877976190476192e-06, "loss": 40.0982, "step": 6641 }, { "epoch": 158.14328358208957, "grad_norm": NaN, "learning_rate": 8.876488095238097e-06, "loss": 61.0631, "step": 6642 }, { "epoch": 158.16716417910447, "grad_norm": 1.418991208076477, "learning_rate": 8.876488095238097e-06, "loss": 39.0739, "step": 6643 }, { "epoch": 158.1910447761194, "grad_norm": 1.3794472217559814, "learning_rate": 8.875e-06, "loss": 40.0093, "step": 6644 }, { "epoch": 158.21492537313432, "grad_norm": 1.4062801599502563, "learning_rate": 8.873511904761906e-06, "loss": 39.1204, "step": 6645 }, { "epoch": 158.23880597014926, "grad_norm": 1.3825030326843262, "learning_rate": 8.87202380952381e-06, "loss": 38.6368, "step": 6646 }, { "epoch": 158.26268656716417, "grad_norm": 1.3984006643295288, "learning_rate": 8.870535714285715e-06, "loss": 40.6655, "step": 6647 }, { "epoch": 158.2865671641791, "grad_norm": 1.368743658065796, "learning_rate": 8.869047619047619e-06, "loss": 39.4864, "step": 6648 }, { "epoch": 158.31044776119404, "grad_norm": 1.3826738595962524, "learning_rate": 8.867559523809524e-06, "loss": 40.7036, "step": 6649 }, { "epoch": 158.33432835820895, "grad_norm": 1.405868649482727, "learning_rate": 8.86607142857143e-06, "loss": 38.8353, "step": 6650 }, { "epoch": 158.3582089552239, "grad_norm": 1.3757659196853638, "learning_rate": 8.864583333333335e-06, "loss": 40.77, "step": 6651 }, { "epoch": 158.3820895522388, "grad_norm": 1.390241265296936, "learning_rate": 8.863095238095238e-06, "loss": 39.061, "step": 6652 }, { "epoch": 158.40597014925373, "grad_norm": 1.3768024444580078, "learning_rate": 8.861607142857144e-06, "loss": 39.1598, "step": 6653 }, { "epoch": 158.42985074626867, "grad_norm": 1.3859115839004517, "learning_rate": 8.860119047619048e-06, "loss": 39.4505, "step": 6654 }, { "epoch": 158.45373134328358, "grad_norm": 1.3970028162002563, "learning_rate": 8.858630952380953e-06, "loss": 39.9438, "step": 6655 }, { "epoch": 158.47761194029852, "grad_norm": 1.4460254907608032, "learning_rate": 8.857142857142858e-06, "loss": 40.5332, "step": 6656 }, { "epoch": 158.50149253731342, "grad_norm": 1.4080971479415894, "learning_rate": 8.855654761904764e-06, "loss": 39.2359, "step": 6657 }, { "epoch": 158.52537313432836, "grad_norm": 1.3922966718673706, "learning_rate": 8.854166666666667e-06, "loss": 39.5595, "step": 6658 }, { "epoch": 158.54925373134327, "grad_norm": 1.4533028602600098, "learning_rate": 8.852678571428573e-06, "loss": 39.9404, "step": 6659 }, { "epoch": 158.5731343283582, "grad_norm": 1.4458225965499878, "learning_rate": 8.851190476190476e-06, "loss": 39.8614, "step": 6660 }, { "epoch": 158.59701492537314, "grad_norm": 1.394280195236206, "learning_rate": 8.849702380952382e-06, "loss": 39.4816, "step": 6661 }, { "epoch": 158.62089552238805, "grad_norm": 1.3787786960601807, "learning_rate": 8.848214285714285e-06, "loss": 39.8175, "step": 6662 }, { "epoch": 158.644776119403, "grad_norm": 1.400944471359253, "learning_rate": 8.84672619047619e-06, "loss": 38.894, "step": 6663 }, { "epoch": 158.6686567164179, "grad_norm": 1.4139039516448975, "learning_rate": 8.845238095238096e-06, "loss": 40.4832, "step": 6664 }, { "epoch": 158.69253731343284, "grad_norm": 1.3863359689712524, "learning_rate": 8.843750000000002e-06, "loss": 39.7884, "step": 6665 }, { "epoch": 158.71641791044777, "grad_norm": 1.4125574827194214, "learning_rate": 8.842261904761905e-06, "loss": 40.6023, "step": 6666 }, { "epoch": 158.74029850746268, "grad_norm": 1.4575679302215576, "learning_rate": 8.84077380952381e-06, "loss": 40.9222, "step": 6667 }, { "epoch": 158.76417910447762, "grad_norm": 1.403927206993103, "learning_rate": 8.839285714285714e-06, "loss": 40.0363, "step": 6668 }, { "epoch": 158.78805970149253, "grad_norm": 1.349609375, "learning_rate": 8.83779761904762e-06, "loss": 39.8887, "step": 6669 }, { "epoch": 158.81194029850747, "grad_norm": 1.4094423055648804, "learning_rate": 8.836309523809525e-06, "loss": 37.0248, "step": 6670 }, { "epoch": 158.83582089552237, "grad_norm": 1.391459584236145, "learning_rate": 8.83482142857143e-06, "loss": 39.6834, "step": 6671 }, { "epoch": 158.8597014925373, "grad_norm": 1.3979979753494263, "learning_rate": 8.833333333333334e-06, "loss": 39.6883, "step": 6672 }, { "epoch": 158.88358208955225, "grad_norm": 1.449655294418335, "learning_rate": 8.83184523809524e-06, "loss": 39.182, "step": 6673 }, { "epoch": 158.90746268656716, "grad_norm": 1.3498477935791016, "learning_rate": 8.830357142857143e-06, "loss": 40.0666, "step": 6674 }, { "epoch": 158.9313432835821, "grad_norm": 1.4071307182312012, "learning_rate": 8.828869047619048e-06, "loss": 39.8969, "step": 6675 }, { "epoch": 158.955223880597, "grad_norm": 1.371772289276123, "learning_rate": 8.827380952380952e-06, "loss": 38.525, "step": 6676 }, { "epoch": 158.97910447761194, "grad_norm": 1.3670367002487183, "learning_rate": 8.825892857142857e-06, "loss": 38.8805, "step": 6677 }, { "epoch": 159.0, "grad_norm": 1.3860820531845093, "learning_rate": 8.824404761904763e-06, "loss": 35.23, "step": 6678 }, { "epoch": 159.02388059701494, "grad_norm": 1.3956419229507446, "learning_rate": 8.822916666666668e-06, "loss": 38.6321, "step": 6679 }, { "epoch": 159.04776119402985, "grad_norm": 1.3557555675506592, "learning_rate": 8.821428571428572e-06, "loss": 39.2189, "step": 6680 }, { "epoch": 159.07164179104478, "grad_norm": 1.3684642314910889, "learning_rate": 8.819940476190477e-06, "loss": 39.7867, "step": 6681 }, { "epoch": 159.0955223880597, "grad_norm": 1.3953148126602173, "learning_rate": 8.818452380952381e-06, "loss": 40.2814, "step": 6682 }, { "epoch": 159.11940298507463, "grad_norm": 1.4321436882019043, "learning_rate": 8.816964285714286e-06, "loss": 40.1067, "step": 6683 }, { "epoch": 159.14328358208957, "grad_norm": 1.3190104961395264, "learning_rate": 8.815476190476192e-06, "loss": 39.4311, "step": 6684 }, { "epoch": 159.16716417910447, "grad_norm": 1.4113024473190308, "learning_rate": 8.813988095238097e-06, "loss": 39.7276, "step": 6685 }, { "epoch": 159.1910447761194, "grad_norm": 1.4285346269607544, "learning_rate": 8.8125e-06, "loss": 39.3591, "step": 6686 }, { "epoch": 159.21492537313432, "grad_norm": 1.3754116296768188, "learning_rate": 8.811011904761906e-06, "loss": 40.2317, "step": 6687 }, { "epoch": 159.23880597014926, "grad_norm": 1.405094861984253, "learning_rate": 8.80952380952381e-06, "loss": 39.2613, "step": 6688 }, { "epoch": 159.26268656716417, "grad_norm": 1.3696409463882446, "learning_rate": 8.808035714285715e-06, "loss": 40.3651, "step": 6689 }, { "epoch": 159.2865671641791, "grad_norm": 1.3906916379928589, "learning_rate": 8.806547619047619e-06, "loss": 40.3082, "step": 6690 }, { "epoch": 159.31044776119404, "grad_norm": 1.432799220085144, "learning_rate": 8.805059523809524e-06, "loss": 39.6254, "step": 6691 }, { "epoch": 159.33432835820895, "grad_norm": 1.4147133827209473, "learning_rate": 8.80357142857143e-06, "loss": 40.2685, "step": 6692 }, { "epoch": 159.3582089552239, "grad_norm": 1.3813300132751465, "learning_rate": 8.802083333333335e-06, "loss": 39.9612, "step": 6693 }, { "epoch": 159.3820895522388, "grad_norm": 1.3860197067260742, "learning_rate": 8.800595238095239e-06, "loss": 39.8823, "step": 6694 }, { "epoch": 159.40597014925373, "grad_norm": 1.4007238149642944, "learning_rate": 8.799107142857144e-06, "loss": 39.9585, "step": 6695 }, { "epoch": 159.42985074626867, "grad_norm": 1.3855397701263428, "learning_rate": 8.797619047619048e-06, "loss": 39.5334, "step": 6696 }, { "epoch": 159.45373134328358, "grad_norm": 1.4047882556915283, "learning_rate": 8.796130952380953e-06, "loss": 39.9268, "step": 6697 }, { "epoch": 159.47761194029852, "grad_norm": 1.4815466403961182, "learning_rate": 8.794642857142858e-06, "loss": 39.089, "step": 6698 }, { "epoch": 159.50149253731342, "grad_norm": NaN, "learning_rate": 8.793154761904764e-06, "loss": 49.2703, "step": 6699 }, { "epoch": 159.52537313432836, "grad_norm": 1.3593868017196655, "learning_rate": 8.793154761904764e-06, "loss": 39.7837, "step": 6700 }, { "epoch": 159.54925373134327, "grad_norm": 1.4045919179916382, "learning_rate": 8.791666666666667e-06, "loss": 40.0894, "step": 6701 }, { "epoch": 159.5731343283582, "grad_norm": 1.4442623853683472, "learning_rate": 8.790178571428573e-06, "loss": 40.4408, "step": 6702 }, { "epoch": 159.59701492537314, "grad_norm": 1.3840117454528809, "learning_rate": 8.788690476190477e-06, "loss": 40.5399, "step": 6703 }, { "epoch": 159.62089552238805, "grad_norm": 1.3582241535186768, "learning_rate": 8.787202380952382e-06, "loss": 39.3096, "step": 6704 }, { "epoch": 159.644776119403, "grad_norm": 1.4721251726150513, "learning_rate": 8.785714285714286e-06, "loss": 38.5239, "step": 6705 }, { "epoch": 159.6686567164179, "grad_norm": 1.435297966003418, "learning_rate": 8.784226190476191e-06, "loss": 40.3109, "step": 6706 }, { "epoch": 159.69253731343284, "grad_norm": 1.3645728826522827, "learning_rate": 8.782738095238096e-06, "loss": 39.3499, "step": 6707 }, { "epoch": 159.71641791044777, "grad_norm": 1.4095731973648071, "learning_rate": 8.781250000000002e-06, "loss": 39.1805, "step": 6708 }, { "epoch": 159.74029850746268, "grad_norm": 1.4137580394744873, "learning_rate": 8.779761904761905e-06, "loss": 39.7826, "step": 6709 }, { "epoch": 159.76417910447762, "grad_norm": 1.39078950881958, "learning_rate": 8.77827380952381e-06, "loss": 39.2072, "step": 6710 }, { "epoch": 159.78805970149253, "grad_norm": NaN, "learning_rate": 8.776785714285714e-06, "loss": 53.1281, "step": 6711 }, { "epoch": 159.81194029850747, "grad_norm": 1.3934853076934814, "learning_rate": 8.776785714285714e-06, "loss": 39.6206, "step": 6712 }, { "epoch": 159.83582089552237, "grad_norm": 1.3985298871994019, "learning_rate": 8.77529761904762e-06, "loss": 39.688, "step": 6713 }, { "epoch": 159.8597014925373, "grad_norm": 1.4574235677719116, "learning_rate": 8.773809523809525e-06, "loss": 39.9409, "step": 6714 }, { "epoch": 159.88358208955225, "grad_norm": 1.3984016180038452, "learning_rate": 8.77232142857143e-06, "loss": 40.2145, "step": 6715 }, { "epoch": 159.90746268656716, "grad_norm": 1.4362388849258423, "learning_rate": 8.770833333333334e-06, "loss": 39.0604, "step": 6716 }, { "epoch": 159.9313432835821, "grad_norm": 1.3866724967956543, "learning_rate": 8.76934523809524e-06, "loss": 39.8224, "step": 6717 }, { "epoch": 159.955223880597, "grad_norm": 1.4200029373168945, "learning_rate": 8.767857142857143e-06, "loss": 40.0011, "step": 6718 }, { "epoch": 159.97910447761194, "grad_norm": 1.4002734422683716, "learning_rate": 8.766369047619049e-06, "loss": 40.8521, "step": 6719 }, { "epoch": 160.0, "grad_norm": 1.3807669878005981, "learning_rate": 8.764880952380952e-06, "loss": 35.0264, "step": 6720 }, { "epoch": 160.0, "step": 6720, "total_flos": 3.3116204149038605e+17, "train_loss": 4.975478271643321, "train_runtime": 25259.0955, "train_samples_per_second": 33.901, "train_steps_per_second": 0.266 }, { "epoch": 160.02388059701494, "grad_norm": 2.2171292304992676, "learning_rate": 1e-05, "loss": 40.4588, "step": 6721 }, { "epoch": 160.04776119402985, "grad_norm": 2.0892281532287598, "learning_rate": 9.99867724867725e-06, "loss": 39.3491, "step": 6722 }, { "epoch": 160.07164179104478, "grad_norm": 1.9013170003890991, "learning_rate": 9.997354497354498e-06, "loss": 39.7783, "step": 6723 }, { "epoch": 160.0955223880597, "grad_norm": 1.6732107400894165, "learning_rate": 9.996031746031746e-06, "loss": 40.3008, "step": 6724 }, { "epoch": 160.11940298507463, "grad_norm": 1.6346596479415894, "learning_rate": 9.994708994708996e-06, "loss": 40.0282, "step": 6725 }, { "epoch": 160.14328358208957, "grad_norm": 1.4480310678482056, "learning_rate": 9.993386243386244e-06, "loss": 39.5258, "step": 6726 }, { "epoch": 160.16716417910447, "grad_norm": 1.4065440893173218, "learning_rate": 9.992063492063493e-06, "loss": 40.1916, "step": 6727 }, { "epoch": 160.1910447761194, "grad_norm": 1.438938021659851, "learning_rate": 9.990740740740741e-06, "loss": 40.2978, "step": 6728 }, { "epoch": 160.21492537313432, "grad_norm": 1.394533395767212, "learning_rate": 9.989417989417989e-06, "loss": 40.6121, "step": 6729 }, { "epoch": 160.23880597014926, "grad_norm": 1.4559743404388428, "learning_rate": 9.988095238095239e-06, "loss": 39.3813, "step": 6730 }, { "epoch": 160.26268656716417, "grad_norm": 1.474015235900879, "learning_rate": 9.986772486772488e-06, "loss": 39.8308, "step": 6731 }, { "epoch": 160.2865671641791, "grad_norm": 1.4296596050262451, "learning_rate": 9.985449735449736e-06, "loss": 38.2651, "step": 6732 }, { "epoch": 160.31044776119404, "grad_norm": 1.4423072338104248, "learning_rate": 9.984126984126986e-06, "loss": 38.4989, "step": 6733 }, { "epoch": 160.33432835820895, "grad_norm": 1.4839755296707153, "learning_rate": 9.982804232804234e-06, "loss": 40.5666, "step": 6734 }, { "epoch": 160.3582089552239, "grad_norm": 1.4671854972839355, "learning_rate": 9.981481481481482e-06, "loss": 39.986, "step": 6735 }, { "epoch": 160.3820895522388, "grad_norm": 1.5325446128845215, "learning_rate": 9.980158730158731e-06, "loss": 40.3625, "step": 6736 }, { "epoch": 160.40597014925373, "grad_norm": 1.4163990020751953, "learning_rate": 9.97883597883598e-06, "loss": 38.4199, "step": 6737 }, { "epoch": 160.42985074626867, "grad_norm": 1.430085301399231, "learning_rate": 9.977513227513229e-06, "loss": 41.0472, "step": 6738 }, { "epoch": 160.45373134328358, "grad_norm": 1.3908662796020508, "learning_rate": 9.976190476190477e-06, "loss": 40.2013, "step": 6739 }, { "epoch": 160.47761194029852, "grad_norm": 1.3483721017837524, "learning_rate": 9.974867724867726e-06, "loss": 39.9399, "step": 6740 }, { "epoch": 160.50149253731342, "grad_norm": 1.3828513622283936, "learning_rate": 9.973544973544974e-06, "loss": 41.0988, "step": 6741 }, { "epoch": 160.52537313432836, "grad_norm": 1.3789186477661133, "learning_rate": 9.972222222222224e-06, "loss": 39.9856, "step": 6742 }, { "epoch": 160.54925373134327, "grad_norm": 1.3932874202728271, "learning_rate": 9.970899470899472e-06, "loss": 39.1738, "step": 6743 }, { "epoch": 160.5731343283582, "grad_norm": 1.3285012245178223, "learning_rate": 9.96957671957672e-06, "loss": 40.266, "step": 6744 }, { "epoch": 160.59701492537314, "grad_norm": 1.3630297183990479, "learning_rate": 9.968253968253969e-06, "loss": 40.0978, "step": 6745 }, { "epoch": 160.62089552238805, "grad_norm": 1.4125148057937622, "learning_rate": 9.966931216931219e-06, "loss": 39.2076, "step": 6746 }, { "epoch": 160.644776119403, "grad_norm": 1.3865870237350464, "learning_rate": 9.965608465608467e-06, "loss": 39.5053, "step": 6747 }, { "epoch": 160.6686567164179, "grad_norm": 1.3573662042617798, "learning_rate": 9.964285714285714e-06, "loss": 40.3288, "step": 6748 }, { "epoch": 160.69253731343284, "grad_norm": 1.4847028255462646, "learning_rate": 9.962962962962964e-06, "loss": 39.2902, "step": 6749 }, { "epoch": 160.71641791044777, "grad_norm": 1.3403093814849854, "learning_rate": 9.961640211640212e-06, "loss": 41.0299, "step": 6750 }, { "epoch": 160.74029850746268, "grad_norm": 1.3633135557174683, "learning_rate": 9.960317460317462e-06, "loss": 40.157, "step": 6751 }, { "epoch": 160.76417910447762, "grad_norm": 1.408743143081665, "learning_rate": 9.958994708994711e-06, "loss": 40.6048, "step": 6752 }, { "epoch": 160.78805970149253, "grad_norm": 1.397318720817566, "learning_rate": 9.957671957671959e-06, "loss": 37.7635, "step": 6753 }, { "epoch": 160.81194029850747, "grad_norm": 1.3131036758422852, "learning_rate": 9.956349206349207e-06, "loss": 39.2743, "step": 6754 }, { "epoch": 160.83582089552237, "grad_norm": 1.349668264389038, "learning_rate": 9.955026455026457e-06, "loss": 39.4871, "step": 6755 }, { "epoch": 160.8597014925373, "grad_norm": 1.3658077716827393, "learning_rate": 9.953703703703704e-06, "loss": 41.235, "step": 6756 }, { "epoch": 160.88358208955225, "grad_norm": 1.361548900604248, "learning_rate": 9.952380952380954e-06, "loss": 39.9187, "step": 6757 }, { "epoch": 160.90746268656716, "grad_norm": 1.3952453136444092, "learning_rate": 9.951058201058202e-06, "loss": 38.9911, "step": 6758 }, { "epoch": 160.9313432835821, "grad_norm": 1.3482592105865479, "learning_rate": 9.94973544973545e-06, "loss": 40.5553, "step": 6759 }, { "epoch": 160.955223880597, "grad_norm": 1.3871041536331177, "learning_rate": 9.9484126984127e-06, "loss": 39.6296, "step": 6760 }, { "epoch": 160.97910447761194, "grad_norm": 1.3720693588256836, "learning_rate": 9.947089947089947e-06, "loss": 39.7158, "step": 6761 }, { "epoch": 161.0, "grad_norm": 1.347894549369812, "learning_rate": 9.945767195767197e-06, "loss": 34.7905, "step": 6762 }, { "epoch": 161.02388059701494, "grad_norm": 1.337917447090149, "learning_rate": 9.944444444444445e-06, "loss": 40.4019, "step": 6763 }, { "epoch": 161.04776119402985, "grad_norm": 1.2859970331192017, "learning_rate": 9.943121693121693e-06, "loss": 39.6335, "step": 6764 }, { "epoch": 161.07164179104478, "grad_norm": 1.370324969291687, "learning_rate": 9.941798941798942e-06, "loss": 39.4735, "step": 6765 }, { "epoch": 161.0955223880597, "grad_norm": 1.3353538513183594, "learning_rate": 9.940476190476192e-06, "loss": 39.8737, "step": 6766 }, { "epoch": 161.11940298507463, "grad_norm": 1.3240337371826172, "learning_rate": 9.93915343915344e-06, "loss": 40.7373, "step": 6767 }, { "epoch": 161.14328358208957, "grad_norm": 1.3462592363357544, "learning_rate": 9.937830687830688e-06, "loss": 39.9557, "step": 6768 }, { "epoch": 161.16716417910447, "grad_norm": 1.3504456281661987, "learning_rate": 9.936507936507937e-06, "loss": 39.8625, "step": 6769 }, { "epoch": 161.1910447761194, "grad_norm": 1.4036452770233154, "learning_rate": 9.935185185185185e-06, "loss": 38.5475, "step": 6770 }, { "epoch": 161.21492537313432, "grad_norm": 1.3767704963684082, "learning_rate": 9.933862433862435e-06, "loss": 40.4407, "step": 6771 }, { "epoch": 161.23880597014926, "grad_norm": 1.3535382747650146, "learning_rate": 9.932539682539684e-06, "loss": 40.1742, "step": 6772 }, { "epoch": 161.26268656716417, "grad_norm": 1.3438529968261719, "learning_rate": 9.931216931216932e-06, "loss": 40.3674, "step": 6773 }, { "epoch": 161.2865671641791, "grad_norm": 1.3443667888641357, "learning_rate": 9.92989417989418e-06, "loss": 40.9006, "step": 6774 }, { "epoch": 161.31044776119404, "grad_norm": 1.3765147924423218, "learning_rate": 9.92857142857143e-06, "loss": 38.5045, "step": 6775 }, { "epoch": 161.33432835820895, "grad_norm": 1.348753809928894, "learning_rate": 9.927248677248678e-06, "loss": 38.2018, "step": 6776 }, { "epoch": 161.3582089552239, "grad_norm": 1.3691657781600952, "learning_rate": 9.925925925925927e-06, "loss": 39.9631, "step": 6777 }, { "epoch": 161.3820895522388, "grad_norm": 1.3511813879013062, "learning_rate": 9.924603174603175e-06, "loss": 40.3437, "step": 6778 }, { "epoch": 161.40597014925373, "grad_norm": 1.3469688892364502, "learning_rate": 9.923280423280423e-06, "loss": 39.5117, "step": 6779 }, { "epoch": 161.42985074626867, "grad_norm": 1.3825494050979614, "learning_rate": 9.921957671957673e-06, "loss": 39.1955, "step": 6780 }, { "epoch": 161.45373134328358, "grad_norm": NaN, "learning_rate": 9.920634920634922e-06, "loss": 70.4409, "step": 6781 }, { "epoch": 161.47761194029852, "grad_norm": 1.3229650259017944, "learning_rate": 9.920634920634922e-06, "loss": 40.4724, "step": 6782 }, { "epoch": 161.50149253731342, "grad_norm": 1.3711647987365723, "learning_rate": 9.91931216931217e-06, "loss": 40.3701, "step": 6783 }, { "epoch": 161.52537313432836, "grad_norm": 1.3456599712371826, "learning_rate": 9.917989417989418e-06, "loss": 39.8198, "step": 6784 }, { "epoch": 161.54925373134327, "grad_norm": 1.3641810417175293, "learning_rate": 9.916666666666668e-06, "loss": 39.876, "step": 6785 }, { "epoch": 161.5731343283582, "grad_norm": 1.369581937789917, "learning_rate": 9.915343915343916e-06, "loss": 39.6998, "step": 6786 }, { "epoch": 161.59701492537314, "grad_norm": 1.373091220855713, "learning_rate": 9.914021164021165e-06, "loss": 38.7811, "step": 6787 }, { "epoch": 161.62089552238805, "grad_norm": 1.3370786905288696, "learning_rate": 9.912698412698413e-06, "loss": 39.3485, "step": 6788 }, { "epoch": 161.644776119403, "grad_norm": 1.3550405502319336, "learning_rate": 9.911375661375661e-06, "loss": 38.5215, "step": 6789 }, { "epoch": 161.6686567164179, "grad_norm": 1.3393323421478271, "learning_rate": 9.91005291005291e-06, "loss": 40.0679, "step": 6790 }, { "epoch": 161.69253731343284, "grad_norm": 1.3420121669769287, "learning_rate": 9.90873015873016e-06, "loss": 40.0079, "step": 6791 }, { "epoch": 161.71641791044777, "grad_norm": 1.3778361082077026, "learning_rate": 9.907407407407408e-06, "loss": 39.9368, "step": 6792 }, { "epoch": 161.74029850746268, "grad_norm": 1.3545727729797363, "learning_rate": 9.906084656084658e-06, "loss": 40.5323, "step": 6793 }, { "epoch": 161.76417910447762, "grad_norm": 1.3675645589828491, "learning_rate": 9.904761904761906e-06, "loss": 39.8818, "step": 6794 }, { "epoch": 161.78805970149253, "grad_norm": 1.3856929540634155, "learning_rate": 9.903439153439154e-06, "loss": 40.4088, "step": 6795 }, { "epoch": 161.81194029850747, "grad_norm": 1.2922399044036865, "learning_rate": 9.902116402116403e-06, "loss": 39.5995, "step": 6796 }, { "epoch": 161.83582089552237, "grad_norm": 1.427140712738037, "learning_rate": 9.900793650793653e-06, "loss": 39.3635, "step": 6797 }, { "epoch": 161.8597014925373, "grad_norm": 1.340706467628479, "learning_rate": 9.8994708994709e-06, "loss": 40.3358, "step": 6798 }, { "epoch": 161.88358208955225, "grad_norm": 1.3359724283218384, "learning_rate": 9.898148148148148e-06, "loss": 39.1785, "step": 6799 }, { "epoch": 161.90746268656716, "grad_norm": 1.3448539972305298, "learning_rate": 9.896825396825398e-06, "loss": 40.4924, "step": 6800 }, { "epoch": 161.9313432835821, "grad_norm": 1.3323733806610107, "learning_rate": 9.895502645502646e-06, "loss": 41.5086, "step": 6801 }, { "epoch": 161.955223880597, "grad_norm": 1.377724528312683, "learning_rate": 9.894179894179896e-06, "loss": 39.7827, "step": 6802 }, { "epoch": 161.97910447761194, "grad_norm": 1.3142436742782593, "learning_rate": 9.892857142857143e-06, "loss": 39.2026, "step": 6803 }, { "epoch": 162.0, "grad_norm": 1.3853013515472412, "learning_rate": 9.891534391534391e-06, "loss": 34.8583, "step": 6804 }, { "epoch": 162.02388059701494, "grad_norm": 1.353721022605896, "learning_rate": 9.890211640211641e-06, "loss": 38.3756, "step": 6805 }, { "epoch": 162.04776119402985, "grad_norm": 1.3240638971328735, "learning_rate": 9.88888888888889e-06, "loss": 38.8479, "step": 6806 }, { "epoch": 162.07164179104478, "grad_norm": 1.340394139289856, "learning_rate": 9.887566137566138e-06, "loss": 39.9021, "step": 6807 }, { "epoch": 162.0955223880597, "grad_norm": 1.3773688077926636, "learning_rate": 9.886243386243386e-06, "loss": 39.852, "step": 6808 }, { "epoch": 162.11940298507463, "grad_norm": 1.335003137588501, "learning_rate": 9.884920634920636e-06, "loss": 39.579, "step": 6809 }, { "epoch": 162.14328358208957, "grad_norm": 1.3377668857574463, "learning_rate": 9.883597883597884e-06, "loss": 40.9699, "step": 6810 }, { "epoch": 162.16716417910447, "grad_norm": 1.3397351503372192, "learning_rate": 9.882275132275133e-06, "loss": 39.8971, "step": 6811 }, { "epoch": 162.1910447761194, "grad_norm": 1.3705880641937256, "learning_rate": 9.880952380952381e-06, "loss": 39.4296, "step": 6812 }, { "epoch": 162.21492537313432, "grad_norm": 1.3143386840820312, "learning_rate": 9.87962962962963e-06, "loss": 40.9426, "step": 6813 }, { "epoch": 162.23880597014926, "grad_norm": 1.3676623106002808, "learning_rate": 9.878306878306879e-06, "loss": 39.8334, "step": 6814 }, { "epoch": 162.26268656716417, "grad_norm": 1.359370231628418, "learning_rate": 9.876984126984128e-06, "loss": 39.466, "step": 6815 }, { "epoch": 162.2865671641791, "grad_norm": 1.3777540922164917, "learning_rate": 9.875661375661376e-06, "loss": 39.917, "step": 6816 }, { "epoch": 162.31044776119404, "grad_norm": 1.3382514715194702, "learning_rate": 9.874338624338626e-06, "loss": 38.5346, "step": 6817 }, { "epoch": 162.33432835820895, "grad_norm": 1.3250700235366821, "learning_rate": 9.873015873015874e-06, "loss": 39.1932, "step": 6818 }, { "epoch": 162.3582089552239, "grad_norm": 1.3359237909317017, "learning_rate": 9.871693121693122e-06, "loss": 39.9457, "step": 6819 }, { "epoch": 162.3820895522388, "grad_norm": 1.3594201803207397, "learning_rate": 9.870370370370371e-06, "loss": 40.5637, "step": 6820 }, { "epoch": 162.40597014925373, "grad_norm": 1.3373600244522095, "learning_rate": 9.869047619047621e-06, "loss": 39.5596, "step": 6821 }, { "epoch": 162.42985074626867, "grad_norm": 1.326043725013733, "learning_rate": 9.867724867724869e-06, "loss": 39.8277, "step": 6822 }, { "epoch": 162.45373134328358, "grad_norm": 1.3625069856643677, "learning_rate": 9.866402116402117e-06, "loss": 39.9363, "step": 6823 }, { "epoch": 162.47761194029852, "grad_norm": 1.3391035795211792, "learning_rate": 9.865079365079366e-06, "loss": 39.7148, "step": 6824 }, { "epoch": 162.50149253731342, "grad_norm": 1.3578449487686157, "learning_rate": 9.863756613756614e-06, "loss": 39.694, "step": 6825 }, { "epoch": 162.52537313432836, "grad_norm": 1.3808696269989014, "learning_rate": 9.862433862433864e-06, "loss": 40.6258, "step": 6826 }, { "epoch": 162.54925373134327, "grad_norm": 1.3350684642791748, "learning_rate": 9.861111111111112e-06, "loss": 39.9133, "step": 6827 }, { "epoch": 162.5731343283582, "grad_norm": 1.3514180183410645, "learning_rate": 9.85978835978836e-06, "loss": 38.8462, "step": 6828 }, { "epoch": 162.59701492537314, "grad_norm": 1.3496195077896118, "learning_rate": 9.85846560846561e-06, "loss": 39.8082, "step": 6829 }, { "epoch": 162.62089552238805, "grad_norm": 1.3149354457855225, "learning_rate": 9.857142857142859e-06, "loss": 40.7386, "step": 6830 }, { "epoch": 162.644776119403, "grad_norm": 1.2948827743530273, "learning_rate": 9.855820105820107e-06, "loss": 40.058, "step": 6831 }, { "epoch": 162.6686567164179, "grad_norm": 1.3815650939941406, "learning_rate": 9.854497354497355e-06, "loss": 40.1838, "step": 6832 }, { "epoch": 162.69253731343284, "grad_norm": 1.3763718605041504, "learning_rate": 9.853174603174604e-06, "loss": 40.0656, "step": 6833 }, { "epoch": 162.71641791044777, "grad_norm": 1.3556911945343018, "learning_rate": 9.851851851851852e-06, "loss": 40.45, "step": 6834 }, { "epoch": 162.74029850746268, "grad_norm": 1.3100175857543945, "learning_rate": 9.850529100529102e-06, "loss": 39.9341, "step": 6835 }, { "epoch": 162.76417910447762, "grad_norm": 1.3167445659637451, "learning_rate": 9.849206349206351e-06, "loss": 41.4503, "step": 6836 }, { "epoch": 162.78805970149253, "grad_norm": 1.3492788076400757, "learning_rate": 9.8478835978836e-06, "loss": 39.6754, "step": 6837 }, { "epoch": 162.81194029850747, "grad_norm": 1.4794081449508667, "learning_rate": 9.846560846560847e-06, "loss": 40.1498, "step": 6838 }, { "epoch": 162.83582089552237, "grad_norm": 1.3538216352462769, "learning_rate": 9.845238095238097e-06, "loss": 39.1348, "step": 6839 }, { "epoch": 162.8597014925373, "grad_norm": 1.3807742595672607, "learning_rate": 9.843915343915345e-06, "loss": 40.336, "step": 6840 }, { "epoch": 162.88358208955225, "grad_norm": 1.355214238166809, "learning_rate": 9.842592592592594e-06, "loss": 39.1805, "step": 6841 }, { "epoch": 162.90746268656716, "grad_norm": 1.4151486158370972, "learning_rate": 9.841269841269842e-06, "loss": 39.1509, "step": 6842 }, { "epoch": 162.9313432835821, "grad_norm": NaN, "learning_rate": 9.83994708994709e-06, "loss": 45.7572, "step": 6843 }, { "epoch": 162.955223880597, "grad_norm": 1.3633878231048584, "learning_rate": 9.83994708994709e-06, "loss": 38.0421, "step": 6844 }, { "epoch": 162.97910447761194, "grad_norm": 1.3174763917922974, "learning_rate": 9.83862433862434e-06, "loss": 40.0818, "step": 6845 }, { "epoch": 163.0, "grad_norm": 1.3407515287399292, "learning_rate": 9.837301587301588e-06, "loss": 35.6816, "step": 6846 }, { "epoch": 163.02388059701494, "grad_norm": 1.3601586818695068, "learning_rate": 9.835978835978837e-06, "loss": 40.865, "step": 6847 }, { "epoch": 163.04776119402985, "grad_norm": 1.357651948928833, "learning_rate": 9.834656084656085e-06, "loss": 41.7481, "step": 6848 }, { "epoch": 163.07164179104478, "grad_norm": 1.3911608457565308, "learning_rate": 9.833333333333333e-06, "loss": 39.4118, "step": 6849 }, { "epoch": 163.0955223880597, "grad_norm": 1.342949628829956, "learning_rate": 9.832010582010583e-06, "loss": 39.7434, "step": 6850 }, { "epoch": 163.11940298507463, "grad_norm": 1.3519266843795776, "learning_rate": 9.830687830687832e-06, "loss": 39.4462, "step": 6851 }, { "epoch": 163.14328358208957, "grad_norm": 1.3386552333831787, "learning_rate": 9.82936507936508e-06, "loss": 38.5324, "step": 6852 }, { "epoch": 163.16716417910447, "grad_norm": 1.3389843702316284, "learning_rate": 9.828042328042328e-06, "loss": 39.1381, "step": 6853 }, { "epoch": 163.1910447761194, "grad_norm": 1.3149495124816895, "learning_rate": 9.826719576719578e-06, "loss": 39.5436, "step": 6854 }, { "epoch": 163.21492537313432, "grad_norm": 1.3141855001449585, "learning_rate": 9.825396825396825e-06, "loss": 41.6011, "step": 6855 }, { "epoch": 163.23880597014926, "grad_norm": 1.3233840465545654, "learning_rate": 9.824074074074075e-06, "loss": 39.4409, "step": 6856 }, { "epoch": 163.26268656716417, "grad_norm": 1.3799134492874146, "learning_rate": 9.822751322751325e-06, "loss": 38.4556, "step": 6857 }, { "epoch": 163.2865671641791, "grad_norm": 1.3629534244537354, "learning_rate": 9.821428571428573e-06, "loss": 40.0919, "step": 6858 }, { "epoch": 163.31044776119404, "grad_norm": 1.3623981475830078, "learning_rate": 9.82010582010582e-06, "loss": 39.984, "step": 6859 }, { "epoch": 163.33432835820895, "grad_norm": 1.3435879945755005, "learning_rate": 9.81878306878307e-06, "loss": 40.8878, "step": 6860 }, { "epoch": 163.3582089552239, "grad_norm": 1.4174673557281494, "learning_rate": 9.817460317460318e-06, "loss": 39.5994, "step": 6861 }, { "epoch": 163.3820895522388, "grad_norm": 1.3612960577011108, "learning_rate": 9.816137566137567e-06, "loss": 39.9395, "step": 6862 }, { "epoch": 163.40597014925373, "grad_norm": 1.3448374271392822, "learning_rate": 9.814814814814815e-06, "loss": 39.6921, "step": 6863 }, { "epoch": 163.42985074626867, "grad_norm": 1.38387131690979, "learning_rate": 9.813492063492063e-06, "loss": 40.1043, "step": 6864 }, { "epoch": 163.45373134328358, "grad_norm": 1.3068115711212158, "learning_rate": 9.812169312169313e-06, "loss": 40.2811, "step": 6865 }, { "epoch": 163.47761194029852, "grad_norm": 1.3535618782043457, "learning_rate": 9.810846560846562e-06, "loss": 40.8599, "step": 6866 }, { "epoch": 163.50149253731342, "grad_norm": 1.3821572065353394, "learning_rate": 9.80952380952381e-06, "loss": 39.7494, "step": 6867 }, { "epoch": 163.52537313432836, "grad_norm": 1.3654358386993408, "learning_rate": 9.808201058201058e-06, "loss": 39.0526, "step": 6868 }, { "epoch": 163.54925373134327, "grad_norm": 1.396664023399353, "learning_rate": 9.806878306878308e-06, "loss": 39.0383, "step": 6869 }, { "epoch": 163.5731343283582, "grad_norm": 1.3618090152740479, "learning_rate": 9.805555555555556e-06, "loss": 39.0119, "step": 6870 }, { "epoch": 163.59701492537314, "grad_norm": 1.3441953659057617, "learning_rate": 9.804232804232805e-06, "loss": 41.3227, "step": 6871 }, { "epoch": 163.62089552238805, "grad_norm": 1.3785994052886963, "learning_rate": 9.802910052910053e-06, "loss": 38.912, "step": 6872 }, { "epoch": 163.644776119403, "grad_norm": 1.311445713043213, "learning_rate": 9.801587301587301e-06, "loss": 39.6595, "step": 6873 }, { "epoch": 163.6686567164179, "grad_norm": 1.4372007846832275, "learning_rate": 9.80026455026455e-06, "loss": 40.5792, "step": 6874 }, { "epoch": 163.69253731343284, "grad_norm": 1.3256510496139526, "learning_rate": 9.7989417989418e-06, "loss": 39.1204, "step": 6875 }, { "epoch": 163.71641791044777, "grad_norm": 1.3935540914535522, "learning_rate": 9.797619047619048e-06, "loss": 40.7691, "step": 6876 }, { "epoch": 163.74029850746268, "grad_norm": 1.366593360900879, "learning_rate": 9.796296296296298e-06, "loss": 39.0504, "step": 6877 }, { "epoch": 163.76417910447762, "grad_norm": 1.354702353477478, "learning_rate": 9.794973544973546e-06, "loss": 39.5547, "step": 6878 }, { "epoch": 163.78805970149253, "grad_norm": 1.3586008548736572, "learning_rate": 9.793650793650794e-06, "loss": 40.3465, "step": 6879 }, { "epoch": 163.81194029850747, "grad_norm": 1.4239503145217896, "learning_rate": 9.792328042328043e-06, "loss": 39.3861, "step": 6880 }, { "epoch": 163.83582089552237, "grad_norm": 1.368205189704895, "learning_rate": 9.791005291005293e-06, "loss": 39.16, "step": 6881 }, { "epoch": 163.8597014925373, "grad_norm": 1.402453899383545, "learning_rate": 9.78968253968254e-06, "loss": 39.063, "step": 6882 }, { "epoch": 163.88358208955225, "grad_norm": 1.34282648563385, "learning_rate": 9.788359788359789e-06, "loss": 41.2819, "step": 6883 }, { "epoch": 163.90746268656716, "grad_norm": 1.3638962507247925, "learning_rate": 9.787037037037038e-06, "loss": 38.967, "step": 6884 }, { "epoch": 163.9313432835821, "grad_norm": 1.355662226676941, "learning_rate": 9.785714285714286e-06, "loss": 39.915, "step": 6885 }, { "epoch": 163.955223880597, "grad_norm": 1.34540593624115, "learning_rate": 9.784391534391536e-06, "loss": 38.866, "step": 6886 }, { "epoch": 163.97910447761194, "grad_norm": 1.3553423881530762, "learning_rate": 9.783068783068784e-06, "loss": 40.7202, "step": 6887 }, { "epoch": 164.0, "grad_norm": 1.3325772285461426, "learning_rate": 9.781746031746032e-06, "loss": 34.9249, "step": 6888 }, { "epoch": 164.02388059701494, "grad_norm": 1.3387930393218994, "learning_rate": 9.780423280423281e-06, "loss": 39.472, "step": 6889 }, { "epoch": 164.04776119402985, "grad_norm": 1.3544968366622925, "learning_rate": 9.77910052910053e-06, "loss": 40.5523, "step": 6890 }, { "epoch": 164.07164179104478, "grad_norm": 1.3481279611587524, "learning_rate": 9.777777777777779e-06, "loss": 38.6651, "step": 6891 }, { "epoch": 164.0955223880597, "grad_norm": 1.3686054944992065, "learning_rate": 9.776455026455027e-06, "loss": 38.6406, "step": 6892 }, { "epoch": 164.11940298507463, "grad_norm": 1.3760312795639038, "learning_rate": 9.775132275132276e-06, "loss": 39.6763, "step": 6893 }, { "epoch": 164.14328358208957, "grad_norm": 1.4612263441085815, "learning_rate": 9.773809523809524e-06, "loss": 40.1674, "step": 6894 }, { "epoch": 164.16716417910447, "grad_norm": 1.3363441228866577, "learning_rate": 9.772486772486774e-06, "loss": 39.8871, "step": 6895 }, { "epoch": 164.1910447761194, "grad_norm": 1.3842893838882446, "learning_rate": 9.771164021164023e-06, "loss": 40.4011, "step": 6896 }, { "epoch": 164.21492537313432, "grad_norm": 1.340488314628601, "learning_rate": 9.769841269841271e-06, "loss": 39.8951, "step": 6897 }, { "epoch": 164.23880597014926, "grad_norm": 1.3618440628051758, "learning_rate": 9.768518518518519e-06, "loss": 38.7149, "step": 6898 }, { "epoch": 164.26268656716417, "grad_norm": 1.354642629623413, "learning_rate": 9.767195767195769e-06, "loss": 39.071, "step": 6899 }, { "epoch": 164.2865671641791, "grad_norm": 1.3772295713424683, "learning_rate": 9.765873015873017e-06, "loss": 40.2231, "step": 6900 }, { "epoch": 164.31044776119404, "grad_norm": 1.3554683923721313, "learning_rate": 9.764550264550266e-06, "loss": 38.7759, "step": 6901 }, { "epoch": 164.33432835820895, "grad_norm": 1.31846284866333, "learning_rate": 9.763227513227514e-06, "loss": 38.7104, "step": 6902 }, { "epoch": 164.3582089552239, "grad_norm": 1.3416959047317505, "learning_rate": 9.761904761904762e-06, "loss": 38.8722, "step": 6903 }, { "epoch": 164.3820895522388, "grad_norm": 1.3097319602966309, "learning_rate": 9.760582010582012e-06, "loss": 40.1763, "step": 6904 }, { "epoch": 164.40597014925373, "grad_norm": 1.3473299741744995, "learning_rate": 9.759259259259261e-06, "loss": 39.4153, "step": 6905 }, { "epoch": 164.42985074626867, "grad_norm": NaN, "learning_rate": 9.757936507936509e-06, "loss": 39.8658, "step": 6906 }, { "epoch": 164.45373134328358, "grad_norm": 1.3839035034179688, "learning_rate": 9.757936507936509e-06, "loss": 39.3374, "step": 6907 }, { "epoch": 164.47761194029852, "grad_norm": 1.3394455909729004, "learning_rate": 9.756613756613757e-06, "loss": 40.2607, "step": 6908 }, { "epoch": 164.50149253731342, "grad_norm": 1.3663523197174072, "learning_rate": 9.755291005291007e-06, "loss": 40.5247, "step": 6909 }, { "epoch": 164.52537313432836, "grad_norm": 1.494244933128357, "learning_rate": 9.753968253968254e-06, "loss": 40.9969, "step": 6910 }, { "epoch": 164.54925373134327, "grad_norm": 1.3175632953643799, "learning_rate": 9.752645502645504e-06, "loss": 38.9167, "step": 6911 }, { "epoch": 164.5731343283582, "grad_norm": 1.3910958766937256, "learning_rate": 9.751322751322752e-06, "loss": 40.0439, "step": 6912 }, { "epoch": 164.59701492537314, "grad_norm": 1.3739866018295288, "learning_rate": 9.75e-06, "loss": 41.2039, "step": 6913 }, { "epoch": 164.62089552238805, "grad_norm": 1.3448976278305054, "learning_rate": 9.74867724867725e-06, "loss": 41.4528, "step": 6914 }, { "epoch": 164.644776119403, "grad_norm": 1.3612624406814575, "learning_rate": 9.747354497354499e-06, "loss": 40.7767, "step": 6915 }, { "epoch": 164.6686567164179, "grad_norm": 1.4123319387435913, "learning_rate": 9.746031746031747e-06, "loss": 39.726, "step": 6916 }, { "epoch": 164.69253731343284, "grad_norm": 1.3529603481292725, "learning_rate": 9.744708994708997e-06, "loss": 39.8771, "step": 6917 }, { "epoch": 164.71641791044777, "grad_norm": 1.3270225524902344, "learning_rate": 9.743386243386244e-06, "loss": 39.3651, "step": 6918 }, { "epoch": 164.74029850746268, "grad_norm": 1.356740951538086, "learning_rate": 9.742063492063492e-06, "loss": 39.1558, "step": 6919 }, { "epoch": 164.76417910447762, "grad_norm": 1.3668843507766724, "learning_rate": 9.740740740740742e-06, "loss": 40.4382, "step": 6920 }, { "epoch": 164.78805970149253, "grad_norm": 1.3559602499008179, "learning_rate": 9.73941798941799e-06, "loss": 39.436, "step": 6921 }, { "epoch": 164.81194029850747, "grad_norm": 1.4806073904037476, "learning_rate": 9.73809523809524e-06, "loss": 41.9118, "step": 6922 }, { "epoch": 164.83582089552237, "grad_norm": 1.3269169330596924, "learning_rate": 9.736772486772487e-06, "loss": 40.4617, "step": 6923 }, { "epoch": 164.8597014925373, "grad_norm": 1.3404433727264404, "learning_rate": 9.735449735449735e-06, "loss": 39.6914, "step": 6924 }, { "epoch": 164.88358208955225, "grad_norm": 1.3517295122146606, "learning_rate": 9.734126984126985e-06, "loss": 40.1916, "step": 6925 }, { "epoch": 164.90746268656716, "grad_norm": 1.3799223899841309, "learning_rate": 9.732804232804234e-06, "loss": 39.266, "step": 6926 }, { "epoch": 164.9313432835821, "grad_norm": 1.3542789220809937, "learning_rate": 9.731481481481482e-06, "loss": 39.0602, "step": 6927 }, { "epoch": 164.955223880597, "grad_norm": 1.4114280939102173, "learning_rate": 9.73015873015873e-06, "loss": 40.0193, "step": 6928 }, { "epoch": 164.97910447761194, "grad_norm": 1.4133340120315552, "learning_rate": 9.72883597883598e-06, "loss": 39.624, "step": 6929 }, { "epoch": 165.0, "grad_norm": 1.3623600006103516, "learning_rate": 9.727513227513228e-06, "loss": 34.0414, "step": 6930 }, { "epoch": 165.02388059701494, "grad_norm": 1.3336745500564575, "learning_rate": 9.726190476190477e-06, "loss": 39.4018, "step": 6931 }, { "epoch": 165.04776119402985, "grad_norm": 1.3531681299209595, "learning_rate": 9.724867724867725e-06, "loss": 40.6619, "step": 6932 }, { "epoch": 165.07164179104478, "grad_norm": 1.3384662866592407, "learning_rate": 9.723544973544973e-06, "loss": 40.1925, "step": 6933 }, { "epoch": 165.0955223880597, "grad_norm": 1.3390882015228271, "learning_rate": 9.722222222222223e-06, "loss": 39.3963, "step": 6934 }, { "epoch": 165.11940298507463, "grad_norm": 1.374054193496704, "learning_rate": 9.720899470899472e-06, "loss": 39.7797, "step": 6935 }, { "epoch": 165.14328358208957, "grad_norm": 1.4392805099487305, "learning_rate": 9.71957671957672e-06, "loss": 41.6873, "step": 6936 }, { "epoch": 165.16716417910447, "grad_norm": 1.3564985990524292, "learning_rate": 9.71825396825397e-06, "loss": 40.6515, "step": 6937 }, { "epoch": 165.1910447761194, "grad_norm": 1.3396285772323608, "learning_rate": 9.716931216931218e-06, "loss": 40.9068, "step": 6938 }, { "epoch": 165.21492537313432, "grad_norm": 1.350167155265808, "learning_rate": 9.715608465608466e-06, "loss": 38.933, "step": 6939 }, { "epoch": 165.23880597014926, "grad_norm": 1.3914735317230225, "learning_rate": 9.714285714285715e-06, "loss": 38.5978, "step": 6940 }, { "epoch": 165.26268656716417, "grad_norm": 1.346781611442566, "learning_rate": 9.712962962962965e-06, "loss": 39.6332, "step": 6941 }, { "epoch": 165.2865671641791, "grad_norm": 1.3637794256210327, "learning_rate": 9.711640211640213e-06, "loss": 40.5987, "step": 6942 }, { "epoch": 165.31044776119404, "grad_norm": 1.3956800699234009, "learning_rate": 9.71031746031746e-06, "loss": 38.6682, "step": 6943 }, { "epoch": 165.33432835820895, "grad_norm": 1.3602339029312134, "learning_rate": 9.70899470899471e-06, "loss": 39.8005, "step": 6944 }, { "epoch": 165.3582089552239, "grad_norm": 1.368464708328247, "learning_rate": 9.707671957671958e-06, "loss": 38.9403, "step": 6945 }, { "epoch": 165.3820895522388, "grad_norm": 1.3539690971374512, "learning_rate": 9.706349206349208e-06, "loss": 40.2092, "step": 6946 }, { "epoch": 165.40597014925373, "grad_norm": 1.3289170265197754, "learning_rate": 9.705026455026456e-06, "loss": 37.9102, "step": 6947 }, { "epoch": 165.42985074626867, "grad_norm": 1.3967565298080444, "learning_rate": 9.703703703703703e-06, "loss": 38.985, "step": 6948 }, { "epoch": 165.45373134328358, "grad_norm": 1.316239595413208, "learning_rate": 9.702380952380953e-06, "loss": 40.4094, "step": 6949 }, { "epoch": 165.47761194029852, "grad_norm": 1.3844984769821167, "learning_rate": 9.701058201058203e-06, "loss": 39.4248, "step": 6950 }, { "epoch": 165.50149253731342, "grad_norm": 1.3383110761642456, "learning_rate": 9.69973544973545e-06, "loss": 38.6293, "step": 6951 }, { "epoch": 165.52537313432836, "grad_norm": 1.3987687826156616, "learning_rate": 9.698412698412698e-06, "loss": 40.6315, "step": 6952 }, { "epoch": 165.54925373134327, "grad_norm": 1.3665423393249512, "learning_rate": 9.697089947089948e-06, "loss": 40.5772, "step": 6953 }, { "epoch": 165.5731343283582, "grad_norm": 1.4644224643707275, "learning_rate": 9.695767195767196e-06, "loss": 39.8573, "step": 6954 }, { "epoch": 165.59701492537314, "grad_norm": 1.3907897472381592, "learning_rate": 9.694444444444446e-06, "loss": 39.871, "step": 6955 }, { "epoch": 165.62089552238805, "grad_norm": 1.3374537229537964, "learning_rate": 9.693121693121693e-06, "loss": 40.0979, "step": 6956 }, { "epoch": 165.644776119403, "grad_norm": 1.332961082458496, "learning_rate": 9.691798941798943e-06, "loss": 39.9541, "step": 6957 }, { "epoch": 165.6686567164179, "grad_norm": 1.3702845573425293, "learning_rate": 9.690476190476191e-06, "loss": 39.9834, "step": 6958 }, { "epoch": 165.69253731343284, "grad_norm": 1.332804560661316, "learning_rate": 9.68915343915344e-06, "loss": 39.2202, "step": 6959 }, { "epoch": 165.71641791044777, "grad_norm": 1.3361445665359497, "learning_rate": 9.687830687830688e-06, "loss": 39.703, "step": 6960 }, { "epoch": 165.74029850746268, "grad_norm": 1.3784061670303345, "learning_rate": 9.686507936507938e-06, "loss": 39.7805, "step": 6961 }, { "epoch": 165.76417910447762, "grad_norm": 1.350595235824585, "learning_rate": 9.685185185185186e-06, "loss": 39.928, "step": 6962 }, { "epoch": 165.78805970149253, "grad_norm": 1.3857430219650269, "learning_rate": 9.683862433862434e-06, "loss": 40.0704, "step": 6963 }, { "epoch": 165.81194029850747, "grad_norm": 1.348429560661316, "learning_rate": 9.682539682539683e-06, "loss": 39.656, "step": 6964 }, { "epoch": 165.83582089552237, "grad_norm": 1.3129210472106934, "learning_rate": 9.681216931216933e-06, "loss": 39.7069, "step": 6965 }, { "epoch": 165.8597014925373, "grad_norm": 1.372497797012329, "learning_rate": 9.679894179894181e-06, "loss": 40.633, "step": 6966 }, { "epoch": 165.88358208955225, "grad_norm": 1.3648401498794556, "learning_rate": 9.678571428571429e-06, "loss": 39.6313, "step": 6967 }, { "epoch": 165.90746268656716, "grad_norm": 1.3606940507888794, "learning_rate": 9.677248677248678e-06, "loss": 40.175, "step": 6968 }, { "epoch": 165.9313432835821, "grad_norm": 1.3502140045166016, "learning_rate": 9.675925925925926e-06, "loss": 38.5896, "step": 6969 }, { "epoch": 165.955223880597, "grad_norm": 1.4001539945602417, "learning_rate": 9.674603174603176e-06, "loss": 40.2346, "step": 6970 }, { "epoch": 165.97910447761194, "grad_norm": 1.3367387056350708, "learning_rate": 9.673280423280424e-06, "loss": 39.0902, "step": 6971 }, { "epoch": 166.0, "grad_norm": 1.3515381813049316, "learning_rate": 9.671957671957672e-06, "loss": 36.5363, "step": 6972 }, { "epoch": 166.02388059701494, "grad_norm": 1.3590385913848877, "learning_rate": 9.670634920634921e-06, "loss": 39.6357, "step": 6973 }, { "epoch": 166.04776119402985, "grad_norm": 1.3399431705474854, "learning_rate": 9.669312169312171e-06, "loss": 40.5035, "step": 6974 }, { "epoch": 166.07164179104478, "grad_norm": 1.3200721740722656, "learning_rate": 9.667989417989419e-06, "loss": 40.3884, "step": 6975 }, { "epoch": 166.0955223880597, "grad_norm": 1.3185110092163086, "learning_rate": 9.666666666666667e-06, "loss": 39.6576, "step": 6976 }, { "epoch": 166.11940298507463, "grad_norm": 1.3626072406768799, "learning_rate": 9.665343915343916e-06, "loss": 39.6171, "step": 6977 }, { "epoch": 166.14328358208957, "grad_norm": 1.3651039600372314, "learning_rate": 9.664021164021164e-06, "loss": 40.7049, "step": 6978 }, { "epoch": 166.16716417910447, "grad_norm": 1.3552557229995728, "learning_rate": 9.662698412698414e-06, "loss": 38.5042, "step": 6979 }, { "epoch": 166.1910447761194, "grad_norm": 1.3370728492736816, "learning_rate": 9.661375661375663e-06, "loss": 39.8901, "step": 6980 }, { "epoch": 166.21492537313432, "grad_norm": 1.3414838314056396, "learning_rate": 9.660052910052911e-06, "loss": 40.2988, "step": 6981 }, { "epoch": 166.23880597014926, "grad_norm": 1.4650989770889282, "learning_rate": 9.65873015873016e-06, "loss": 39.3792, "step": 6982 }, { "epoch": 166.26268656716417, "grad_norm": 1.3368306159973145, "learning_rate": 9.657407407407409e-06, "loss": 40.1067, "step": 6983 }, { "epoch": 166.2865671641791, "grad_norm": 1.3175499439239502, "learning_rate": 9.656084656084657e-06, "loss": 39.5464, "step": 6984 }, { "epoch": 166.31044776119404, "grad_norm": 1.3266922235488892, "learning_rate": 9.654761904761906e-06, "loss": 39.9473, "step": 6985 }, { "epoch": 166.33432835820895, "grad_norm": 1.335342526435852, "learning_rate": 9.653439153439154e-06, "loss": 39.8762, "step": 6986 }, { "epoch": 166.3582089552239, "grad_norm": 1.3278251886367798, "learning_rate": 9.652116402116402e-06, "loss": 39.5196, "step": 6987 }, { "epoch": 166.3820895522388, "grad_norm": 1.3360018730163574, "learning_rate": 9.650793650793652e-06, "loss": 38.9576, "step": 6988 }, { "epoch": 166.40597014925373, "grad_norm": 1.369584560394287, "learning_rate": 9.649470899470901e-06, "loss": 38.0433, "step": 6989 }, { "epoch": 166.42985074626867, "grad_norm": 1.382920503616333, "learning_rate": 9.64814814814815e-06, "loss": 38.5275, "step": 6990 }, { "epoch": 166.45373134328358, "grad_norm": 1.3460105657577515, "learning_rate": 9.646825396825397e-06, "loss": 40.767, "step": 6991 }, { "epoch": 166.47761194029852, "grad_norm": 1.3411517143249512, "learning_rate": 9.645502645502647e-06, "loss": 39.326, "step": 6992 }, { "epoch": 166.50149253731342, "grad_norm": 1.3498022556304932, "learning_rate": 9.644179894179895e-06, "loss": 39.6927, "step": 6993 }, { "epoch": 166.52537313432836, "grad_norm": 1.3013488054275513, "learning_rate": 9.642857142857144e-06, "loss": 39.5556, "step": 6994 }, { "epoch": 166.54925373134327, "grad_norm": 1.3672395944595337, "learning_rate": 9.641534391534392e-06, "loss": 40.6455, "step": 6995 }, { "epoch": 166.5731343283582, "grad_norm": 1.3663015365600586, "learning_rate": 9.64021164021164e-06, "loss": 39.2814, "step": 6996 }, { "epoch": 166.59701492537314, "grad_norm": 1.3533947467803955, "learning_rate": 9.63888888888889e-06, "loss": 39.2606, "step": 6997 }, { "epoch": 166.62089552238805, "grad_norm": 1.3200322389602661, "learning_rate": 9.63756613756614e-06, "loss": 40.1535, "step": 6998 }, { "epoch": 166.644776119403, "grad_norm": 1.3595837354660034, "learning_rate": 9.636243386243387e-06, "loss": 39.8342, "step": 6999 }, { "epoch": 166.6686567164179, "grad_norm": 1.3496931791305542, "learning_rate": 9.634920634920637e-06, "loss": 40.5993, "step": 7000 }, { "epoch": 166.69253731343284, "grad_norm": 1.3617490530014038, "learning_rate": 9.633597883597885e-06, "loss": 40.3263, "step": 7001 }, { "epoch": 166.71641791044777, "grad_norm": 1.3269988298416138, "learning_rate": 9.632275132275132e-06, "loss": 40.4448, "step": 7002 }, { "epoch": 166.74029850746268, "grad_norm": 1.3891942501068115, "learning_rate": 9.630952380952382e-06, "loss": 40.0166, "step": 7003 }, { "epoch": 166.76417910447762, "grad_norm": 1.337205410003662, "learning_rate": 9.62962962962963e-06, "loss": 39.864, "step": 7004 }, { "epoch": 166.78805970149253, "grad_norm": 1.3462777137756348, "learning_rate": 9.62830687830688e-06, "loss": 41.321, "step": 7005 }, { "epoch": 166.81194029850747, "grad_norm": 1.3246110677719116, "learning_rate": 9.626984126984127e-06, "loss": 40.9173, "step": 7006 }, { "epoch": 166.83582089552237, "grad_norm": 1.369049310684204, "learning_rate": 9.625661375661375e-06, "loss": 39.2198, "step": 7007 }, { "epoch": 166.8597014925373, "grad_norm": 1.3217442035675049, "learning_rate": 9.624338624338625e-06, "loss": 41.0203, "step": 7008 }, { "epoch": 166.88358208955225, "grad_norm": 1.3401697874069214, "learning_rate": 9.623015873015875e-06, "loss": 40.3809, "step": 7009 }, { "epoch": 166.90746268656716, "grad_norm": 1.3532962799072266, "learning_rate": 9.621693121693122e-06, "loss": 38.487, "step": 7010 }, { "epoch": 166.9313432835821, "grad_norm": 1.3789747953414917, "learning_rate": 9.62037037037037e-06, "loss": 39.9497, "step": 7011 }, { "epoch": 166.955223880597, "grad_norm": 1.423011302947998, "learning_rate": 9.61904761904762e-06, "loss": 38.7952, "step": 7012 }, { "epoch": 166.97910447761194, "grad_norm": 1.354900598526001, "learning_rate": 9.617724867724868e-06, "loss": 39.1116, "step": 7013 }, { "epoch": 167.0, "grad_norm": 1.3092418909072876, "learning_rate": 9.616402116402117e-06, "loss": 34.7259, "step": 7014 }, { "epoch": 167.02388059701494, "grad_norm": 1.3559030294418335, "learning_rate": 9.615079365079365e-06, "loss": 39.0737, "step": 7015 }, { "epoch": 167.04776119402985, "grad_norm": 1.329548716545105, "learning_rate": 9.613756613756613e-06, "loss": 39.4389, "step": 7016 }, { "epoch": 167.07164179104478, "grad_norm": 1.390742301940918, "learning_rate": 9.612433862433863e-06, "loss": 39.6407, "step": 7017 }, { "epoch": 167.0955223880597, "grad_norm": 1.3454971313476562, "learning_rate": 9.611111111111112e-06, "loss": 40.6573, "step": 7018 }, { "epoch": 167.11940298507463, "grad_norm": 1.3405733108520508, "learning_rate": 9.60978835978836e-06, "loss": 39.7179, "step": 7019 }, { "epoch": 167.14328358208957, "grad_norm": 1.3281277418136597, "learning_rate": 9.60846560846561e-06, "loss": 39.6091, "step": 7020 }, { "epoch": 167.16716417910447, "grad_norm": 1.4051810503005981, "learning_rate": 9.607142857142858e-06, "loss": 39.8035, "step": 7021 }, { "epoch": 167.1910447761194, "grad_norm": 1.3172636032104492, "learning_rate": 9.605820105820106e-06, "loss": 38.1107, "step": 7022 }, { "epoch": 167.21492537313432, "grad_norm": 1.3670603036880493, "learning_rate": 9.604497354497355e-06, "loss": 38.7103, "step": 7023 }, { "epoch": 167.23880597014926, "grad_norm": 1.378203272819519, "learning_rate": 9.603174603174605e-06, "loss": 39.88, "step": 7024 }, { "epoch": 167.26268656716417, "grad_norm": 1.3266545534133911, "learning_rate": 9.601851851851853e-06, "loss": 40.3703, "step": 7025 }, { "epoch": 167.2865671641791, "grad_norm": 1.3473185300827026, "learning_rate": 9.6005291005291e-06, "loss": 38.7424, "step": 7026 }, { "epoch": 167.31044776119404, "grad_norm": 1.323764443397522, "learning_rate": 9.59920634920635e-06, "loss": 40.9287, "step": 7027 }, { "epoch": 167.33432835820895, "grad_norm": 1.3710359334945679, "learning_rate": 9.597883597883598e-06, "loss": 41.2086, "step": 7028 }, { "epoch": 167.3582089552239, "grad_norm": 1.3750139474868774, "learning_rate": 9.596560846560848e-06, "loss": 38.5904, "step": 7029 }, { "epoch": 167.3820895522388, "grad_norm": 1.3192567825317383, "learning_rate": 9.595238095238096e-06, "loss": 39.3472, "step": 7030 }, { "epoch": 167.40597014925373, "grad_norm": 1.322412371635437, "learning_rate": 9.593915343915344e-06, "loss": 39.9354, "step": 7031 }, { "epoch": 167.42985074626867, "grad_norm": 1.3642640113830566, "learning_rate": 9.592592592592593e-06, "loss": 40.5585, "step": 7032 }, { "epoch": 167.45373134328358, "grad_norm": 1.3717423677444458, "learning_rate": 9.591269841269843e-06, "loss": 39.5879, "step": 7033 }, { "epoch": 167.47761194029852, "grad_norm": 1.344419240951538, "learning_rate": 9.58994708994709e-06, "loss": 40.0879, "step": 7034 }, { "epoch": 167.50149253731342, "grad_norm": 1.3486324548721313, "learning_rate": 9.588624338624339e-06, "loss": 39.463, "step": 7035 }, { "epoch": 167.52537313432836, "grad_norm": 1.3633627891540527, "learning_rate": 9.587301587301588e-06, "loss": 38.7203, "step": 7036 }, { "epoch": 167.54925373134327, "grad_norm": 1.3374042510986328, "learning_rate": 9.585978835978836e-06, "loss": 40.1283, "step": 7037 }, { "epoch": 167.5731343283582, "grad_norm": 1.4064818620681763, "learning_rate": 9.584656084656086e-06, "loss": 38.3532, "step": 7038 }, { "epoch": 167.59701492537314, "grad_norm": 1.3595378398895264, "learning_rate": 9.583333333333335e-06, "loss": 40.8573, "step": 7039 }, { "epoch": 167.62089552238805, "grad_norm": 1.3526853322982788, "learning_rate": 9.582010582010583e-06, "loss": 39.7884, "step": 7040 }, { "epoch": 167.644776119403, "grad_norm": 1.3426052331924438, "learning_rate": 9.580687830687831e-06, "loss": 41.2005, "step": 7041 }, { "epoch": 167.6686567164179, "grad_norm": 1.3682364225387573, "learning_rate": 9.57936507936508e-06, "loss": 40.0048, "step": 7042 }, { "epoch": 167.69253731343284, "grad_norm": 1.3397588729858398, "learning_rate": 9.578042328042329e-06, "loss": 39.8863, "step": 7043 }, { "epoch": 167.71641791044777, "grad_norm": 1.4009288549423218, "learning_rate": 9.576719576719578e-06, "loss": 38.3545, "step": 7044 }, { "epoch": 167.74029850746268, "grad_norm": 1.3808497190475464, "learning_rate": 9.575396825396826e-06, "loss": 40.0806, "step": 7045 }, { "epoch": 167.76417910447762, "grad_norm": 1.3357704877853394, "learning_rate": 9.574074074074074e-06, "loss": 40.8193, "step": 7046 }, { "epoch": 167.78805970149253, "grad_norm": 1.3432787656784058, "learning_rate": 9.572751322751324e-06, "loss": 40.1928, "step": 7047 }, { "epoch": 167.81194029850747, "grad_norm": 1.3016505241394043, "learning_rate": 9.571428571428573e-06, "loss": 41.7644, "step": 7048 }, { "epoch": 167.83582089552237, "grad_norm": 1.3368052244186401, "learning_rate": 9.570105820105821e-06, "loss": 39.2175, "step": 7049 }, { "epoch": 167.8597014925373, "grad_norm": 1.3311084508895874, "learning_rate": 9.568783068783069e-06, "loss": 38.8456, "step": 7050 }, { "epoch": 167.88358208955225, "grad_norm": 1.3640724420547485, "learning_rate": 9.567460317460319e-06, "loss": 39.831, "step": 7051 }, { "epoch": 167.90746268656716, "grad_norm": 1.3592629432678223, "learning_rate": 9.566137566137567e-06, "loss": 40.4093, "step": 7052 }, { "epoch": 167.9313432835821, "grad_norm": 1.3266855478286743, "learning_rate": 9.564814814814816e-06, "loss": 40.236, "step": 7053 }, { "epoch": 167.955223880597, "grad_norm": 1.3842047452926636, "learning_rate": 9.563492063492064e-06, "loss": 39.1914, "step": 7054 }, { "epoch": 167.97910447761194, "grad_norm": 1.3062819242477417, "learning_rate": 9.562169312169312e-06, "loss": 40.3472, "step": 7055 }, { "epoch": 168.0, "grad_norm": 1.3494211435317993, "learning_rate": 9.560846560846561e-06, "loss": 34.9703, "step": 7056 }, { "epoch": 168.02388059701494, "grad_norm": 1.3233306407928467, "learning_rate": 9.559523809523811e-06, "loss": 40.8555, "step": 7057 }, { "epoch": 168.04776119402985, "grad_norm": 1.4142626523971558, "learning_rate": 9.558201058201059e-06, "loss": 41.3451, "step": 7058 }, { "epoch": 168.07164179104478, "grad_norm": 1.3779305219650269, "learning_rate": 9.556878306878309e-06, "loss": 39.8239, "step": 7059 }, { "epoch": 168.0955223880597, "grad_norm": 1.3489388227462769, "learning_rate": 9.555555555555556e-06, "loss": 40.7477, "step": 7060 }, { "epoch": 168.11940298507463, "grad_norm": 1.3324973583221436, "learning_rate": 9.554232804232804e-06, "loss": 39.258, "step": 7061 }, { "epoch": 168.14328358208957, "grad_norm": 1.344478726387024, "learning_rate": 9.552910052910054e-06, "loss": 39.076, "step": 7062 }, { "epoch": 168.16716417910447, "grad_norm": 1.3738884925842285, "learning_rate": 9.551587301587304e-06, "loss": 39.99, "step": 7063 }, { "epoch": 168.1910447761194, "grad_norm": 1.4221608638763428, "learning_rate": 9.550264550264551e-06, "loss": 38.8112, "step": 7064 }, { "epoch": 168.21492537313432, "grad_norm": 1.3458447456359863, "learning_rate": 9.5489417989418e-06, "loss": 40.5159, "step": 7065 }, { "epoch": 168.23880597014926, "grad_norm": 1.3454288244247437, "learning_rate": 9.547619047619049e-06, "loss": 40.4204, "step": 7066 }, { "epoch": 168.26268656716417, "grad_norm": 1.3390823602676392, "learning_rate": 9.546296296296297e-06, "loss": 40.8493, "step": 7067 }, { "epoch": 168.2865671641791, "grad_norm": 1.3296849727630615, "learning_rate": 9.544973544973546e-06, "loss": 40.4683, "step": 7068 }, { "epoch": 168.31044776119404, "grad_norm": 1.3632545471191406, "learning_rate": 9.543650793650794e-06, "loss": 38.8796, "step": 7069 }, { "epoch": 168.33432835820895, "grad_norm": 1.3552480936050415, "learning_rate": 9.542328042328042e-06, "loss": 39.5039, "step": 7070 }, { "epoch": 168.3582089552239, "grad_norm": 1.339033603668213, "learning_rate": 9.541005291005292e-06, "loss": 40.0749, "step": 7071 }, { "epoch": 168.3820895522388, "grad_norm": 1.3255335092544556, "learning_rate": 9.539682539682541e-06, "loss": 39.7082, "step": 7072 }, { "epoch": 168.40597014925373, "grad_norm": 1.3985430002212524, "learning_rate": 9.53835978835979e-06, "loss": 39.8512, "step": 7073 }, { "epoch": 168.42985074626867, "grad_norm": 1.3709005117416382, "learning_rate": 9.537037037037037e-06, "loss": 38.2344, "step": 7074 }, { "epoch": 168.45373134328358, "grad_norm": 1.350658893585205, "learning_rate": 9.535714285714287e-06, "loss": 39.3283, "step": 7075 }, { "epoch": 168.47761194029852, "grad_norm": 1.3166258335113525, "learning_rate": 9.534391534391535e-06, "loss": 38.9473, "step": 7076 }, { "epoch": 168.50149253731342, "grad_norm": 1.3502711057662964, "learning_rate": 9.533068783068784e-06, "loss": 39.3751, "step": 7077 }, { "epoch": 168.52537313432836, "grad_norm": 1.3733402490615845, "learning_rate": 9.531746031746032e-06, "loss": 38.7871, "step": 7078 }, { "epoch": 168.54925373134327, "grad_norm": 1.366037368774414, "learning_rate": 9.530423280423282e-06, "loss": 39.5465, "step": 7079 }, { "epoch": 168.5731343283582, "grad_norm": 1.331069827079773, "learning_rate": 9.52910052910053e-06, "loss": 40.3177, "step": 7080 }, { "epoch": 168.59701492537314, "grad_norm": 1.3913800716400146, "learning_rate": 9.527777777777778e-06, "loss": 39.3226, "step": 7081 }, { "epoch": 168.62089552238805, "grad_norm": 1.3283894062042236, "learning_rate": 9.526455026455027e-06, "loss": 40.3936, "step": 7082 }, { "epoch": 168.644776119403, "grad_norm": 1.3107869625091553, "learning_rate": 9.525132275132277e-06, "loss": 39.7354, "step": 7083 }, { "epoch": 168.6686567164179, "grad_norm": 1.396079659461975, "learning_rate": 9.523809523809525e-06, "loss": 39.8396, "step": 7084 }, { "epoch": 168.69253731343284, "grad_norm": 1.346066951751709, "learning_rate": 9.522486772486773e-06, "loss": 40.8605, "step": 7085 }, { "epoch": 168.71641791044777, "grad_norm": 1.3828232288360596, "learning_rate": 9.521164021164022e-06, "loss": 39.8928, "step": 7086 }, { "epoch": 168.74029850746268, "grad_norm": 1.3492178916931152, "learning_rate": 9.51984126984127e-06, "loss": 40.0894, "step": 7087 }, { "epoch": 168.76417910447762, "grad_norm": NaN, "learning_rate": 9.51851851851852e-06, "loss": 34.4094, "step": 7088 }, { "epoch": 168.78805970149253, "grad_norm": 1.3919861316680908, "learning_rate": 9.51851851851852e-06, "loss": 39.7423, "step": 7089 }, { "epoch": 168.81194029850747, "grad_norm": 1.3383917808532715, "learning_rate": 9.517195767195768e-06, "loss": 39.9854, "step": 7090 }, { "epoch": 168.83582089552237, "grad_norm": 1.3066655397415161, "learning_rate": 9.515873015873016e-06, "loss": 39.1918, "step": 7091 }, { "epoch": 168.8597014925373, "grad_norm": 1.3831478357315063, "learning_rate": 9.514550264550265e-06, "loss": 38.5412, "step": 7092 }, { "epoch": 168.88358208955225, "grad_norm": 1.356590747833252, "learning_rate": 9.513227513227515e-06, "loss": 40.2749, "step": 7093 }, { "epoch": 168.90746268656716, "grad_norm": 1.345308542251587, "learning_rate": 9.511904761904763e-06, "loss": 39.7523, "step": 7094 }, { "epoch": 168.9313432835821, "grad_norm": 1.3508780002593994, "learning_rate": 9.51058201058201e-06, "loss": 38.7908, "step": 7095 }, { "epoch": 168.955223880597, "grad_norm": 1.3625800609588623, "learning_rate": 9.50925925925926e-06, "loss": 40.6204, "step": 7096 }, { "epoch": 168.97910447761194, "grad_norm": 1.3752845525741577, "learning_rate": 9.507936507936508e-06, "loss": 41.0542, "step": 7097 }, { "epoch": 169.0, "grad_norm": 1.3438960313796997, "learning_rate": 9.506613756613758e-06, "loss": 34.6698, "step": 7098 }, { "epoch": 169.02388059701494, "grad_norm": 1.3486193418502808, "learning_rate": 9.505291005291006e-06, "loss": 41.4155, "step": 7099 }, { "epoch": 169.04776119402985, "grad_norm": 1.331634283065796, "learning_rate": 9.503968253968255e-06, "loss": 39.2714, "step": 7100 }, { "epoch": 169.07164179104478, "grad_norm": 1.3297326564788818, "learning_rate": 9.502645502645503e-06, "loss": 39.6652, "step": 7101 }, { "epoch": 169.0955223880597, "grad_norm": 1.3588078022003174, "learning_rate": 9.501322751322753e-06, "loss": 38.876, "step": 7102 }, { "epoch": 169.11940298507463, "grad_norm": 1.3721580505371094, "learning_rate": 9.5e-06, "loss": 38.3734, "step": 7103 }, { "epoch": 169.14328358208957, "grad_norm": 1.3384313583374023, "learning_rate": 9.49867724867725e-06, "loss": 40.8429, "step": 7104 }, { "epoch": 169.16716417910447, "grad_norm": 1.3668956756591797, "learning_rate": 9.497354497354498e-06, "loss": 40.9638, "step": 7105 }, { "epoch": 169.1910447761194, "grad_norm": 1.3079472780227661, "learning_rate": 9.496031746031746e-06, "loss": 38.8985, "step": 7106 }, { "epoch": 169.21492537313432, "grad_norm": 1.334763526916504, "learning_rate": 9.494708994708996e-06, "loss": 39.4778, "step": 7107 }, { "epoch": 169.23880597014926, "grad_norm": 1.3430068492889404, "learning_rate": 9.493386243386245e-06, "loss": 39.8384, "step": 7108 }, { "epoch": 169.26268656716417, "grad_norm": 1.3613860607147217, "learning_rate": 9.492063492063493e-06, "loss": 39.8486, "step": 7109 }, { "epoch": 169.2865671641791, "grad_norm": 1.3685253858566284, "learning_rate": 9.490740740740741e-06, "loss": 40.4534, "step": 7110 }, { "epoch": 169.31044776119404, "grad_norm": 1.3537236452102661, "learning_rate": 9.48941798941799e-06, "loss": 39.607, "step": 7111 }, { "epoch": 169.33432835820895, "grad_norm": 1.3147629499435425, "learning_rate": 9.488095238095238e-06, "loss": 38.3008, "step": 7112 }, { "epoch": 169.3582089552239, "grad_norm": 1.3524067401885986, "learning_rate": 9.486772486772488e-06, "loss": 39.8697, "step": 7113 }, { "epoch": 169.3820895522388, "grad_norm": 1.357541561126709, "learning_rate": 9.485449735449736e-06, "loss": 39.9783, "step": 7114 }, { "epoch": 169.40597014925373, "grad_norm": 1.3490022420883179, "learning_rate": 9.484126984126984e-06, "loss": 40.2126, "step": 7115 }, { "epoch": 169.42985074626867, "grad_norm": 1.352953553199768, "learning_rate": 9.482804232804233e-06, "loss": 37.7046, "step": 7116 }, { "epoch": 169.45373134328358, "grad_norm": 1.3989671468734741, "learning_rate": 9.481481481481483e-06, "loss": 40.0015, "step": 7117 }, { "epoch": 169.47761194029852, "grad_norm": 1.3343031406402588, "learning_rate": 9.480158730158731e-06, "loss": 38.6188, "step": 7118 }, { "epoch": 169.50149253731342, "grad_norm": 1.3417316675186157, "learning_rate": 9.478835978835979e-06, "loss": 39.5876, "step": 7119 }, { "epoch": 169.52537313432836, "grad_norm": 1.3748295307159424, "learning_rate": 9.477513227513228e-06, "loss": 38.3695, "step": 7120 }, { "epoch": 169.54925373134327, "grad_norm": 1.3688911199569702, "learning_rate": 9.476190476190476e-06, "loss": 40.6121, "step": 7121 }, { "epoch": 169.5731343283582, "grad_norm": 1.3369336128234863, "learning_rate": 9.474867724867726e-06, "loss": 38.7368, "step": 7122 }, { "epoch": 169.59701492537314, "grad_norm": 1.318602442741394, "learning_rate": 9.473544973544975e-06, "loss": 38.5565, "step": 7123 }, { "epoch": 169.62089552238805, "grad_norm": 1.3630393743515015, "learning_rate": 9.472222222222223e-06, "loss": 38.5573, "step": 7124 }, { "epoch": 169.644776119403, "grad_norm": 1.3206722736358643, "learning_rate": 9.470899470899471e-06, "loss": 39.4992, "step": 7125 }, { "epoch": 169.6686567164179, "grad_norm": 1.3590387105941772, "learning_rate": 9.469576719576721e-06, "loss": 40.608, "step": 7126 }, { "epoch": 169.69253731343284, "grad_norm": 1.3363189697265625, "learning_rate": 9.468253968253969e-06, "loss": 40.7754, "step": 7127 }, { "epoch": 169.71641791044777, "grad_norm": 1.3422224521636963, "learning_rate": 9.466931216931218e-06, "loss": 39.1074, "step": 7128 }, { "epoch": 169.74029850746268, "grad_norm": 1.37155282497406, "learning_rate": 9.465608465608466e-06, "loss": 41.2402, "step": 7129 }, { "epoch": 169.76417910447762, "grad_norm": 1.3153523206710815, "learning_rate": 9.464285714285714e-06, "loss": 40.0884, "step": 7130 }, { "epoch": 169.78805970149253, "grad_norm": 1.301551342010498, "learning_rate": 9.462962962962964e-06, "loss": 40.3491, "step": 7131 }, { "epoch": 169.81194029850747, "grad_norm": 1.3921793699264526, "learning_rate": 9.461640211640213e-06, "loss": 40.3347, "step": 7132 }, { "epoch": 169.83582089552237, "grad_norm": 1.3138710260391235, "learning_rate": 9.460317460317461e-06, "loss": 40.2208, "step": 7133 }, { "epoch": 169.8597014925373, "grad_norm": 1.3605502843856812, "learning_rate": 9.45899470899471e-06, "loss": 40.209, "step": 7134 }, { "epoch": 169.88358208955225, "grad_norm": 1.3803596496582031, "learning_rate": 9.457671957671959e-06, "loss": 40.1368, "step": 7135 }, { "epoch": 169.90746268656716, "grad_norm": 1.3509975671768188, "learning_rate": 9.456349206349207e-06, "loss": 40.2833, "step": 7136 }, { "epoch": 169.9313432835821, "grad_norm": 1.3689559698104858, "learning_rate": 9.455026455026456e-06, "loss": 40.634, "step": 7137 }, { "epoch": 169.955223880597, "grad_norm": 1.3937246799468994, "learning_rate": 9.453703703703704e-06, "loss": 39.7271, "step": 7138 }, { "epoch": 169.97910447761194, "grad_norm": 1.3915613889694214, "learning_rate": 9.452380952380952e-06, "loss": 40.7597, "step": 7139 }, { "epoch": 170.0, "grad_norm": 1.3330239057540894, "learning_rate": 9.451058201058202e-06, "loss": 35.1925, "step": 7140 }, { "epoch": 170.02388059701494, "grad_norm": 1.3262258768081665, "learning_rate": 9.449735449735451e-06, "loss": 40.8172, "step": 7141 }, { "epoch": 170.04776119402985, "grad_norm": 1.396565318107605, "learning_rate": 9.4484126984127e-06, "loss": 37.8169, "step": 7142 }, { "epoch": 170.07164179104478, "grad_norm": 1.3543195724487305, "learning_rate": 9.447089947089949e-06, "loss": 40.6836, "step": 7143 }, { "epoch": 170.0955223880597, "grad_norm": 1.3194537162780762, "learning_rate": 9.445767195767197e-06, "loss": 40.4921, "step": 7144 }, { "epoch": 170.11940298507463, "grad_norm": 1.3341922760009766, "learning_rate": 9.444444444444445e-06, "loss": 39.7737, "step": 7145 }, { "epoch": 170.14328358208957, "grad_norm": 1.3740530014038086, "learning_rate": 9.443121693121694e-06, "loss": 40.0377, "step": 7146 }, { "epoch": 170.16716417910447, "grad_norm": 1.3599907159805298, "learning_rate": 9.441798941798944e-06, "loss": 40.0189, "step": 7147 }, { "epoch": 170.1910447761194, "grad_norm": 1.3085746765136719, "learning_rate": 9.440476190476192e-06, "loss": 39.8579, "step": 7148 }, { "epoch": 170.21492537313432, "grad_norm": 1.3401015996932983, "learning_rate": 9.43915343915344e-06, "loss": 40.762, "step": 7149 }, { "epoch": 170.23880597014926, "grad_norm": 1.3249237537384033, "learning_rate": 9.437830687830689e-06, "loss": 39.0433, "step": 7150 }, { "epoch": 170.26268656716417, "grad_norm": 1.3491779565811157, "learning_rate": 9.436507936507937e-06, "loss": 38.5821, "step": 7151 }, { "epoch": 170.2865671641791, "grad_norm": 1.341207504272461, "learning_rate": 9.435185185185187e-06, "loss": 39.0287, "step": 7152 }, { "epoch": 170.31044776119404, "grad_norm": 1.3572359085083008, "learning_rate": 9.433862433862435e-06, "loss": 39.3265, "step": 7153 }, { "epoch": 170.33432835820895, "grad_norm": 1.3418549299240112, "learning_rate": 9.432539682539682e-06, "loss": 40.6952, "step": 7154 }, { "epoch": 170.3582089552239, "grad_norm": 1.355676531791687, "learning_rate": 9.431216931216932e-06, "loss": 39.3371, "step": 7155 }, { "epoch": 170.3820895522388, "grad_norm": 1.3331791162490845, "learning_rate": 9.429894179894182e-06, "loss": 39.6698, "step": 7156 }, { "epoch": 170.40597014925373, "grad_norm": 1.364210844039917, "learning_rate": 9.42857142857143e-06, "loss": 40.311, "step": 7157 }, { "epoch": 170.42985074626867, "grad_norm": 1.3581721782684326, "learning_rate": 9.427248677248677e-06, "loss": 39.5021, "step": 7158 }, { "epoch": 170.45373134328358, "grad_norm": 1.344396710395813, "learning_rate": 9.425925925925925e-06, "loss": 38.3445, "step": 7159 }, { "epoch": 170.47761194029852, "grad_norm": 1.361926555633545, "learning_rate": 9.424603174603175e-06, "loss": 38.9324, "step": 7160 }, { "epoch": 170.50149253731342, "grad_norm": 1.3381565809249878, "learning_rate": 9.423280423280425e-06, "loss": 39.5553, "step": 7161 }, { "epoch": 170.52537313432836, "grad_norm": 1.311760663986206, "learning_rate": 9.421957671957672e-06, "loss": 39.3498, "step": 7162 }, { "epoch": 170.54925373134327, "grad_norm": 1.3610490560531616, "learning_rate": 9.420634920634922e-06, "loss": 39.9446, "step": 7163 }, { "epoch": 170.5731343283582, "grad_norm": 1.3327308893203735, "learning_rate": 9.41931216931217e-06, "loss": 40.5661, "step": 7164 }, { "epoch": 170.59701492537314, "grad_norm": 1.3453307151794434, "learning_rate": 9.417989417989418e-06, "loss": 39.4192, "step": 7165 }, { "epoch": 170.62089552238805, "grad_norm": 1.3489744663238525, "learning_rate": 9.416666666666667e-06, "loss": 39.6081, "step": 7166 }, { "epoch": 170.644776119403, "grad_norm": 1.3632079362869263, "learning_rate": 9.415343915343917e-06, "loss": 40.4695, "step": 7167 }, { "epoch": 170.6686567164179, "grad_norm": 1.3469113111495972, "learning_rate": 9.414021164021165e-06, "loss": 39.1772, "step": 7168 }, { "epoch": 170.69253731343284, "grad_norm": 1.34419846534729, "learning_rate": 9.412698412698413e-06, "loss": 40.6199, "step": 7169 }, { "epoch": 170.71641791044777, "grad_norm": 1.3465518951416016, "learning_rate": 9.411375661375662e-06, "loss": 39.9124, "step": 7170 }, { "epoch": 170.74029850746268, "grad_norm": 1.3518483638763428, "learning_rate": 9.41005291005291e-06, "loss": 39.876, "step": 7171 }, { "epoch": 170.76417910447762, "grad_norm": 1.3460254669189453, "learning_rate": 9.40873015873016e-06, "loss": 41.0544, "step": 7172 }, { "epoch": 170.78805970149253, "grad_norm": 1.380447268486023, "learning_rate": 9.407407407407408e-06, "loss": 40.2536, "step": 7173 }, { "epoch": 170.81194029850747, "grad_norm": 1.3598554134368896, "learning_rate": 9.406084656084656e-06, "loss": 40.527, "step": 7174 }, { "epoch": 170.83582089552237, "grad_norm": 1.3470475673675537, "learning_rate": 9.404761904761905e-06, "loss": 40.0275, "step": 7175 }, { "epoch": 170.8597014925373, "grad_norm": 1.3463116884231567, "learning_rate": 9.403439153439155e-06, "loss": 39.0637, "step": 7176 }, { "epoch": 170.88358208955225, "grad_norm": 1.331470251083374, "learning_rate": 9.402116402116403e-06, "loss": 40.641, "step": 7177 }, { "epoch": 170.90746268656716, "grad_norm": 1.3633310794830322, "learning_rate": 9.40079365079365e-06, "loss": 40.2795, "step": 7178 }, { "epoch": 170.9313432835821, "grad_norm": 1.305858850479126, "learning_rate": 9.3994708994709e-06, "loss": 40.243, "step": 7179 }, { "epoch": 170.955223880597, "grad_norm": 1.3637527227401733, "learning_rate": 9.398148148148148e-06, "loss": 39.0004, "step": 7180 }, { "epoch": 170.97910447761194, "grad_norm": 1.3380577564239502, "learning_rate": 9.396825396825398e-06, "loss": 38.4645, "step": 7181 }, { "epoch": 171.0, "grad_norm": 1.329349398612976, "learning_rate": 9.395502645502647e-06, "loss": 34.6676, "step": 7182 }, { "epoch": 171.02388059701494, "grad_norm": 1.319063425064087, "learning_rate": 9.394179894179895e-06, "loss": 39.6201, "step": 7183 }, { "epoch": 171.04776119402985, "grad_norm": 1.328701138496399, "learning_rate": 9.392857142857143e-06, "loss": 39.563, "step": 7184 }, { "epoch": 171.07164179104478, "grad_norm": 1.3429590463638306, "learning_rate": 9.391534391534393e-06, "loss": 38.74, "step": 7185 }, { "epoch": 171.0955223880597, "grad_norm": 1.3411667346954346, "learning_rate": 9.39021164021164e-06, "loss": 40.7305, "step": 7186 }, { "epoch": 171.11940298507463, "grad_norm": 1.4476734399795532, "learning_rate": 9.38888888888889e-06, "loss": 40.059, "step": 7187 }, { "epoch": 171.14328358208957, "grad_norm": 1.341245412826538, "learning_rate": 9.387566137566138e-06, "loss": 39.1336, "step": 7188 }, { "epoch": 171.16716417910447, "grad_norm": 1.3224161863327026, "learning_rate": 9.386243386243386e-06, "loss": 40.2197, "step": 7189 }, { "epoch": 171.1910447761194, "grad_norm": 1.3007460832595825, "learning_rate": 9.384920634920636e-06, "loss": 39.1822, "step": 7190 }, { "epoch": 171.21492537313432, "grad_norm": 1.3537617921829224, "learning_rate": 9.383597883597885e-06, "loss": 40.5367, "step": 7191 }, { "epoch": 171.23880597014926, "grad_norm": 1.3276145458221436, "learning_rate": 9.382275132275133e-06, "loss": 39.7707, "step": 7192 }, { "epoch": 171.26268656716417, "grad_norm": 1.339526891708374, "learning_rate": 9.380952380952381e-06, "loss": 40.1924, "step": 7193 }, { "epoch": 171.2865671641791, "grad_norm": 1.3344089984893799, "learning_rate": 9.37962962962963e-06, "loss": 40.3606, "step": 7194 }, { "epoch": 171.31044776119404, "grad_norm": 1.4000035524368286, "learning_rate": 9.378306878306879e-06, "loss": 39.8373, "step": 7195 }, { "epoch": 171.33432835820895, "grad_norm": 1.3877160549163818, "learning_rate": 9.376984126984128e-06, "loss": 40.2461, "step": 7196 }, { "epoch": 171.3582089552239, "grad_norm": 1.340790033340454, "learning_rate": 9.375661375661376e-06, "loss": 38.5727, "step": 7197 }, { "epoch": 171.3820895522388, "grad_norm": 1.3561476469039917, "learning_rate": 9.374338624338624e-06, "loss": 39.7051, "step": 7198 }, { "epoch": 171.40597014925373, "grad_norm": 1.361050009727478, "learning_rate": 9.373015873015874e-06, "loss": 39.9437, "step": 7199 }, { "epoch": 171.42985074626867, "grad_norm": 1.3544739484786987, "learning_rate": 9.371693121693123e-06, "loss": 39.8629, "step": 7200 }, { "epoch": 171.45373134328358, "grad_norm": 1.345081090927124, "learning_rate": 9.370370370370371e-06, "loss": 38.7938, "step": 7201 }, { "epoch": 171.47761194029852, "grad_norm": 1.3780359029769897, "learning_rate": 9.36904761904762e-06, "loss": 39.8709, "step": 7202 }, { "epoch": 171.50149253731342, "grad_norm": 1.3390543460845947, "learning_rate": 9.367724867724869e-06, "loss": 39.9079, "step": 7203 }, { "epoch": 171.52537313432836, "grad_norm": 1.3201690912246704, "learning_rate": 9.366402116402116e-06, "loss": 39.5587, "step": 7204 }, { "epoch": 171.54925373134327, "grad_norm": 1.369909405708313, "learning_rate": 9.365079365079366e-06, "loss": 40.2892, "step": 7205 }, { "epoch": 171.5731343283582, "grad_norm": 1.318674087524414, "learning_rate": 9.363756613756616e-06, "loss": 40.0369, "step": 7206 }, { "epoch": 171.59701492537314, "grad_norm": NaN, "learning_rate": 9.362433862433864e-06, "loss": 55.3686, "step": 7207 }, { "epoch": 171.62089552238805, "grad_norm": 1.3507757186889648, "learning_rate": 9.362433862433864e-06, "loss": 40.3836, "step": 7208 }, { "epoch": 171.644776119403, "grad_norm": 1.3558305501937866, "learning_rate": 9.361111111111111e-06, "loss": 39.0671, "step": 7209 }, { "epoch": 171.6686567164179, "grad_norm": 1.3233661651611328, "learning_rate": 9.359788359788361e-06, "loss": 39.0614, "step": 7210 }, { "epoch": 171.69253731343284, "grad_norm": 1.3654851913452148, "learning_rate": 9.358465608465609e-06, "loss": 39.5419, "step": 7211 }, { "epoch": 171.71641791044777, "grad_norm": 1.3195794820785522, "learning_rate": 9.357142857142859e-06, "loss": 40.6874, "step": 7212 }, { "epoch": 171.74029850746268, "grad_norm": 1.3735674619674683, "learning_rate": 9.355820105820106e-06, "loss": 40.5782, "step": 7213 }, { "epoch": 171.76417910447762, "grad_norm": 1.3381919860839844, "learning_rate": 9.354497354497354e-06, "loss": 40.5762, "step": 7214 }, { "epoch": 171.78805970149253, "grad_norm": 1.3386741876602173, "learning_rate": 9.353174603174604e-06, "loss": 39.9861, "step": 7215 }, { "epoch": 171.81194029850747, "grad_norm": 1.3468060493469238, "learning_rate": 9.351851851851854e-06, "loss": 39.0176, "step": 7216 }, { "epoch": 171.83582089552237, "grad_norm": 1.3305038213729858, "learning_rate": 9.350529100529101e-06, "loss": 39.478, "step": 7217 }, { "epoch": 171.8597014925373, "grad_norm": 1.3390151262283325, "learning_rate": 9.34920634920635e-06, "loss": 38.4343, "step": 7218 }, { "epoch": 171.88358208955225, "grad_norm": 1.3568507432937622, "learning_rate": 9.347883597883599e-06, "loss": 39.5568, "step": 7219 }, { "epoch": 171.90746268656716, "grad_norm": 1.3196043968200684, "learning_rate": 9.346560846560847e-06, "loss": 40.2839, "step": 7220 }, { "epoch": 171.9313432835821, "grad_norm": 1.3547987937927246, "learning_rate": 9.345238095238096e-06, "loss": 39.4396, "step": 7221 }, { "epoch": 171.955223880597, "grad_norm": 1.3638144731521606, "learning_rate": 9.343915343915344e-06, "loss": 39.3734, "step": 7222 }, { "epoch": 171.97910447761194, "grad_norm": 1.6199836730957031, "learning_rate": 9.342592592592594e-06, "loss": 39.2949, "step": 7223 }, { "epoch": 172.0, "grad_norm": 1.3820313215255737, "learning_rate": 9.341269841269842e-06, "loss": 35.3657, "step": 7224 }, { "epoch": 172.02388059701494, "grad_norm": 1.3362001180648804, "learning_rate": 9.339947089947091e-06, "loss": 40.426, "step": 7225 }, { "epoch": 172.04776119402985, "grad_norm": 1.326817512512207, "learning_rate": 9.33862433862434e-06, "loss": 39.6416, "step": 7226 }, { "epoch": 172.07164179104478, "grad_norm": 1.3397701978683472, "learning_rate": 9.337301587301589e-06, "loss": 38.719, "step": 7227 }, { "epoch": 172.0955223880597, "grad_norm": NaN, "learning_rate": 9.335978835978837e-06, "loss": 64.7968, "step": 7228 }, { "epoch": 172.11940298507463, "grad_norm": 1.331685185432434, "learning_rate": 9.335978835978837e-06, "loss": 40.4221, "step": 7229 }, { "epoch": 172.14328358208957, "grad_norm": 1.3680771589279175, "learning_rate": 9.334656084656085e-06, "loss": 39.9626, "step": 7230 }, { "epoch": 172.16716417910447, "grad_norm": 1.3584150075912476, "learning_rate": 9.333333333333334e-06, "loss": 40.0321, "step": 7231 }, { "epoch": 172.1910447761194, "grad_norm": 1.3385413885116577, "learning_rate": 9.332010582010584e-06, "loss": 39.6497, "step": 7232 }, { "epoch": 172.21492537313432, "grad_norm": 1.3601174354553223, "learning_rate": 9.330687830687832e-06, "loss": 39.1492, "step": 7233 }, { "epoch": 172.23880597014926, "grad_norm": 1.3738369941711426, "learning_rate": 9.32936507936508e-06, "loss": 40.9252, "step": 7234 }, { "epoch": 172.26268656716417, "grad_norm": 1.3309648036956787, "learning_rate": 9.32804232804233e-06, "loss": 39.1207, "step": 7235 }, { "epoch": 172.2865671641791, "grad_norm": 1.368870735168457, "learning_rate": 9.326719576719577e-06, "loss": 39.5406, "step": 7236 }, { "epoch": 172.31044776119404, "grad_norm": 1.3580938577651978, "learning_rate": 9.325396825396827e-06, "loss": 38.7666, "step": 7237 }, { "epoch": 172.33432835820895, "grad_norm": 1.3404444456100464, "learning_rate": 9.324074074074075e-06, "loss": 39.2625, "step": 7238 }, { "epoch": 172.3582089552239, "grad_norm": 1.3336524963378906, "learning_rate": 9.322751322751323e-06, "loss": 39.6929, "step": 7239 }, { "epoch": 172.3820895522388, "grad_norm": 1.3280375003814697, "learning_rate": 9.321428571428572e-06, "loss": 40.2371, "step": 7240 }, { "epoch": 172.40597014925373, "grad_norm": 1.353670358657837, "learning_rate": 9.32010582010582e-06, "loss": 38.8586, "step": 7241 }, { "epoch": 172.42985074626867, "grad_norm": 1.3460719585418701, "learning_rate": 9.31878306878307e-06, "loss": 40.1038, "step": 7242 }, { "epoch": 172.45373134328358, "grad_norm": 1.3371565341949463, "learning_rate": 9.317460317460318e-06, "loss": 39.4123, "step": 7243 }, { "epoch": 172.47761194029852, "grad_norm": 1.3506194353103638, "learning_rate": 9.316137566137567e-06, "loss": 39.6114, "step": 7244 }, { "epoch": 172.50149253731342, "grad_norm": 1.350117564201355, "learning_rate": 9.314814814814815e-06, "loss": 38.8367, "step": 7245 }, { "epoch": 172.52537313432836, "grad_norm": 1.3830896615982056, "learning_rate": 9.313492063492065e-06, "loss": 38.9254, "step": 7246 }, { "epoch": 172.54925373134327, "grad_norm": 1.3733288049697876, "learning_rate": 9.312169312169313e-06, "loss": 39.3469, "step": 7247 }, { "epoch": 172.5731343283582, "grad_norm": 1.3439626693725586, "learning_rate": 9.310846560846562e-06, "loss": 39.2686, "step": 7248 }, { "epoch": 172.59701492537314, "grad_norm": 1.2914012670516968, "learning_rate": 9.30952380952381e-06, "loss": 40.6579, "step": 7249 }, { "epoch": 172.62089552238805, "grad_norm": 1.3595120906829834, "learning_rate": 9.308201058201058e-06, "loss": 40.0792, "step": 7250 }, { "epoch": 172.644776119403, "grad_norm": 1.3526846170425415, "learning_rate": 9.306878306878308e-06, "loss": 40.7855, "step": 7251 }, { "epoch": 172.6686567164179, "grad_norm": 1.3401552438735962, "learning_rate": 9.305555555555557e-06, "loss": 40.2772, "step": 7252 }, { "epoch": 172.69253731343284, "grad_norm": 1.3086196184158325, "learning_rate": 9.304232804232805e-06, "loss": 40.7645, "step": 7253 }, { "epoch": 172.71641791044777, "grad_norm": 1.348833680152893, "learning_rate": 9.302910052910053e-06, "loss": 38.2582, "step": 7254 }, { "epoch": 172.74029850746268, "grad_norm": 1.3486543893814087, "learning_rate": 9.301587301587303e-06, "loss": 40.0919, "step": 7255 }, { "epoch": 172.76417910447762, "grad_norm": 1.3650239706039429, "learning_rate": 9.30026455026455e-06, "loss": 38.9888, "step": 7256 }, { "epoch": 172.78805970149253, "grad_norm": 1.372307538986206, "learning_rate": 9.2989417989418e-06, "loss": 40.0086, "step": 7257 }, { "epoch": 172.81194029850747, "grad_norm": 1.3410948514938354, "learning_rate": 9.297619047619048e-06, "loss": 39.7721, "step": 7258 }, { "epoch": 172.83582089552237, "grad_norm": 1.3908497095108032, "learning_rate": 9.296296296296296e-06, "loss": 40.1436, "step": 7259 }, { "epoch": 172.8597014925373, "grad_norm": 1.34599769115448, "learning_rate": 9.294973544973545e-06, "loss": 40.947, "step": 7260 }, { "epoch": 172.88358208955225, "grad_norm": 1.3636701107025146, "learning_rate": 9.293650793650795e-06, "loss": 39.87, "step": 7261 }, { "epoch": 172.90746268656716, "grad_norm": 1.3469781875610352, "learning_rate": 9.292328042328043e-06, "loss": 40.0445, "step": 7262 }, { "epoch": 172.9313432835821, "grad_norm": 1.3575636148452759, "learning_rate": 9.291005291005291e-06, "loss": 39.8911, "step": 7263 }, { "epoch": 172.955223880597, "grad_norm": 1.3286575078964233, "learning_rate": 9.28968253968254e-06, "loss": 40.1545, "step": 7264 }, { "epoch": 172.97910447761194, "grad_norm": 1.355547547340393, "learning_rate": 9.288359788359788e-06, "loss": 39.4368, "step": 7265 }, { "epoch": 173.0, "grad_norm": 1.3618028163909912, "learning_rate": 9.287037037037038e-06, "loss": 35.3863, "step": 7266 }, { "epoch": 173.02388059701494, "grad_norm": 1.3261897563934326, "learning_rate": 9.285714285714288e-06, "loss": 39.5407, "step": 7267 }, { "epoch": 173.04776119402985, "grad_norm": 1.3454641103744507, "learning_rate": 9.284391534391535e-06, "loss": 40.063, "step": 7268 }, { "epoch": 173.07164179104478, "grad_norm": 1.3404861688613892, "learning_rate": 9.283068783068783e-06, "loss": 40.3042, "step": 7269 }, { "epoch": 173.0955223880597, "grad_norm": 1.34608793258667, "learning_rate": 9.281746031746033e-06, "loss": 38.6923, "step": 7270 }, { "epoch": 173.11940298507463, "grad_norm": 1.3655990362167358, "learning_rate": 9.280423280423281e-06, "loss": 39.7919, "step": 7271 }, { "epoch": 173.14328358208957, "grad_norm": 1.319282054901123, "learning_rate": 9.27910052910053e-06, "loss": 40.4831, "step": 7272 }, { "epoch": 173.16716417910447, "grad_norm": 1.3599673509597778, "learning_rate": 9.277777777777778e-06, "loss": 40.0875, "step": 7273 }, { "epoch": 173.1910447761194, "grad_norm": 1.3290376663208008, "learning_rate": 9.276455026455026e-06, "loss": 39.8141, "step": 7274 }, { "epoch": 173.21492537313432, "grad_norm": 1.3849685192108154, "learning_rate": 9.275132275132276e-06, "loss": 40.3623, "step": 7275 }, { "epoch": 173.23880597014926, "grad_norm": 1.3209336996078491, "learning_rate": 9.273809523809525e-06, "loss": 41.071, "step": 7276 }, { "epoch": 173.26268656716417, "grad_norm": 1.35568368434906, "learning_rate": 9.272486772486773e-06, "loss": 38.5588, "step": 7277 }, { "epoch": 173.2865671641791, "grad_norm": 1.3420324325561523, "learning_rate": 9.271164021164021e-06, "loss": 39.0365, "step": 7278 }, { "epoch": 173.31044776119404, "grad_norm": 1.3469103574752808, "learning_rate": 9.26984126984127e-06, "loss": 40.6276, "step": 7279 }, { "epoch": 173.33432835820895, "grad_norm": 1.3617210388183594, "learning_rate": 9.268518518518519e-06, "loss": 39.1667, "step": 7280 }, { "epoch": 173.3582089552239, "grad_norm": 1.3667908906936646, "learning_rate": 9.267195767195768e-06, "loss": 39.8827, "step": 7281 }, { "epoch": 173.3820895522388, "grad_norm": 1.3455469608306885, "learning_rate": 9.265873015873016e-06, "loss": 40.0657, "step": 7282 }, { "epoch": 173.40597014925373, "grad_norm": 1.3281315565109253, "learning_rate": 9.264550264550264e-06, "loss": 39.1014, "step": 7283 }, { "epoch": 173.42985074626867, "grad_norm": 1.3713206052780151, "learning_rate": 9.263227513227514e-06, "loss": 40.2235, "step": 7284 }, { "epoch": 173.45373134328358, "grad_norm": 1.3701459169387817, "learning_rate": 9.261904761904763e-06, "loss": 39.9062, "step": 7285 }, { "epoch": 173.47761194029852, "grad_norm": 1.3707828521728516, "learning_rate": 9.260582010582011e-06, "loss": 38.5759, "step": 7286 }, { "epoch": 173.50149253731342, "grad_norm": 1.3247160911560059, "learning_rate": 9.25925925925926e-06, "loss": 39.9925, "step": 7287 }, { "epoch": 173.52537313432836, "grad_norm": 1.3310132026672363, "learning_rate": 9.257936507936509e-06, "loss": 40.0468, "step": 7288 }, { "epoch": 173.54925373134327, "grad_norm": 1.3671839237213135, "learning_rate": 9.256613756613757e-06, "loss": 39.6308, "step": 7289 }, { "epoch": 173.5731343283582, "grad_norm": 1.3280019760131836, "learning_rate": 9.255291005291006e-06, "loss": 40.741, "step": 7290 }, { "epoch": 173.59701492537314, "grad_norm": 1.3520594835281372, "learning_rate": 9.253968253968256e-06, "loss": 39.3346, "step": 7291 }, { "epoch": 173.62089552238805, "grad_norm": 1.323617696762085, "learning_rate": 9.252645502645504e-06, "loss": 40.0253, "step": 7292 }, { "epoch": 173.644776119403, "grad_norm": 1.3665153980255127, "learning_rate": 9.251322751322752e-06, "loss": 40.3984, "step": 7293 }, { "epoch": 173.6686567164179, "grad_norm": 1.324738621711731, "learning_rate": 9.250000000000001e-06, "loss": 39.6968, "step": 7294 }, { "epoch": 173.69253731343284, "grad_norm": 1.3476990461349487, "learning_rate": 9.248677248677249e-06, "loss": 39.5649, "step": 7295 }, { "epoch": 173.71641791044777, "grad_norm": 1.3469176292419434, "learning_rate": 9.247354497354499e-06, "loss": 38.9262, "step": 7296 }, { "epoch": 173.74029850746268, "grad_norm": 1.3572776317596436, "learning_rate": 9.246031746031747e-06, "loss": 39.4031, "step": 7297 }, { "epoch": 173.76417910447762, "grad_norm": 1.3752645254135132, "learning_rate": 9.244708994708995e-06, "loss": 38.5194, "step": 7298 }, { "epoch": 173.78805970149253, "grad_norm": 1.3639882802963257, "learning_rate": 9.243386243386244e-06, "loss": 39.7584, "step": 7299 }, { "epoch": 173.81194029850747, "grad_norm": 1.326098084449768, "learning_rate": 9.242063492063494e-06, "loss": 40.5383, "step": 7300 }, { "epoch": 173.83582089552237, "grad_norm": 1.350326418876648, "learning_rate": 9.240740740740742e-06, "loss": 39.3371, "step": 7301 }, { "epoch": 173.8597014925373, "grad_norm": 1.3196059465408325, "learning_rate": 9.23941798941799e-06, "loss": 40.1081, "step": 7302 }, { "epoch": 173.88358208955225, "grad_norm": 1.4265973567962646, "learning_rate": 9.238095238095239e-06, "loss": 39.2865, "step": 7303 }, { "epoch": 173.90746268656716, "grad_norm": 1.332656741142273, "learning_rate": 9.236772486772487e-06, "loss": 39.5572, "step": 7304 }, { "epoch": 173.9313432835821, "grad_norm": 1.3254095315933228, "learning_rate": 9.235449735449737e-06, "loss": 38.5735, "step": 7305 }, { "epoch": 173.955223880597, "grad_norm": 1.3783568143844604, "learning_rate": 9.234126984126986e-06, "loss": 40.7789, "step": 7306 }, { "epoch": 173.97910447761194, "grad_norm": 1.3714357614517212, "learning_rate": 9.232804232804234e-06, "loss": 40.4873, "step": 7307 }, { "epoch": 174.0, "grad_norm": 1.32612144947052, "learning_rate": 9.231481481481482e-06, "loss": 35.127, "step": 7308 }, { "epoch": 174.02388059701494, "grad_norm": 1.318514108657837, "learning_rate": 9.230158730158732e-06, "loss": 39.3674, "step": 7309 }, { "epoch": 174.04776119402985, "grad_norm": 1.4338476657867432, "learning_rate": 9.22883597883598e-06, "loss": 39.6966, "step": 7310 }, { "epoch": 174.07164179104478, "grad_norm": 1.3618606328964233, "learning_rate": 9.227513227513229e-06, "loss": 39.1042, "step": 7311 }, { "epoch": 174.0955223880597, "grad_norm": NaN, "learning_rate": 9.226190476190477e-06, "loss": 69.5127, "step": 7312 }, { "epoch": 174.11940298507463, "grad_norm": 1.4042284488677979, "learning_rate": 9.226190476190477e-06, "loss": 39.9607, "step": 7313 }, { "epoch": 174.14328358208957, "grad_norm": 1.4026013612747192, "learning_rate": 9.224867724867725e-06, "loss": 40.1511, "step": 7314 }, { "epoch": 174.16716417910447, "grad_norm": 1.3568694591522217, "learning_rate": 9.223544973544974e-06, "loss": 39.6409, "step": 7315 }, { "epoch": 174.1910447761194, "grad_norm": 1.3569552898406982, "learning_rate": 9.222222222222224e-06, "loss": 39.6225, "step": 7316 }, { "epoch": 174.21492537313432, "grad_norm": 1.3345904350280762, "learning_rate": 9.220899470899472e-06, "loss": 41.3713, "step": 7317 }, { "epoch": 174.23880597014926, "grad_norm": 1.3111231327056885, "learning_rate": 9.21957671957672e-06, "loss": 40.4788, "step": 7318 }, { "epoch": 174.26268656716417, "grad_norm": 1.4698351621627808, "learning_rate": 9.218253968253968e-06, "loss": 39.9572, "step": 7319 }, { "epoch": 174.2865671641791, "grad_norm": 1.3567854166030884, "learning_rate": 9.216931216931217e-06, "loss": 38.4423, "step": 7320 }, { "epoch": 174.31044776119404, "grad_norm": 1.4867687225341797, "learning_rate": 9.215608465608467e-06, "loss": 39.909, "step": 7321 }, { "epoch": 174.33432835820895, "grad_norm": 1.3549503087997437, "learning_rate": 9.214285714285715e-06, "loss": 39.5344, "step": 7322 }, { "epoch": 174.3582089552239, "grad_norm": 1.3454926013946533, "learning_rate": 9.212962962962963e-06, "loss": 39.7954, "step": 7323 }, { "epoch": 174.3820895522388, "grad_norm": 1.3607735633850098, "learning_rate": 9.211640211640212e-06, "loss": 40.1075, "step": 7324 }, { "epoch": 174.40597014925373, "grad_norm": 1.328131079673767, "learning_rate": 9.21031746031746e-06, "loss": 39.3338, "step": 7325 }, { "epoch": 174.42985074626867, "grad_norm": 1.3520519733428955, "learning_rate": 9.20899470899471e-06, "loss": 40.4587, "step": 7326 }, { "epoch": 174.45373134328358, "grad_norm": 1.33650541305542, "learning_rate": 9.20767195767196e-06, "loss": 39.3984, "step": 7327 }, { "epoch": 174.47761194029852, "grad_norm": 1.351664662361145, "learning_rate": 9.206349206349207e-06, "loss": 39.8097, "step": 7328 }, { "epoch": 174.50149253731342, "grad_norm": 1.3572107553482056, "learning_rate": 9.205026455026455e-06, "loss": 39.3338, "step": 7329 }, { "epoch": 174.52537313432836, "grad_norm": 1.3215022087097168, "learning_rate": 9.203703703703705e-06, "loss": 40.5761, "step": 7330 }, { "epoch": 174.54925373134327, "grad_norm": 1.3508996963500977, "learning_rate": 9.202380952380953e-06, "loss": 40.0919, "step": 7331 }, { "epoch": 174.5731343283582, "grad_norm": 1.3351258039474487, "learning_rate": 9.201058201058202e-06, "loss": 38.9117, "step": 7332 }, { "epoch": 174.59701492537314, "grad_norm": 1.3855445384979248, "learning_rate": 9.19973544973545e-06, "loss": 40.5677, "step": 7333 }, { "epoch": 174.62089552238805, "grad_norm": 1.3696563243865967, "learning_rate": 9.198412698412698e-06, "loss": 39.5215, "step": 7334 }, { "epoch": 174.644776119403, "grad_norm": 1.3258403539657593, "learning_rate": 9.197089947089948e-06, "loss": 40.3784, "step": 7335 }, { "epoch": 174.6686567164179, "grad_norm": 1.4121485948562622, "learning_rate": 9.195767195767197e-06, "loss": 39.6627, "step": 7336 }, { "epoch": 174.69253731343284, "grad_norm": 1.3840203285217285, "learning_rate": 9.194444444444445e-06, "loss": 39.3063, "step": 7337 }, { "epoch": 174.71641791044777, "grad_norm": 1.423812747001648, "learning_rate": 9.193121693121693e-06, "loss": 39.505, "step": 7338 }, { "epoch": 174.74029850746268, "grad_norm": 1.3642387390136719, "learning_rate": 9.191798941798943e-06, "loss": 40.4297, "step": 7339 }, { "epoch": 174.76417910447762, "grad_norm": 1.3654006719589233, "learning_rate": 9.19047619047619e-06, "loss": 40.9555, "step": 7340 }, { "epoch": 174.78805970149253, "grad_norm": 1.3691061735153198, "learning_rate": 9.18915343915344e-06, "loss": 39.9112, "step": 7341 }, { "epoch": 174.81194029850747, "grad_norm": 1.415709376335144, "learning_rate": 9.187830687830688e-06, "loss": 38.7459, "step": 7342 }, { "epoch": 174.83582089552237, "grad_norm": 1.3451846837997437, "learning_rate": 9.186507936507936e-06, "loss": 39.0763, "step": 7343 }, { "epoch": 174.8597014925373, "grad_norm": 1.357240080833435, "learning_rate": 9.185185185185186e-06, "loss": 39.2298, "step": 7344 }, { "epoch": 174.88358208955225, "grad_norm": 1.3587242364883423, "learning_rate": 9.183862433862435e-06, "loss": 39.2001, "step": 7345 }, { "epoch": 174.90746268656716, "grad_norm": 1.3248088359832764, "learning_rate": 9.182539682539683e-06, "loss": 39.248, "step": 7346 }, { "epoch": 174.9313432835821, "grad_norm": 1.356203556060791, "learning_rate": 9.181216931216933e-06, "loss": 38.955, "step": 7347 }, { "epoch": 174.955223880597, "grad_norm": 1.3575602769851685, "learning_rate": 9.17989417989418e-06, "loss": 40.3294, "step": 7348 }, { "epoch": 174.97910447761194, "grad_norm": 1.390126347541809, "learning_rate": 9.178571428571429e-06, "loss": 39.143, "step": 7349 }, { "epoch": 175.0, "grad_norm": 1.3239256143569946, "learning_rate": 9.177248677248678e-06, "loss": 36.2586, "step": 7350 }, { "epoch": 175.02388059701494, "grad_norm": 1.3360668420791626, "learning_rate": 9.175925925925928e-06, "loss": 38.7821, "step": 7351 }, { "epoch": 175.04776119402985, "grad_norm": 1.31790030002594, "learning_rate": 9.174603174603176e-06, "loss": 39.5069, "step": 7352 }, { "epoch": 175.07164179104478, "grad_norm": 1.353070855140686, "learning_rate": 9.173280423280424e-06, "loss": 38.7401, "step": 7353 }, { "epoch": 175.0955223880597, "grad_norm": 1.349911093711853, "learning_rate": 9.171957671957673e-06, "loss": 40.2657, "step": 7354 }, { "epoch": 175.11940298507463, "grad_norm": 1.3576761484146118, "learning_rate": 9.170634920634921e-06, "loss": 39.418, "step": 7355 }, { "epoch": 175.14328358208957, "grad_norm": 1.3711827993392944, "learning_rate": 9.16931216931217e-06, "loss": 39.3356, "step": 7356 }, { "epoch": 175.16716417910447, "grad_norm": 1.4017949104309082, "learning_rate": 9.167989417989419e-06, "loss": 40.3027, "step": 7357 }, { "epoch": 175.1910447761194, "grad_norm": 1.3340651988983154, "learning_rate": 9.166666666666666e-06, "loss": 39.9385, "step": 7358 }, { "epoch": 175.21492537313432, "grad_norm": 1.4652364253997803, "learning_rate": 9.165343915343916e-06, "loss": 40.2641, "step": 7359 }, { "epoch": 175.23880597014926, "grad_norm": 1.396429419517517, "learning_rate": 9.164021164021166e-06, "loss": 38.9414, "step": 7360 }, { "epoch": 175.26268656716417, "grad_norm": 1.3575589656829834, "learning_rate": 9.162698412698414e-06, "loss": 38.9134, "step": 7361 }, { "epoch": 175.2865671641791, "grad_norm": 1.3190656900405884, "learning_rate": 9.161375661375661e-06, "loss": 40.2621, "step": 7362 }, { "epoch": 175.31044776119404, "grad_norm": 1.350962519645691, "learning_rate": 9.160052910052911e-06, "loss": 40.8427, "step": 7363 }, { "epoch": 175.33432835820895, "grad_norm": 1.3436270952224731, "learning_rate": 9.158730158730159e-06, "loss": 39.6109, "step": 7364 }, { "epoch": 175.3582089552239, "grad_norm": 1.3335262537002563, "learning_rate": 9.157407407407409e-06, "loss": 38.9611, "step": 7365 }, { "epoch": 175.3820895522388, "grad_norm": 1.3355522155761719, "learning_rate": 9.156084656084656e-06, "loss": 39.6882, "step": 7366 }, { "epoch": 175.40597014925373, "grad_norm": 1.3880144357681274, "learning_rate": 9.154761904761906e-06, "loss": 39.6618, "step": 7367 }, { "epoch": 175.42985074626867, "grad_norm": 1.397220253944397, "learning_rate": 9.153439153439154e-06, "loss": 38.9062, "step": 7368 }, { "epoch": 175.45373134328358, "grad_norm": 1.3289062976837158, "learning_rate": 9.152116402116404e-06, "loss": 40.1454, "step": 7369 }, { "epoch": 175.47761194029852, "grad_norm": 1.5050179958343506, "learning_rate": 9.150793650793651e-06, "loss": 41.1401, "step": 7370 }, { "epoch": 175.50149253731342, "grad_norm": 1.3426024913787842, "learning_rate": 9.149470899470901e-06, "loss": 40.1819, "step": 7371 }, { "epoch": 175.52537313432836, "grad_norm": 1.377850890159607, "learning_rate": 9.148148148148149e-06, "loss": 39.6105, "step": 7372 }, { "epoch": 175.54925373134327, "grad_norm": 1.380335807800293, "learning_rate": 9.146825396825397e-06, "loss": 40.7559, "step": 7373 }, { "epoch": 175.5731343283582, "grad_norm": NaN, "learning_rate": 9.145502645502646e-06, "loss": 38.9656, "step": 7374 }, { "epoch": 175.59701492537314, "grad_norm": 1.349639892578125, "learning_rate": 9.145502645502646e-06, "loss": 40.386, "step": 7375 }, { "epoch": 175.62089552238805, "grad_norm": 1.3604176044464111, "learning_rate": 9.144179894179896e-06, "loss": 39.5059, "step": 7376 }, { "epoch": 175.644776119403, "grad_norm": 1.3274760246276855, "learning_rate": 9.142857142857144e-06, "loss": 38.8695, "step": 7377 }, { "epoch": 175.6686567164179, "grad_norm": 1.3531901836395264, "learning_rate": 9.141534391534392e-06, "loss": 40.0666, "step": 7378 }, { "epoch": 175.69253731343284, "grad_norm": 1.3732352256774902, "learning_rate": 9.140211640211641e-06, "loss": 40.2501, "step": 7379 }, { "epoch": 175.71641791044777, "grad_norm": 1.5355960130691528, "learning_rate": 9.13888888888889e-06, "loss": 40.4483, "step": 7380 }, { "epoch": 175.74029850746268, "grad_norm": 1.3239938020706177, "learning_rate": 9.137566137566139e-06, "loss": 39.654, "step": 7381 }, { "epoch": 175.76417910447762, "grad_norm": 1.3449605703353882, "learning_rate": 9.136243386243387e-06, "loss": 39.5978, "step": 7382 }, { "epoch": 175.78805970149253, "grad_norm": 1.3618800640106201, "learning_rate": 9.134920634920635e-06, "loss": 40.0148, "step": 7383 }, { "epoch": 175.81194029850747, "grad_norm": 1.3231267929077148, "learning_rate": 9.133597883597884e-06, "loss": 39.57, "step": 7384 }, { "epoch": 175.83582089552237, "grad_norm": 1.3758021593093872, "learning_rate": 9.132275132275134e-06, "loss": 39.3742, "step": 7385 }, { "epoch": 175.8597014925373, "grad_norm": 1.3019177913665771, "learning_rate": 9.130952380952382e-06, "loss": 40.2811, "step": 7386 }, { "epoch": 175.88358208955225, "grad_norm": 1.3544148206710815, "learning_rate": 9.12962962962963e-06, "loss": 40.6144, "step": 7387 }, { "epoch": 175.90746268656716, "grad_norm": 1.3432109355926514, "learning_rate": 9.12830687830688e-06, "loss": 38.4047, "step": 7388 }, { "epoch": 175.9313432835821, "grad_norm": 1.3468061685562134, "learning_rate": 9.126984126984127e-06, "loss": 40.0402, "step": 7389 }, { "epoch": 175.955223880597, "grad_norm": 1.3930106163024902, "learning_rate": 9.125661375661377e-06, "loss": 39.7085, "step": 7390 }, { "epoch": 175.97910447761194, "grad_norm": 1.3333525657653809, "learning_rate": 9.124338624338626e-06, "loss": 40.3588, "step": 7391 }, { "epoch": 176.0, "grad_norm": 1.3614896535873413, "learning_rate": 9.123015873015874e-06, "loss": 34.1582, "step": 7392 }, { "epoch": 176.02388059701494, "grad_norm": 1.3335237503051758, "learning_rate": 9.121693121693122e-06, "loss": 40.4135, "step": 7393 }, { "epoch": 176.04776119402985, "grad_norm": 1.3273295164108276, "learning_rate": 9.120370370370372e-06, "loss": 39.472, "step": 7394 }, { "epoch": 176.07164179104478, "grad_norm": 1.3421489000320435, "learning_rate": 9.11904761904762e-06, "loss": 40.8156, "step": 7395 }, { "epoch": 176.0955223880597, "grad_norm": 1.3549504280090332, "learning_rate": 9.11772486772487e-06, "loss": 39.8917, "step": 7396 }, { "epoch": 176.11940298507463, "grad_norm": 1.3300765752792358, "learning_rate": 9.116402116402117e-06, "loss": 40.4, "step": 7397 }, { "epoch": 176.14328358208957, "grad_norm": 1.3501636981964111, "learning_rate": 9.115079365079365e-06, "loss": 39.4187, "step": 7398 }, { "epoch": 176.16716417910447, "grad_norm": 1.3432456254959106, "learning_rate": 9.113756613756615e-06, "loss": 39.5996, "step": 7399 }, { "epoch": 176.1910447761194, "grad_norm": 1.3574341535568237, "learning_rate": 9.112433862433863e-06, "loss": 39.1241, "step": 7400 }, { "epoch": 176.21492537313432, "grad_norm": 1.334989070892334, "learning_rate": 9.111111111111112e-06, "loss": 39.2634, "step": 7401 }, { "epoch": 176.23880597014926, "grad_norm": 1.4135162830352783, "learning_rate": 9.10978835978836e-06, "loss": 40.0302, "step": 7402 }, { "epoch": 176.26268656716417, "grad_norm": 1.3358062505722046, "learning_rate": 9.108465608465608e-06, "loss": 39.8573, "step": 7403 }, { "epoch": 176.2865671641791, "grad_norm": 1.3273305892944336, "learning_rate": 9.107142857142858e-06, "loss": 39.2502, "step": 7404 }, { "epoch": 176.31044776119404, "grad_norm": 1.3129762411117554, "learning_rate": 9.105820105820107e-06, "loss": 39.9582, "step": 7405 }, { "epoch": 176.33432835820895, "grad_norm": 1.3592876195907593, "learning_rate": 9.104497354497355e-06, "loss": 40.5952, "step": 7406 }, { "epoch": 176.3582089552239, "grad_norm": 1.458012580871582, "learning_rate": 9.103174603174603e-06, "loss": 39.9755, "step": 7407 }, { "epoch": 176.3820895522388, "grad_norm": 1.3531750440597534, "learning_rate": 9.101851851851853e-06, "loss": 39.4522, "step": 7408 }, { "epoch": 176.40597014925373, "grad_norm": 1.3724374771118164, "learning_rate": 9.1005291005291e-06, "loss": 38.585, "step": 7409 }, { "epoch": 176.42985074626867, "grad_norm": 1.3491337299346924, "learning_rate": 9.09920634920635e-06, "loss": 40.1304, "step": 7410 }, { "epoch": 176.45373134328358, "grad_norm": 1.395094633102417, "learning_rate": 9.0978835978836e-06, "loss": 41.0978, "step": 7411 }, { "epoch": 176.47761194029852, "grad_norm": 1.3147659301757812, "learning_rate": 9.096560846560848e-06, "loss": 39.4848, "step": 7412 }, { "epoch": 176.50149253731342, "grad_norm": 1.3695194721221924, "learning_rate": 9.095238095238095e-06, "loss": 39.7824, "step": 7413 }, { "epoch": 176.52537313432836, "grad_norm": 1.3562337160110474, "learning_rate": 9.093915343915345e-06, "loss": 39.1792, "step": 7414 }, { "epoch": 176.54925373134327, "grad_norm": 1.3483574390411377, "learning_rate": 9.092592592592593e-06, "loss": 40.5452, "step": 7415 }, { "epoch": 176.5731343283582, "grad_norm": 1.3488575220108032, "learning_rate": 9.091269841269843e-06, "loss": 39.5593, "step": 7416 }, { "epoch": 176.59701492537314, "grad_norm": 1.3436477184295654, "learning_rate": 9.08994708994709e-06, "loss": 39.6784, "step": 7417 }, { "epoch": 176.62089552238805, "grad_norm": 1.3250492811203003, "learning_rate": 9.088624338624338e-06, "loss": 40.2166, "step": 7418 }, { "epoch": 176.644776119403, "grad_norm": 1.337456226348877, "learning_rate": 9.087301587301588e-06, "loss": 40.6685, "step": 7419 }, { "epoch": 176.6686567164179, "grad_norm": 1.4005659818649292, "learning_rate": 9.085978835978838e-06, "loss": 39.8857, "step": 7420 }, { "epoch": 176.69253731343284, "grad_norm": 1.3771792650222778, "learning_rate": 9.084656084656085e-06, "loss": 40.4768, "step": 7421 }, { "epoch": 176.71641791044777, "grad_norm": 1.3941495418548584, "learning_rate": 9.083333333333333e-06, "loss": 39.3193, "step": 7422 }, { "epoch": 176.74029850746268, "grad_norm": 1.3525428771972656, "learning_rate": 9.082010582010583e-06, "loss": 39.4949, "step": 7423 }, { "epoch": 176.76417910447762, "grad_norm": 1.3595256805419922, "learning_rate": 9.08068783068783e-06, "loss": 41.075, "step": 7424 }, { "epoch": 176.78805970149253, "grad_norm": NaN, "learning_rate": 9.07936507936508e-06, "loss": 53.1931, "step": 7425 }, { "epoch": 176.81194029850747, "grad_norm": 1.4051458835601807, "learning_rate": 9.07936507936508e-06, "loss": 38.3003, "step": 7426 }, { "epoch": 176.83582089552237, "grad_norm": 1.355988621711731, "learning_rate": 9.078042328042328e-06, "loss": 40.4216, "step": 7427 }, { "epoch": 176.8597014925373, "grad_norm": 1.3458783626556396, "learning_rate": 9.076719576719576e-06, "loss": 37.9612, "step": 7428 }, { "epoch": 176.88358208955225, "grad_norm": 1.3219330310821533, "learning_rate": 9.075396825396826e-06, "loss": 39.9087, "step": 7429 }, { "epoch": 176.90746268656716, "grad_norm": 1.33823561668396, "learning_rate": 9.074074074074075e-06, "loss": 39.5183, "step": 7430 }, { "epoch": 176.9313432835821, "grad_norm": 1.3705931901931763, "learning_rate": 9.072751322751323e-06, "loss": 39.522, "step": 7431 }, { "epoch": 176.955223880597, "grad_norm": 1.3504018783569336, "learning_rate": 9.071428571428573e-06, "loss": 39.7817, "step": 7432 }, { "epoch": 176.97910447761194, "grad_norm": 1.4239598512649536, "learning_rate": 9.07010582010582e-06, "loss": 39.0806, "step": 7433 }, { "epoch": 177.0, "grad_norm": 1.3622612953186035, "learning_rate": 9.068783068783069e-06, "loss": 35.1163, "step": 7434 }, { "epoch": 177.02388059701494, "grad_norm": 1.3778443336486816, "learning_rate": 9.067460317460318e-06, "loss": 39.7015, "step": 7435 }, { "epoch": 177.04776119402985, "grad_norm": 1.3261699676513672, "learning_rate": 9.066137566137568e-06, "loss": 39.7969, "step": 7436 }, { "epoch": 177.07164179104478, "grad_norm": 1.3146778345108032, "learning_rate": 9.064814814814816e-06, "loss": 39.8272, "step": 7437 }, { "epoch": 177.0955223880597, "grad_norm": 1.4455409049987793, "learning_rate": 9.063492063492064e-06, "loss": 38.1952, "step": 7438 }, { "epoch": 177.11940298507463, "grad_norm": 1.3631296157836914, "learning_rate": 9.062169312169313e-06, "loss": 40.0635, "step": 7439 }, { "epoch": 177.14328358208957, "grad_norm": 1.3350403308868408, "learning_rate": 9.060846560846561e-06, "loss": 40.0807, "step": 7440 }, { "epoch": 177.16716417910447, "grad_norm": 1.371151089668274, "learning_rate": 9.05952380952381e-06, "loss": 39.4787, "step": 7441 }, { "epoch": 177.1910447761194, "grad_norm": 1.3253875970840454, "learning_rate": 9.058201058201059e-06, "loss": 41.0518, "step": 7442 }, { "epoch": 177.21492537313432, "grad_norm": 1.3404216766357422, "learning_rate": 9.056878306878307e-06, "loss": 40.0976, "step": 7443 }, { "epoch": 177.23880597014926, "grad_norm": 1.3342854976654053, "learning_rate": 9.055555555555556e-06, "loss": 39.3634, "step": 7444 }, { "epoch": 177.26268656716417, "grad_norm": 1.3750914335250854, "learning_rate": 9.054232804232806e-06, "loss": 40.0021, "step": 7445 }, { "epoch": 177.2865671641791, "grad_norm": 1.414766550064087, "learning_rate": 9.052910052910054e-06, "loss": 38.8754, "step": 7446 }, { "epoch": 177.31044776119404, "grad_norm": 1.3537728786468506, "learning_rate": 9.051587301587302e-06, "loss": 39.1276, "step": 7447 }, { "epoch": 177.33432835820895, "grad_norm": 1.3379589319229126, "learning_rate": 9.050264550264551e-06, "loss": 39.0707, "step": 7448 }, { "epoch": 177.3582089552239, "grad_norm": 1.3124688863754272, "learning_rate": 9.048941798941799e-06, "loss": 41.7563, "step": 7449 }, { "epoch": 177.3820895522388, "grad_norm": 1.3829821348190308, "learning_rate": 9.047619047619049e-06, "loss": 39.0053, "step": 7450 }, { "epoch": 177.40597014925373, "grad_norm": 1.3691003322601318, "learning_rate": 9.046296296296298e-06, "loss": 39.5336, "step": 7451 }, { "epoch": 177.42985074626867, "grad_norm": 1.3572919368743896, "learning_rate": 9.044973544973546e-06, "loss": 38.0463, "step": 7452 }, { "epoch": 177.45373134328358, "grad_norm": 1.3965981006622314, "learning_rate": 9.043650793650794e-06, "loss": 39.7446, "step": 7453 }, { "epoch": 177.47761194029852, "grad_norm": 1.3646645545959473, "learning_rate": 9.042328042328044e-06, "loss": 39.273, "step": 7454 }, { "epoch": 177.50149253731342, "grad_norm": 1.3149584531784058, "learning_rate": 9.041005291005292e-06, "loss": 38.665, "step": 7455 }, { "epoch": 177.52537313432836, "grad_norm": 1.380684494972229, "learning_rate": 9.039682539682541e-06, "loss": 40.2163, "step": 7456 }, { "epoch": 177.54925373134327, "grad_norm": 1.3332833051681519, "learning_rate": 9.038359788359789e-06, "loss": 40.7322, "step": 7457 }, { "epoch": 177.5731343283582, "grad_norm": 1.3540666103363037, "learning_rate": 9.037037037037037e-06, "loss": 40.8944, "step": 7458 }, { "epoch": 177.59701492537314, "grad_norm": 1.3545989990234375, "learning_rate": 9.035714285714287e-06, "loss": 41.4531, "step": 7459 }, { "epoch": 177.62089552238805, "grad_norm": 1.3936676979064941, "learning_rate": 9.034391534391536e-06, "loss": 38.872, "step": 7460 }, { "epoch": 177.644776119403, "grad_norm": 1.3444592952728271, "learning_rate": 9.033068783068784e-06, "loss": 39.6382, "step": 7461 }, { "epoch": 177.6686567164179, "grad_norm": 1.3533788919448853, "learning_rate": 9.031746031746032e-06, "loss": 39.3604, "step": 7462 }, { "epoch": 177.69253731343284, "grad_norm": 1.3498634099960327, "learning_rate": 9.030423280423282e-06, "loss": 39.3371, "step": 7463 }, { "epoch": 177.71641791044777, "grad_norm": 1.323500394821167, "learning_rate": 9.02910052910053e-06, "loss": 39.357, "step": 7464 }, { "epoch": 177.74029850746268, "grad_norm": 1.3932663202285767, "learning_rate": 9.027777777777779e-06, "loss": 40.2419, "step": 7465 }, { "epoch": 177.76417910447762, "grad_norm": 1.402206540107727, "learning_rate": 9.026455026455027e-06, "loss": 39.3879, "step": 7466 }, { "epoch": 177.78805970149253, "grad_norm": 1.3550209999084473, "learning_rate": 9.025132275132275e-06, "loss": 40.1907, "step": 7467 }, { "epoch": 177.81194029850747, "grad_norm": 1.3531484603881836, "learning_rate": 9.023809523809524e-06, "loss": 39.4276, "step": 7468 }, { "epoch": 177.83582089552237, "grad_norm": 1.3626508712768555, "learning_rate": 9.022486772486774e-06, "loss": 39.3546, "step": 7469 }, { "epoch": 177.8597014925373, "grad_norm": 1.3454413414001465, "learning_rate": 9.021164021164022e-06, "loss": 41.2911, "step": 7470 }, { "epoch": 177.88358208955225, "grad_norm": 1.359519600868225, "learning_rate": 9.019841269841272e-06, "loss": 39.7029, "step": 7471 }, { "epoch": 177.90746268656716, "grad_norm": 1.3330813646316528, "learning_rate": 9.01851851851852e-06, "loss": 39.3412, "step": 7472 }, { "epoch": 177.9313432835821, "grad_norm": 1.3384488821029663, "learning_rate": 9.017195767195767e-06, "loss": 40.9085, "step": 7473 }, { "epoch": 177.955223880597, "grad_norm": 1.3875775337219238, "learning_rate": 9.015873015873017e-06, "loss": 40.231, "step": 7474 }, { "epoch": 177.97910447761194, "grad_norm": 1.3705580234527588, "learning_rate": 9.014550264550267e-06, "loss": 38.9316, "step": 7475 }, { "epoch": 178.0, "grad_norm": 1.3501209020614624, "learning_rate": 9.013227513227514e-06, "loss": 34.4015, "step": 7476 }, { "epoch": 178.02388059701494, "grad_norm": 1.3857225179672241, "learning_rate": 9.011904761904762e-06, "loss": 40.5167, "step": 7477 }, { "epoch": 178.04776119402985, "grad_norm": 1.3471230268478394, "learning_rate": 9.010582010582012e-06, "loss": 39.9946, "step": 7478 }, { "epoch": 178.07164179104478, "grad_norm": 1.3186172246932983, "learning_rate": 9.00925925925926e-06, "loss": 39.2854, "step": 7479 }, { "epoch": 178.0955223880597, "grad_norm": 1.4382518529891968, "learning_rate": 9.00793650793651e-06, "loss": 39.5394, "step": 7480 }, { "epoch": 178.11940298507463, "grad_norm": 1.3372012376785278, "learning_rate": 9.006613756613757e-06, "loss": 39.5485, "step": 7481 }, { "epoch": 178.14328358208957, "grad_norm": 1.3433163166046143, "learning_rate": 9.005291005291005e-06, "loss": 39.4068, "step": 7482 }, { "epoch": 178.16716417910447, "grad_norm": 1.3754476308822632, "learning_rate": 9.003968253968255e-06, "loss": 39.0884, "step": 7483 }, { "epoch": 178.1910447761194, "grad_norm": 1.3740776777267456, "learning_rate": 9.002645502645503e-06, "loss": 39.385, "step": 7484 }, { "epoch": 178.21492537313432, "grad_norm": 1.3518000841140747, "learning_rate": 9.001322751322752e-06, "loss": 40.7485, "step": 7485 }, { "epoch": 178.23880597014926, "grad_norm": 1.3547621965408325, "learning_rate": 9e-06, "loss": 40.2268, "step": 7486 }, { "epoch": 178.26268656716417, "grad_norm": 1.3204748630523682, "learning_rate": 8.998677248677248e-06, "loss": 39.573, "step": 7487 }, { "epoch": 178.2865671641791, "grad_norm": 1.3391307592391968, "learning_rate": 8.997354497354498e-06, "loss": 38.3267, "step": 7488 }, { "epoch": 178.31044776119404, "grad_norm": NaN, "learning_rate": 8.996031746031747e-06, "loss": 59.3152, "step": 7489 }, { "epoch": 178.33432835820895, "grad_norm": 1.3670405149459839, "learning_rate": 8.996031746031747e-06, "loss": 40.2898, "step": 7490 }, { "epoch": 178.3582089552239, "grad_norm": 1.3279860019683838, "learning_rate": 8.994708994708995e-06, "loss": 38.9853, "step": 7491 }, { "epoch": 178.3820895522388, "grad_norm": 1.3863072395324707, "learning_rate": 8.993386243386245e-06, "loss": 38.9439, "step": 7492 }, { "epoch": 178.40597014925373, "grad_norm": 1.3380961418151855, "learning_rate": 8.992063492063493e-06, "loss": 40.4688, "step": 7493 }, { "epoch": 178.42985074626867, "grad_norm": 1.3702536821365356, "learning_rate": 8.99074074074074e-06, "loss": 39.9543, "step": 7494 }, { "epoch": 178.45373134328358, "grad_norm": 1.357947587966919, "learning_rate": 8.98941798941799e-06, "loss": 38.2329, "step": 7495 }, { "epoch": 178.47761194029852, "grad_norm": 1.3251829147338867, "learning_rate": 8.98809523809524e-06, "loss": 39.4161, "step": 7496 }, { "epoch": 178.50149253731342, "grad_norm": 1.3043396472930908, "learning_rate": 8.986772486772488e-06, "loss": 40.7284, "step": 7497 }, { "epoch": 178.52537313432836, "grad_norm": 1.3427293300628662, "learning_rate": 8.985449735449736e-06, "loss": 39.9628, "step": 7498 }, { "epoch": 178.54925373134327, "grad_norm": 1.4002571105957031, "learning_rate": 8.984126984126985e-06, "loss": 40.0284, "step": 7499 }, { "epoch": 178.5731343283582, "grad_norm": 1.3868746757507324, "learning_rate": 8.982804232804233e-06, "loss": 39.4908, "step": 7500 }, { "epoch": 178.59701492537314, "grad_norm": 1.3143390417099, "learning_rate": 8.981481481481483e-06, "loss": 39.8162, "step": 7501 }, { "epoch": 178.62089552238805, "grad_norm": 1.3167476654052734, "learning_rate": 8.98015873015873e-06, "loss": 39.6408, "step": 7502 }, { "epoch": 178.644776119403, "grad_norm": 1.3930736780166626, "learning_rate": 8.978835978835979e-06, "loss": 39.1194, "step": 7503 }, { "epoch": 178.6686567164179, "grad_norm": 1.3967961072921753, "learning_rate": 8.977513227513228e-06, "loss": 38.9457, "step": 7504 }, { "epoch": 178.69253731343284, "grad_norm": 1.3539360761642456, "learning_rate": 8.976190476190478e-06, "loss": 40.2001, "step": 7505 }, { "epoch": 178.71641791044777, "grad_norm": 1.3439545631408691, "learning_rate": 8.974867724867726e-06, "loss": 40.6424, "step": 7506 }, { "epoch": 178.74029850746268, "grad_norm": 1.3599615097045898, "learning_rate": 8.973544973544973e-06, "loss": 40.0947, "step": 7507 }, { "epoch": 178.76417910447762, "grad_norm": 1.3466427326202393, "learning_rate": 8.972222222222223e-06, "loss": 40.6469, "step": 7508 }, { "epoch": 178.78805970149253, "grad_norm": 1.3023264408111572, "learning_rate": 8.970899470899471e-06, "loss": 38.7766, "step": 7509 }, { "epoch": 178.81194029850747, "grad_norm": 1.3334767818450928, "learning_rate": 8.96957671957672e-06, "loss": 40.3181, "step": 7510 }, { "epoch": 178.83582089552237, "grad_norm": 1.3132569789886475, "learning_rate": 8.968253968253968e-06, "loss": 38.6309, "step": 7511 }, { "epoch": 178.8597014925373, "grad_norm": 1.3265668153762817, "learning_rate": 8.966931216931218e-06, "loss": 40.3507, "step": 7512 }, { "epoch": 178.88358208955225, "grad_norm": 1.3703892230987549, "learning_rate": 8.965608465608466e-06, "loss": 39.6582, "step": 7513 }, { "epoch": 178.90746268656716, "grad_norm": 1.334561824798584, "learning_rate": 8.964285714285716e-06, "loss": 40.1735, "step": 7514 }, { "epoch": 178.9313432835821, "grad_norm": 1.383825659751892, "learning_rate": 8.962962962962963e-06, "loss": 40.4742, "step": 7515 }, { "epoch": 178.955223880597, "grad_norm": 1.4653191566467285, "learning_rate": 8.961640211640213e-06, "loss": 39.801, "step": 7516 }, { "epoch": 178.97910447761194, "grad_norm": 1.3393415212631226, "learning_rate": 8.960317460317461e-06, "loss": 39.9655, "step": 7517 }, { "epoch": 179.0, "grad_norm": 1.354925274848938, "learning_rate": 8.958994708994709e-06, "loss": 35.444, "step": 7518 }, { "epoch": 179.02388059701494, "grad_norm": 1.383780598640442, "learning_rate": 8.957671957671958e-06, "loss": 39.6943, "step": 7519 }, { "epoch": 179.04776119402985, "grad_norm": NaN, "learning_rate": 8.956349206349208e-06, "loss": 63.8402, "step": 7520 }, { "epoch": 179.07164179104478, "grad_norm": 1.3614469766616821, "learning_rate": 8.956349206349208e-06, "loss": 40.6741, "step": 7521 }, { "epoch": 179.0955223880597, "grad_norm": 1.3139305114746094, "learning_rate": 8.955026455026456e-06, "loss": 40.596, "step": 7522 }, { "epoch": 179.11940298507463, "grad_norm": 1.3953404426574707, "learning_rate": 8.953703703703704e-06, "loss": 39.2581, "step": 7523 }, { "epoch": 179.14328358208957, "grad_norm": 1.3573111295700073, "learning_rate": 8.952380952380953e-06, "loss": 40.7233, "step": 7524 }, { "epoch": 179.16716417910447, "grad_norm": 1.3536303043365479, "learning_rate": 8.951058201058201e-06, "loss": 40.1138, "step": 7525 }, { "epoch": 179.1910447761194, "grad_norm": 1.3551383018493652, "learning_rate": 8.949735449735451e-06, "loss": 40.1157, "step": 7526 }, { "epoch": 179.21492537313432, "grad_norm": 1.3132773637771606, "learning_rate": 8.948412698412699e-06, "loss": 40.3112, "step": 7527 }, { "epoch": 179.23880597014926, "grad_norm": 1.3693616390228271, "learning_rate": 8.947089947089947e-06, "loss": 40.572, "step": 7528 }, { "epoch": 179.26268656716417, "grad_norm": 1.3210688829421997, "learning_rate": 8.945767195767196e-06, "loss": 39.5424, "step": 7529 }, { "epoch": 179.2865671641791, "grad_norm": 1.3437572717666626, "learning_rate": 8.944444444444446e-06, "loss": 39.4681, "step": 7530 }, { "epoch": 179.31044776119404, "grad_norm": 1.3715442419052124, "learning_rate": 8.943121693121694e-06, "loss": 38.5815, "step": 7531 }, { "epoch": 179.33432835820895, "grad_norm": 1.3404653072357178, "learning_rate": 8.941798941798942e-06, "loss": 40.5077, "step": 7532 }, { "epoch": 179.3582089552239, "grad_norm": 1.3695898056030273, "learning_rate": 8.940476190476191e-06, "loss": 38.8564, "step": 7533 }, { "epoch": 179.3820895522388, "grad_norm": 1.333847999572754, "learning_rate": 8.93915343915344e-06, "loss": 39.5989, "step": 7534 }, { "epoch": 179.40597014925373, "grad_norm": 1.3408139944076538, "learning_rate": 8.937830687830689e-06, "loss": 39.2656, "step": 7535 }, { "epoch": 179.42985074626867, "grad_norm": 1.33467435836792, "learning_rate": 8.936507936507938e-06, "loss": 39.8499, "step": 7536 }, { "epoch": 179.45373134328358, "grad_norm": 1.3176394701004028, "learning_rate": 8.935185185185186e-06, "loss": 39.2384, "step": 7537 }, { "epoch": 179.47761194029852, "grad_norm": 1.3656506538391113, "learning_rate": 8.933862433862434e-06, "loss": 40.1956, "step": 7538 }, { "epoch": 179.50149253731342, "grad_norm": 1.3365297317504883, "learning_rate": 8.932539682539684e-06, "loss": 39.7087, "step": 7539 }, { "epoch": 179.52537313432836, "grad_norm": 1.3630247116088867, "learning_rate": 8.931216931216932e-06, "loss": 39.8995, "step": 7540 }, { "epoch": 179.54925373134327, "grad_norm": 1.298282504081726, "learning_rate": 8.929894179894181e-06, "loss": 39.3908, "step": 7541 }, { "epoch": 179.5731343283582, "grad_norm": 1.3757247924804688, "learning_rate": 8.92857142857143e-06, "loss": 40.2607, "step": 7542 }, { "epoch": 179.59701492537314, "grad_norm": 1.3562999963760376, "learning_rate": 8.927248677248677e-06, "loss": 37.8986, "step": 7543 }, { "epoch": 179.62089552238805, "grad_norm": 1.399717926979065, "learning_rate": 8.925925925925927e-06, "loss": 40.0505, "step": 7544 }, { "epoch": 179.644776119403, "grad_norm": 1.3617074489593506, "learning_rate": 8.924603174603176e-06, "loss": 39.3218, "step": 7545 }, { "epoch": 179.6686567164179, "grad_norm": 1.31533944606781, "learning_rate": 8.923280423280424e-06, "loss": 39.6912, "step": 7546 }, { "epoch": 179.69253731343284, "grad_norm": 1.3388538360595703, "learning_rate": 8.921957671957672e-06, "loss": 40.0104, "step": 7547 }, { "epoch": 179.71641791044777, "grad_norm": 1.3613851070404053, "learning_rate": 8.920634920634922e-06, "loss": 38.7171, "step": 7548 }, { "epoch": 179.74029850746268, "grad_norm": 1.3265963792800903, "learning_rate": 8.91931216931217e-06, "loss": 38.1732, "step": 7549 }, { "epoch": 179.76417910447762, "grad_norm": 1.3710014820098877, "learning_rate": 8.91798941798942e-06, "loss": 40.2718, "step": 7550 }, { "epoch": 179.78805970149253, "grad_norm": 1.3447701930999756, "learning_rate": 8.916666666666667e-06, "loss": 39.7439, "step": 7551 }, { "epoch": 179.81194029850747, "grad_norm": 1.328343391418457, "learning_rate": 8.915343915343915e-06, "loss": 39.6123, "step": 7552 }, { "epoch": 179.83582089552237, "grad_norm": 1.3742525577545166, "learning_rate": 8.914021164021165e-06, "loss": 40.4576, "step": 7553 }, { "epoch": 179.8597014925373, "grad_norm": 1.3646106719970703, "learning_rate": 8.912698412698414e-06, "loss": 39.6033, "step": 7554 }, { "epoch": 179.88358208955225, "grad_norm": 1.371544599533081, "learning_rate": 8.911375661375662e-06, "loss": 39.7809, "step": 7555 }, { "epoch": 179.90746268656716, "grad_norm": 1.348609447479248, "learning_rate": 8.910052910052912e-06, "loss": 39.8174, "step": 7556 }, { "epoch": 179.9313432835821, "grad_norm": 1.3376342058181763, "learning_rate": 8.90873015873016e-06, "loss": 39.1357, "step": 7557 }, { "epoch": 179.955223880597, "grad_norm": 1.3594517707824707, "learning_rate": 8.907407407407408e-06, "loss": 40.1579, "step": 7558 }, { "epoch": 179.97910447761194, "grad_norm": 1.3542228937149048, "learning_rate": 8.906084656084657e-06, "loss": 39.9314, "step": 7559 }, { "epoch": 180.0, "grad_norm": 1.3834638595581055, "learning_rate": 8.904761904761905e-06, "loss": 35.4229, "step": 7560 }, { "epoch": 180.0, "step": 7560, "total_flos": 3.732729299154012e+17, "train_loss": 4.428684310812168, "train_runtime": 25259.0821, "train_samples_per_second": 38.139, "train_steps_per_second": 0.299 }, { "epoch": 180.02388059701494, "grad_norm": 2.2088825702667236, "learning_rate": 1e-05, "loss": 40.1866, "step": 7561 }, { "epoch": 180.04776119402985, "grad_norm": 2.106600284576416, "learning_rate": 9.998809523809524e-06, "loss": 39.0755, "step": 7562 }, { "epoch": 180.07164179104478, "grad_norm": 1.7311856746673584, "learning_rate": 9.997619047619048e-06, "loss": 38.9738, "step": 7563 }, { "epoch": 180.0955223880597, "grad_norm": 1.7819795608520508, "learning_rate": 9.996428571428572e-06, "loss": 39.716, "step": 7564 }, { "epoch": 180.11940298507463, "grad_norm": 1.6036102771759033, "learning_rate": 9.995238095238095e-06, "loss": 40.9758, "step": 7565 }, { "epoch": 180.14328358208957, "grad_norm": 1.534388542175293, "learning_rate": 9.99404761904762e-06, "loss": 40.0883, "step": 7566 }, { "epoch": 180.16716417910447, "grad_norm": 1.3925228118896484, "learning_rate": 9.992857142857144e-06, "loss": 40.0831, "step": 7567 }, { "epoch": 180.1910447761194, "grad_norm": 1.3708882331848145, "learning_rate": 9.991666666666668e-06, "loss": 40.1074, "step": 7568 }, { "epoch": 180.21492537313432, "grad_norm": 1.426756501197815, "learning_rate": 9.990476190476191e-06, "loss": 39.0795, "step": 7569 }, { "epoch": 180.23880597014926, "grad_norm": 1.4570133686065674, "learning_rate": 9.989285714285715e-06, "loss": 40.3146, "step": 7570 }, { "epoch": 180.26268656716417, "grad_norm": 1.4728749990463257, "learning_rate": 9.988095238095239e-06, "loss": 40.9216, "step": 7571 }, { "epoch": 180.2865671641791, "grad_norm": 1.5344053506851196, "learning_rate": 9.986904761904764e-06, "loss": 38.4066, "step": 7572 }, { "epoch": 180.31044776119404, "grad_norm": 1.5063416957855225, "learning_rate": 9.985714285714286e-06, "loss": 40.6756, "step": 7573 }, { "epoch": 180.33432835820895, "grad_norm": 1.4700440168380737, "learning_rate": 9.984523809523811e-06, "loss": 40.2061, "step": 7574 }, { "epoch": 180.3582089552239, "grad_norm": 1.4829678535461426, "learning_rate": 9.983333333333333e-06, "loss": 40.0026, "step": 7575 }, { "epoch": 180.3820895522388, "grad_norm": 1.405871868133545, "learning_rate": 9.982142857142858e-06, "loss": 39.534, "step": 7576 }, { "epoch": 180.40597014925373, "grad_norm": 1.3536036014556885, "learning_rate": 9.980952380952382e-06, "loss": 39.5394, "step": 7577 }, { "epoch": 180.42985074626867, "grad_norm": 1.3740514516830444, "learning_rate": 9.979761904761906e-06, "loss": 40.6405, "step": 7578 }, { "epoch": 180.45373134328358, "grad_norm": 1.4062739610671997, "learning_rate": 9.97857142857143e-06, "loss": 40.0557, "step": 7579 }, { "epoch": 180.47761194029852, "grad_norm": 1.407299280166626, "learning_rate": 9.977380952380953e-06, "loss": 39.1047, "step": 7580 }, { "epoch": 180.50149253731342, "grad_norm": 1.384605884552002, "learning_rate": 9.976190476190477e-06, "loss": 40.1617, "step": 7581 }, { "epoch": 180.52537313432836, "grad_norm": 1.4377543926239014, "learning_rate": 9.975000000000002e-06, "loss": 39.6128, "step": 7582 }, { "epoch": 180.54925373134327, "grad_norm": 1.3995169401168823, "learning_rate": 9.973809523809524e-06, "loss": 39.5378, "step": 7583 }, { "epoch": 180.5731343283582, "grad_norm": 1.3548685312271118, "learning_rate": 9.972619047619049e-06, "loss": 40.0695, "step": 7584 }, { "epoch": 180.59701492537314, "grad_norm": 1.3918378353118896, "learning_rate": 9.971428571428571e-06, "loss": 38.761, "step": 7585 }, { "epoch": 180.62089552238805, "grad_norm": 1.365777850151062, "learning_rate": 9.970238095238096e-06, "loss": 39.5068, "step": 7586 }, { "epoch": 180.644776119403, "grad_norm": 1.3683996200561523, "learning_rate": 9.96904761904762e-06, "loss": 39.0844, "step": 7587 }, { "epoch": 180.6686567164179, "grad_norm": 1.350995421409607, "learning_rate": 9.967857142857144e-06, "loss": 38.8241, "step": 7588 }, { "epoch": 180.69253731343284, "grad_norm": 1.388558030128479, "learning_rate": 9.966666666666667e-06, "loss": 39.4747, "step": 7589 }, { "epoch": 180.71641791044777, "grad_norm": 1.321167230606079, "learning_rate": 9.965476190476191e-06, "loss": 39.7602, "step": 7590 }, { "epoch": 180.74029850746268, "grad_norm": 1.3482760190963745, "learning_rate": 9.964285714285714e-06, "loss": 40.996, "step": 7591 }, { "epoch": 180.76417910447762, "grad_norm": 1.4228594303131104, "learning_rate": 9.963095238095238e-06, "loss": 39.9121, "step": 7592 }, { "epoch": 180.78805970149253, "grad_norm": 1.369652271270752, "learning_rate": 9.961904761904763e-06, "loss": 40.6998, "step": 7593 }, { "epoch": 180.81194029850747, "grad_norm": 1.3797780275344849, "learning_rate": 9.960714285714287e-06, "loss": 39.4826, "step": 7594 }, { "epoch": 180.83582089552237, "grad_norm": 1.3203275203704834, "learning_rate": 9.95952380952381e-06, "loss": 39.3397, "step": 7595 }, { "epoch": 180.8597014925373, "grad_norm": 1.3718143701553345, "learning_rate": 9.958333333333334e-06, "loss": 39.9704, "step": 7596 }, { "epoch": 180.88358208955225, "grad_norm": 1.4263347387313843, "learning_rate": 9.957142857142858e-06, "loss": 40.4731, "step": 7597 }, { "epoch": 180.90746268656716, "grad_norm": 1.3567067384719849, "learning_rate": 9.955952380952382e-06, "loss": 39.5633, "step": 7598 }, { "epoch": 180.9313432835821, "grad_norm": 1.3206721544265747, "learning_rate": 9.954761904761905e-06, "loss": 40.6503, "step": 7599 }, { "epoch": 180.955223880597, "grad_norm": 1.354233980178833, "learning_rate": 9.953571428571429e-06, "loss": 41.1916, "step": 7600 }, { "epoch": 180.97910447761194, "grad_norm": 1.3341150283813477, "learning_rate": 9.952380952380954e-06, "loss": 38.6294, "step": 7601 }, { "epoch": 181.0, "grad_norm": 1.3148717880249023, "learning_rate": 9.951190476190476e-06, "loss": 36.0399, "step": 7602 }, { "epoch": 181.02388059701494, "grad_norm": 1.433619737625122, "learning_rate": 9.950000000000001e-06, "loss": 40.1204, "step": 7603 }, { "epoch": 181.04776119402985, "grad_norm": 1.4009112119674683, "learning_rate": 9.948809523809525e-06, "loss": 38.7094, "step": 7604 }, { "epoch": 181.07164179104478, "grad_norm": 1.365941047668457, "learning_rate": 9.947619047619049e-06, "loss": 39.1118, "step": 7605 }, { "epoch": 181.0955223880597, "grad_norm": 1.3595820665359497, "learning_rate": 9.946428571428572e-06, "loss": 39.698, "step": 7606 }, { "epoch": 181.11940298507463, "grad_norm": 1.3635787963867188, "learning_rate": 9.945238095238096e-06, "loss": 39.8703, "step": 7607 }, { "epoch": 181.14328358208957, "grad_norm": 1.3336384296417236, "learning_rate": 9.94404761904762e-06, "loss": 39.1618, "step": 7608 }, { "epoch": 181.16716417910447, "grad_norm": 1.3244030475616455, "learning_rate": 9.942857142857145e-06, "loss": 40.9476, "step": 7609 }, { "epoch": 181.1910447761194, "grad_norm": 1.3703497648239136, "learning_rate": 9.941666666666667e-06, "loss": 40.5933, "step": 7610 }, { "epoch": 181.21492537313432, "grad_norm": 1.3511179685592651, "learning_rate": 9.940476190476192e-06, "loss": 40.3532, "step": 7611 }, { "epoch": 181.23880597014926, "grad_norm": 1.2973387241363525, "learning_rate": 9.939285714285714e-06, "loss": 40.5368, "step": 7612 }, { "epoch": 181.26268656716417, "grad_norm": 1.3638863563537598, "learning_rate": 9.93809523809524e-06, "loss": 39.9985, "step": 7613 }, { "epoch": 181.2865671641791, "grad_norm": 1.3613210916519165, "learning_rate": 9.936904761904763e-06, "loss": 39.0016, "step": 7614 }, { "epoch": 181.31044776119404, "grad_norm": 1.3429604768753052, "learning_rate": 9.935714285714286e-06, "loss": 40.4619, "step": 7615 }, { "epoch": 181.33432835820895, "grad_norm": 1.361074447631836, "learning_rate": 9.93452380952381e-06, "loss": 39.9791, "step": 7616 }, { "epoch": 181.3582089552239, "grad_norm": NaN, "learning_rate": 9.933333333333334e-06, "loss": 49.2643, "step": 7617 }, { "epoch": 181.3820895522388, "grad_norm": 1.351794719696045, "learning_rate": 9.933333333333334e-06, "loss": 40.9551, "step": 7618 }, { "epoch": 181.40597014925373, "grad_norm": 1.3331366777420044, "learning_rate": 9.932142857142857e-06, "loss": 40.4961, "step": 7619 }, { "epoch": 181.42985074626867, "grad_norm": 1.3738224506378174, "learning_rate": 9.930952380952383e-06, "loss": 38.9925, "step": 7620 }, { "epoch": 181.45373134328358, "grad_norm": 1.3413071632385254, "learning_rate": 9.929761904761906e-06, "loss": 40.345, "step": 7621 }, { "epoch": 181.47761194029852, "grad_norm": 1.3372284173965454, "learning_rate": 9.92857142857143e-06, "loss": 39.5533, "step": 7622 }, { "epoch": 181.50149253731342, "grad_norm": 1.3982499837875366, "learning_rate": 9.927380952380953e-06, "loss": 40.2149, "step": 7623 }, { "epoch": 181.52537313432836, "grad_norm": 1.3323367834091187, "learning_rate": 9.926190476190477e-06, "loss": 39.7508, "step": 7624 }, { "epoch": 181.54925373134327, "grad_norm": 1.332747220993042, "learning_rate": 9.925e-06, "loss": 40.7926, "step": 7625 }, { "epoch": 181.5731343283582, "grad_norm": NaN, "learning_rate": 9.923809523809524e-06, "loss": 56.451, "step": 7626 }, { "epoch": 181.59701492537314, "grad_norm": 1.3642284870147705, "learning_rate": 9.923809523809524e-06, "loss": 39.1166, "step": 7627 }, { "epoch": 181.62089552238805, "grad_norm": 1.3867290019989014, "learning_rate": 9.922619047619048e-06, "loss": 39.9566, "step": 7628 }, { "epoch": 181.644776119403, "grad_norm": 1.3623274564743042, "learning_rate": 9.921428571428572e-06, "loss": 38.6703, "step": 7629 }, { "epoch": 181.6686567164179, "grad_norm": 1.34645676612854, "learning_rate": 9.920238095238097e-06, "loss": 40.6107, "step": 7630 }, { "epoch": 181.69253731343284, "grad_norm": 1.3394584655761719, "learning_rate": 9.91904761904762e-06, "loss": 40.2965, "step": 7631 }, { "epoch": 181.71641791044777, "grad_norm": 1.3996329307556152, "learning_rate": 9.917857142857144e-06, "loss": 40.2924, "step": 7632 }, { "epoch": 181.74029850746268, "grad_norm": 1.3611804246902466, "learning_rate": 9.916666666666668e-06, "loss": 39.3707, "step": 7633 }, { "epoch": 181.76417910447762, "grad_norm": 1.3747488260269165, "learning_rate": 9.915476190476191e-06, "loss": 38.9936, "step": 7634 }, { "epoch": 181.78805970149253, "grad_norm": 1.3544381856918335, "learning_rate": 9.914285714285715e-06, "loss": 39.6149, "step": 7635 }, { "epoch": 181.81194029850747, "grad_norm": 1.4598218202590942, "learning_rate": 9.91309523809524e-06, "loss": 38.9246, "step": 7636 }, { "epoch": 181.83582089552237, "grad_norm": 1.3805880546569824, "learning_rate": 9.911904761904762e-06, "loss": 39.9958, "step": 7637 }, { "epoch": 181.8597014925373, "grad_norm": 1.3860785961151123, "learning_rate": 9.910714285714288e-06, "loss": 40.2374, "step": 7638 }, { "epoch": 181.88358208955225, "grad_norm": 1.3572970628738403, "learning_rate": 9.90952380952381e-06, "loss": 39.8506, "step": 7639 }, { "epoch": 181.90746268656716, "grad_norm": 1.3772907257080078, "learning_rate": 9.908333333333335e-06, "loss": 40.6265, "step": 7640 }, { "epoch": 181.9313432835821, "grad_norm": 1.3353087902069092, "learning_rate": 9.907142857142858e-06, "loss": 40.1241, "step": 7641 }, { "epoch": 181.955223880597, "grad_norm": 1.3382861614227295, "learning_rate": 9.905952380952382e-06, "loss": 38.7993, "step": 7642 }, { "epoch": 181.97910447761194, "grad_norm": 1.3356451988220215, "learning_rate": 9.904761904761906e-06, "loss": 39.4023, "step": 7643 }, { "epoch": 182.0, "grad_norm": 1.4512486457824707, "learning_rate": 9.90357142857143e-06, "loss": 33.375, "step": 7644 }, { "epoch": 182.02388059701494, "grad_norm": 1.4112980365753174, "learning_rate": 9.902380952380953e-06, "loss": 38.5603, "step": 7645 }, { "epoch": 182.04776119402985, "grad_norm": 1.3651316165924072, "learning_rate": 9.901190476190476e-06, "loss": 39.9868, "step": 7646 }, { "epoch": 182.07164179104478, "grad_norm": 1.3678195476531982, "learning_rate": 9.9e-06, "loss": 40.7482, "step": 7647 }, { "epoch": 182.0955223880597, "grad_norm": 1.4099295139312744, "learning_rate": 9.898809523809525e-06, "loss": 38.8692, "step": 7648 }, { "epoch": 182.11940298507463, "grad_norm": NaN, "learning_rate": 9.897619047619047e-06, "loss": 59.7388, "step": 7649 }, { "epoch": 182.14328358208957, "grad_norm": 1.381217360496521, "learning_rate": 9.897619047619047e-06, "loss": 39.7727, "step": 7650 }, { "epoch": 182.16716417910447, "grad_norm": 1.361299991607666, "learning_rate": 9.896428571428573e-06, "loss": 39.0869, "step": 7651 }, { "epoch": 182.1910447761194, "grad_norm": 1.4074339866638184, "learning_rate": 9.895238095238096e-06, "loss": 39.7157, "step": 7652 }, { "epoch": 182.21492537313432, "grad_norm": 1.353183627128601, "learning_rate": 9.89404761904762e-06, "loss": 40.7032, "step": 7653 }, { "epoch": 182.23880597014926, "grad_norm": 1.338567852973938, "learning_rate": 9.892857142857143e-06, "loss": 39.1298, "step": 7654 }, { "epoch": 182.26268656716417, "grad_norm": 1.3717310428619385, "learning_rate": 9.891666666666667e-06, "loss": 38.6227, "step": 7655 }, { "epoch": 182.2865671641791, "grad_norm": 1.3313946723937988, "learning_rate": 9.89047619047619e-06, "loss": 39.6194, "step": 7656 }, { "epoch": 182.31044776119404, "grad_norm": 1.4169702529907227, "learning_rate": 9.889285714285714e-06, "loss": 41.159, "step": 7657 }, { "epoch": 182.33432835820895, "grad_norm": 1.3278824090957642, "learning_rate": 9.88809523809524e-06, "loss": 39.7653, "step": 7658 }, { "epoch": 182.3582089552239, "grad_norm": 1.3234167098999023, "learning_rate": 9.886904761904763e-06, "loss": 40.8534, "step": 7659 }, { "epoch": 182.3820895522388, "grad_norm": 1.306930422782898, "learning_rate": 9.885714285714287e-06, "loss": 40.4539, "step": 7660 }, { "epoch": 182.40597014925373, "grad_norm": 1.3555806875228882, "learning_rate": 9.88452380952381e-06, "loss": 39.7276, "step": 7661 }, { "epoch": 182.42985074626867, "grad_norm": 1.355045199394226, "learning_rate": 9.883333333333334e-06, "loss": 39.3548, "step": 7662 }, { "epoch": 182.45373134328358, "grad_norm": 1.331205129623413, "learning_rate": 9.882142857142858e-06, "loss": 40.1372, "step": 7663 }, { "epoch": 182.47761194029852, "grad_norm": 1.3852670192718506, "learning_rate": 9.880952380952381e-06, "loss": 40.9479, "step": 7664 }, { "epoch": 182.50149253731342, "grad_norm": 1.3578859567642212, "learning_rate": 9.879761904761905e-06, "loss": 39.7828, "step": 7665 }, { "epoch": 182.52537313432836, "grad_norm": 1.3850160837173462, "learning_rate": 9.87857142857143e-06, "loss": 39.1251, "step": 7666 }, { "epoch": 182.54925373134327, "grad_norm": 1.3344089984893799, "learning_rate": 9.877380952380952e-06, "loss": 39.0608, "step": 7667 }, { "epoch": 182.5731343283582, "grad_norm": 1.3278048038482666, "learning_rate": 9.876190476190478e-06, "loss": 39.418, "step": 7668 }, { "epoch": 182.59701492537314, "grad_norm": 1.3549968004226685, "learning_rate": 9.875000000000001e-06, "loss": 39.263, "step": 7669 }, { "epoch": 182.62089552238805, "grad_norm": 1.3217283487319946, "learning_rate": 9.873809523809525e-06, "loss": 39.2198, "step": 7670 }, { "epoch": 182.644776119403, "grad_norm": 1.3923081159591675, "learning_rate": 9.872619047619048e-06, "loss": 38.9872, "step": 7671 }, { "epoch": 182.6686567164179, "grad_norm": 1.3721230030059814, "learning_rate": 9.871428571428572e-06, "loss": 40.3424, "step": 7672 }, { "epoch": 182.69253731343284, "grad_norm": 1.3639194965362549, "learning_rate": 9.870238095238096e-06, "loss": 40.2026, "step": 7673 }, { "epoch": 182.71641791044777, "grad_norm": 1.3361389636993408, "learning_rate": 9.869047619047621e-06, "loss": 40.18, "step": 7674 }, { "epoch": 182.74029850746268, "grad_norm": 1.3317526578903198, "learning_rate": 9.867857142857143e-06, "loss": 39.4667, "step": 7675 }, { "epoch": 182.76417910447762, "grad_norm": 1.356610655784607, "learning_rate": 9.866666666666668e-06, "loss": 39.9026, "step": 7676 }, { "epoch": 182.78805970149253, "grad_norm": 1.3221778869628906, "learning_rate": 9.86547619047619e-06, "loss": 38.8787, "step": 7677 }, { "epoch": 182.81194029850747, "grad_norm": 1.3412845134735107, "learning_rate": 9.864285714285715e-06, "loss": 40.732, "step": 7678 }, { "epoch": 182.83582089552237, "grad_norm": 1.3659319877624512, "learning_rate": 9.863095238095239e-06, "loss": 39.3553, "step": 7679 }, { "epoch": 182.8597014925373, "grad_norm": 1.3602638244628906, "learning_rate": 9.861904761904763e-06, "loss": 39.6883, "step": 7680 }, { "epoch": 182.88358208955225, "grad_norm": NaN, "learning_rate": 9.860714285714286e-06, "loss": 59.7634, "step": 7681 }, { "epoch": 182.90746268656716, "grad_norm": 1.3570003509521484, "learning_rate": 9.860714285714286e-06, "loss": 39.7645, "step": 7682 }, { "epoch": 182.9313432835821, "grad_norm": 1.3640658855438232, "learning_rate": 9.85952380952381e-06, "loss": 40.5915, "step": 7683 }, { "epoch": 182.955223880597, "grad_norm": 1.3689199686050415, "learning_rate": 9.858333333333334e-06, "loss": 40.9116, "step": 7684 }, { "epoch": 182.97910447761194, "grad_norm": 1.3507763147354126, "learning_rate": 9.857142857142859e-06, "loss": 40.5916, "step": 7685 }, { "epoch": 183.0, "grad_norm": 1.324217438697815, "learning_rate": 9.85595238095238e-06, "loss": 35.4359, "step": 7686 }, { "epoch": 183.02388059701494, "grad_norm": 1.4101989269256592, "learning_rate": 9.854761904761906e-06, "loss": 38.6298, "step": 7687 }, { "epoch": 183.04776119402985, "grad_norm": 1.3475905656814575, "learning_rate": 9.85357142857143e-06, "loss": 39.9775, "step": 7688 }, { "epoch": 183.07164179104478, "grad_norm": 1.3737767934799194, "learning_rate": 9.852380952380953e-06, "loss": 39.9531, "step": 7689 }, { "epoch": 183.0955223880597, "grad_norm": 1.3482345342636108, "learning_rate": 9.851190476190477e-06, "loss": 40.3591, "step": 7690 }, { "epoch": 183.11940298507463, "grad_norm": 1.3356105089187622, "learning_rate": 9.85e-06, "loss": 39.6386, "step": 7691 }, { "epoch": 183.14328358208957, "grad_norm": 1.3852757215499878, "learning_rate": 9.848809523809524e-06, "loss": 39.8243, "step": 7692 }, { "epoch": 183.16716417910447, "grad_norm": 1.347570538520813, "learning_rate": 9.847619047619048e-06, "loss": 39.9031, "step": 7693 }, { "epoch": 183.1910447761194, "grad_norm": 1.417081356048584, "learning_rate": 9.846428571428573e-06, "loss": 39.0608, "step": 7694 }, { "epoch": 183.21492537313432, "grad_norm": 1.3714817762374878, "learning_rate": 9.845238095238097e-06, "loss": 39.9464, "step": 7695 }, { "epoch": 183.23880597014926, "grad_norm": 1.3312629461288452, "learning_rate": 9.84404761904762e-06, "loss": 41.4321, "step": 7696 }, { "epoch": 183.26268656716417, "grad_norm": 1.3345277309417725, "learning_rate": 9.842857142857144e-06, "loss": 40.0764, "step": 7697 }, { "epoch": 183.2865671641791, "grad_norm": 1.3040674924850464, "learning_rate": 9.841666666666668e-06, "loss": 41.0231, "step": 7698 }, { "epoch": 183.31044776119404, "grad_norm": 1.3859965801239014, "learning_rate": 9.840476190476191e-06, "loss": 39.2538, "step": 7699 }, { "epoch": 183.33432835820895, "grad_norm": 1.3410440683364868, "learning_rate": 9.839285714285715e-06, "loss": 40.766, "step": 7700 }, { "epoch": 183.3582089552239, "grad_norm": 1.3513861894607544, "learning_rate": 9.838095238095238e-06, "loss": 40.5238, "step": 7701 }, { "epoch": 183.3820895522388, "grad_norm": 1.3854436874389648, "learning_rate": 9.836904761904764e-06, "loss": 39.6086, "step": 7702 }, { "epoch": 183.40597014925373, "grad_norm": 1.3095662593841553, "learning_rate": 9.835714285714286e-06, "loss": 40.818, "step": 7703 }, { "epoch": 183.42985074626867, "grad_norm": 1.3519278764724731, "learning_rate": 9.834523809523811e-06, "loss": 39.694, "step": 7704 }, { "epoch": 183.45373134328358, "grad_norm": 1.3618355989456177, "learning_rate": 9.833333333333333e-06, "loss": 38.9571, "step": 7705 }, { "epoch": 183.47761194029852, "grad_norm": 1.3487801551818848, "learning_rate": 9.832142857142858e-06, "loss": 39.7837, "step": 7706 }, { "epoch": 183.50149253731342, "grad_norm": 1.3269516229629517, "learning_rate": 9.830952380952382e-06, "loss": 41.0883, "step": 7707 }, { "epoch": 183.52537313432836, "grad_norm": 1.3513522148132324, "learning_rate": 9.829761904761905e-06, "loss": 40.4022, "step": 7708 }, { "epoch": 183.54925373134327, "grad_norm": 1.3476548194885254, "learning_rate": 9.828571428571429e-06, "loss": 39.0225, "step": 7709 }, { "epoch": 183.5731343283582, "grad_norm": 1.3726063966751099, "learning_rate": 9.827380952380953e-06, "loss": 39.937, "step": 7710 }, { "epoch": 183.59701492537314, "grad_norm": 1.3789716958999634, "learning_rate": 9.826190476190476e-06, "loss": 38.2886, "step": 7711 }, { "epoch": 183.62089552238805, "grad_norm": 1.4317967891693115, "learning_rate": 9.825000000000002e-06, "loss": 39.2982, "step": 7712 }, { "epoch": 183.644776119403, "grad_norm": 1.3613648414611816, "learning_rate": 9.823809523809524e-06, "loss": 39.2467, "step": 7713 }, { "epoch": 183.6686567164179, "grad_norm": 1.3666434288024902, "learning_rate": 9.822619047619049e-06, "loss": 40.021, "step": 7714 }, { "epoch": 183.69253731343284, "grad_norm": 1.317740797996521, "learning_rate": 9.821428571428573e-06, "loss": 40.8253, "step": 7715 }, { "epoch": 183.71641791044777, "grad_norm": 1.375329613685608, "learning_rate": 9.820238095238096e-06, "loss": 39.577, "step": 7716 }, { "epoch": 183.74029850746268, "grad_norm": 1.37083899974823, "learning_rate": 9.81904761904762e-06, "loss": 40.2937, "step": 7717 }, { "epoch": 183.76417910447762, "grad_norm": 1.3439760208129883, "learning_rate": 9.817857142857143e-06, "loss": 39.5475, "step": 7718 }, { "epoch": 183.78805970149253, "grad_norm": 1.3243168592453003, "learning_rate": 9.816666666666667e-06, "loss": 40.4133, "step": 7719 }, { "epoch": 183.81194029850747, "grad_norm": 1.350960612297058, "learning_rate": 9.81547619047619e-06, "loss": 39.8991, "step": 7720 }, { "epoch": 183.83582089552237, "grad_norm": 1.352108359336853, "learning_rate": 9.814285714285716e-06, "loss": 39.3185, "step": 7721 }, { "epoch": 183.8597014925373, "grad_norm": 1.311669945716858, "learning_rate": 9.81309523809524e-06, "loss": 39.912, "step": 7722 }, { "epoch": 183.88358208955225, "grad_norm": 1.3697714805603027, "learning_rate": 9.811904761904763e-06, "loss": 39.1127, "step": 7723 }, { "epoch": 183.90746268656716, "grad_norm": 1.3384363651275635, "learning_rate": 9.810714285714287e-06, "loss": 40.0184, "step": 7724 }, { "epoch": 183.9313432835821, "grad_norm": 1.3691613674163818, "learning_rate": 9.80952380952381e-06, "loss": 39.9864, "step": 7725 }, { "epoch": 183.955223880597, "grad_norm": 1.3862379789352417, "learning_rate": 9.808333333333334e-06, "loss": 39.2231, "step": 7726 }, { "epoch": 183.97910447761194, "grad_norm": 1.3655441999435425, "learning_rate": 9.807142857142858e-06, "loss": 39.0694, "step": 7727 }, { "epoch": 184.0, "grad_norm": 1.3544189929962158, "learning_rate": 9.805952380952381e-06, "loss": 33.6012, "step": 7728 }, { "epoch": 184.02388059701494, "grad_norm": 1.3782861232757568, "learning_rate": 9.804761904761907e-06, "loss": 39.4896, "step": 7729 }, { "epoch": 184.04776119402985, "grad_norm": 1.3348222970962524, "learning_rate": 9.803571428571428e-06, "loss": 40.8086, "step": 7730 }, { "epoch": 184.07164179104478, "grad_norm": 1.3384095430374146, "learning_rate": 9.802380952380954e-06, "loss": 40.5666, "step": 7731 }, { "epoch": 184.0955223880597, "grad_norm": 1.3698259592056274, "learning_rate": 9.801190476190477e-06, "loss": 39.6987, "step": 7732 }, { "epoch": 184.11940298507463, "grad_norm": 1.3628394603729248, "learning_rate": 9.800000000000001e-06, "loss": 40.5131, "step": 7733 }, { "epoch": 184.14328358208957, "grad_norm": 1.3511914014816284, "learning_rate": 9.798809523809525e-06, "loss": 39.0674, "step": 7734 }, { "epoch": 184.16716417910447, "grad_norm": 1.3400299549102783, "learning_rate": 9.797619047619048e-06, "loss": 39.3678, "step": 7735 }, { "epoch": 184.1910447761194, "grad_norm": 1.3553309440612793, "learning_rate": 9.796428571428572e-06, "loss": 39.8838, "step": 7736 }, { "epoch": 184.21492537313432, "grad_norm": 1.3506001234054565, "learning_rate": 9.795238095238097e-06, "loss": 40.0269, "step": 7737 }, { "epoch": 184.23880597014926, "grad_norm": 1.347055435180664, "learning_rate": 9.794047619047619e-06, "loss": 40.4866, "step": 7738 }, { "epoch": 184.26268656716417, "grad_norm": 1.3336387872695923, "learning_rate": 9.792857142857144e-06, "loss": 40.2095, "step": 7739 }, { "epoch": 184.2865671641791, "grad_norm": 1.3379324674606323, "learning_rate": 9.791666666666666e-06, "loss": 39.797, "step": 7740 }, { "epoch": 184.31044776119404, "grad_norm": 1.396721601486206, "learning_rate": 9.790476190476192e-06, "loss": 41.1736, "step": 7741 }, { "epoch": 184.33432835820895, "grad_norm": 1.333448052406311, "learning_rate": 9.789285714285715e-06, "loss": 39.5097, "step": 7742 }, { "epoch": 184.3582089552239, "grad_norm": 1.3747590780258179, "learning_rate": 9.788095238095239e-06, "loss": 40.4251, "step": 7743 }, { "epoch": 184.3820895522388, "grad_norm": 1.3135462999343872, "learning_rate": 9.786904761904763e-06, "loss": 39.8919, "step": 7744 }, { "epoch": 184.40597014925373, "grad_norm": 1.405326247215271, "learning_rate": 9.785714285714286e-06, "loss": 39.4661, "step": 7745 }, { "epoch": 184.42985074626867, "grad_norm": 1.378355622291565, "learning_rate": 9.78452380952381e-06, "loss": 39.9771, "step": 7746 }, { "epoch": 184.45373134328358, "grad_norm": 1.3213404417037964, "learning_rate": 9.783333333333335e-06, "loss": 39.6865, "step": 7747 }, { "epoch": 184.47761194029852, "grad_norm": 1.3477661609649658, "learning_rate": 9.782142857142857e-06, "loss": 40.3212, "step": 7748 }, { "epoch": 184.50149253731342, "grad_norm": 1.3737239837646484, "learning_rate": 9.780952380952382e-06, "loss": 39.926, "step": 7749 }, { "epoch": 184.52537313432836, "grad_norm": 1.3777309656143188, "learning_rate": 9.779761904761906e-06, "loss": 37.389, "step": 7750 }, { "epoch": 184.54925373134327, "grad_norm": 1.3731151819229126, "learning_rate": 9.77857142857143e-06, "loss": 39.9557, "step": 7751 }, { "epoch": 184.5731343283582, "grad_norm": 1.358465313911438, "learning_rate": 9.777380952380953e-06, "loss": 38.0022, "step": 7752 }, { "epoch": 184.59701492537314, "grad_norm": 1.351117730140686, "learning_rate": 9.776190476190477e-06, "loss": 39.8206, "step": 7753 }, { "epoch": 184.62089552238805, "grad_norm": 1.3350672721862793, "learning_rate": 9.775e-06, "loss": 40.0741, "step": 7754 }, { "epoch": 184.644776119403, "grad_norm": 1.4243212938308716, "learning_rate": 9.773809523809524e-06, "loss": 40.1234, "step": 7755 }, { "epoch": 184.6686567164179, "grad_norm": 1.3239147663116455, "learning_rate": 9.77261904761905e-06, "loss": 40.8127, "step": 7756 }, { "epoch": 184.69253731343284, "grad_norm": 1.3312865495681763, "learning_rate": 9.771428571428571e-06, "loss": 41.0657, "step": 7757 }, { "epoch": 184.71641791044777, "grad_norm": 1.3680845499038696, "learning_rate": 9.770238095238097e-06, "loss": 41.3247, "step": 7758 }, { "epoch": 184.74029850746268, "grad_norm": 1.321744680404663, "learning_rate": 9.76904761904762e-06, "loss": 38.9973, "step": 7759 }, { "epoch": 184.76417910447762, "grad_norm": 1.3310075998306274, "learning_rate": 9.767857142857144e-06, "loss": 39.7813, "step": 7760 }, { "epoch": 184.78805970149253, "grad_norm": 1.4447020292282104, "learning_rate": 9.766666666666667e-06, "loss": 39.2111, "step": 7761 }, { "epoch": 184.81194029850747, "grad_norm": 1.407461404800415, "learning_rate": 9.765476190476191e-06, "loss": 39.3313, "step": 7762 }, { "epoch": 184.83582089552237, "grad_norm": 1.3727232217788696, "learning_rate": 9.764285714285715e-06, "loss": 39.3381, "step": 7763 }, { "epoch": 184.8597014925373, "grad_norm": 1.364490270614624, "learning_rate": 9.76309523809524e-06, "loss": 39.6084, "step": 7764 }, { "epoch": 184.88358208955225, "grad_norm": 1.4031805992126465, "learning_rate": 9.761904761904762e-06, "loss": 40.8163, "step": 7765 }, { "epoch": 184.90746268656716, "grad_norm": 1.3821228742599487, "learning_rate": 9.760714285714287e-06, "loss": 38.0195, "step": 7766 }, { "epoch": 184.9313432835821, "grad_norm": 1.3420741558074951, "learning_rate": 9.75952380952381e-06, "loss": 39.5857, "step": 7767 }, { "epoch": 184.955223880597, "grad_norm": 1.3301208019256592, "learning_rate": 9.758333333333334e-06, "loss": 40.2771, "step": 7768 }, { "epoch": 184.97910447761194, "grad_norm": 1.3781335353851318, "learning_rate": 9.757142857142858e-06, "loss": 39.2352, "step": 7769 }, { "epoch": 185.0, "grad_norm": 1.3316543102264404, "learning_rate": 9.755952380952382e-06, "loss": 34.2499, "step": 7770 }, { "epoch": 185.02388059701494, "grad_norm": 1.3717637062072754, "learning_rate": 9.754761904761905e-06, "loss": 40.2196, "step": 7771 }, { "epoch": 185.04776119402985, "grad_norm": 1.3722270727157593, "learning_rate": 9.753571428571429e-06, "loss": 39.8995, "step": 7772 }, { "epoch": 185.07164179104478, "grad_norm": 1.2897300720214844, "learning_rate": 9.752380952380953e-06, "loss": 40.652, "step": 7773 }, { "epoch": 185.0955223880597, "grad_norm": 1.3370007276535034, "learning_rate": 9.751190476190478e-06, "loss": 38.1557, "step": 7774 }, { "epoch": 185.11940298507463, "grad_norm": 1.337791919708252, "learning_rate": 9.75e-06, "loss": 40.3637, "step": 7775 }, { "epoch": 185.14328358208957, "grad_norm": 1.3993209600448608, "learning_rate": 9.748809523809525e-06, "loss": 39.7157, "step": 7776 }, { "epoch": 185.16716417910447, "grad_norm": 1.366675853729248, "learning_rate": 9.747619047619049e-06, "loss": 39.5779, "step": 7777 }, { "epoch": 185.1910447761194, "grad_norm": 1.331112027168274, "learning_rate": 9.746428571428572e-06, "loss": 41.1373, "step": 7778 }, { "epoch": 185.21492537313432, "grad_norm": 1.3376224040985107, "learning_rate": 9.745238095238096e-06, "loss": 39.5635, "step": 7779 }, { "epoch": 185.23880597014926, "grad_norm": 1.3490349054336548, "learning_rate": 9.74404761904762e-06, "loss": 40.2915, "step": 7780 }, { "epoch": 185.26268656716417, "grad_norm": 1.3844152688980103, "learning_rate": 9.742857142857143e-06, "loss": 39.7004, "step": 7781 }, { "epoch": 185.2865671641791, "grad_norm": 1.342834234237671, "learning_rate": 9.741666666666667e-06, "loss": 39.6637, "step": 7782 }, { "epoch": 185.31044776119404, "grad_norm": 1.3364394903182983, "learning_rate": 9.74047619047619e-06, "loss": 40.1719, "step": 7783 }, { "epoch": 185.33432835820895, "grad_norm": 1.3479561805725098, "learning_rate": 9.739285714285716e-06, "loss": 39.7646, "step": 7784 }, { "epoch": 185.3582089552239, "grad_norm": 1.3735675811767578, "learning_rate": 9.73809523809524e-06, "loss": 40.4896, "step": 7785 }, { "epoch": 185.3820895522388, "grad_norm": 1.3733397722244263, "learning_rate": 9.736904761904763e-06, "loss": 39.6166, "step": 7786 }, { "epoch": 185.40597014925373, "grad_norm": 1.3372589349746704, "learning_rate": 9.735714285714287e-06, "loss": 38.6443, "step": 7787 }, { "epoch": 185.42985074626867, "grad_norm": 1.3480873107910156, "learning_rate": 9.73452380952381e-06, "loss": 40.1823, "step": 7788 }, { "epoch": 185.45373134328358, "grad_norm": 1.4321746826171875, "learning_rate": 9.733333333333334e-06, "loss": 39.1283, "step": 7789 }, { "epoch": 185.47761194029852, "grad_norm": 1.346120834350586, "learning_rate": 9.732142857142858e-06, "loss": 39.28, "step": 7790 }, { "epoch": 185.50149253731342, "grad_norm": 1.3661071062088013, "learning_rate": 9.730952380952383e-06, "loss": 39.1474, "step": 7791 }, { "epoch": 185.52537313432836, "grad_norm": 1.3773843050003052, "learning_rate": 9.729761904761905e-06, "loss": 38.9534, "step": 7792 }, { "epoch": 185.54925373134327, "grad_norm": 1.3678182363510132, "learning_rate": 9.72857142857143e-06, "loss": 41.5843, "step": 7793 }, { "epoch": 185.5731343283582, "grad_norm": 1.3333873748779297, "learning_rate": 9.727380952380954e-06, "loss": 40.0667, "step": 7794 }, { "epoch": 185.59701492537314, "grad_norm": 1.3682844638824463, "learning_rate": 9.726190476190477e-06, "loss": 38.6821, "step": 7795 }, { "epoch": 185.62089552238805, "grad_norm": 1.808711051940918, "learning_rate": 9.725000000000001e-06, "loss": 40.2236, "step": 7796 }, { "epoch": 185.644776119403, "grad_norm": 1.3760195970535278, "learning_rate": 9.723809523809525e-06, "loss": 39.704, "step": 7797 }, { "epoch": 185.6686567164179, "grad_norm": 1.3244855403900146, "learning_rate": 9.722619047619048e-06, "loss": 40.3644, "step": 7798 }, { "epoch": 185.69253731343284, "grad_norm": 1.36043381690979, "learning_rate": 9.721428571428573e-06, "loss": 40.8146, "step": 7799 }, { "epoch": 185.71641791044777, "grad_norm": 1.3734409809112549, "learning_rate": 9.720238095238095e-06, "loss": 38.3786, "step": 7800 }, { "epoch": 185.74029850746268, "grad_norm": 1.3965812921524048, "learning_rate": 9.71904761904762e-06, "loss": 40.104, "step": 7801 }, { "epoch": 185.76417910447762, "grad_norm": 1.3585809469223022, "learning_rate": 9.717857142857143e-06, "loss": 39.6808, "step": 7802 }, { "epoch": 185.78805970149253, "grad_norm": 1.3542767763137817, "learning_rate": 9.716666666666668e-06, "loss": 39.0759, "step": 7803 }, { "epoch": 185.81194029850747, "grad_norm": 1.3232330083847046, "learning_rate": 9.715476190476192e-06, "loss": 39.8017, "step": 7804 }, { "epoch": 185.83582089552237, "grad_norm": 1.4800019264221191, "learning_rate": 9.714285714285715e-06, "loss": 40.5656, "step": 7805 }, { "epoch": 185.8597014925373, "grad_norm": 1.3827204704284668, "learning_rate": 9.713095238095239e-06, "loss": 40.3439, "step": 7806 }, { "epoch": 185.88358208955225, "grad_norm": 1.3550817966461182, "learning_rate": 9.711904761904762e-06, "loss": 40.155, "step": 7807 }, { "epoch": 185.90746268656716, "grad_norm": 1.7168573141098022, "learning_rate": 9.710714285714286e-06, "loss": 40.3376, "step": 7808 }, { "epoch": 185.9313432835821, "grad_norm": 1.3770028352737427, "learning_rate": 9.70952380952381e-06, "loss": 38.9747, "step": 7809 }, { "epoch": 185.955223880597, "grad_norm": 1.3534685373306274, "learning_rate": 9.708333333333333e-06, "loss": 40.5622, "step": 7810 }, { "epoch": 185.97910447761194, "grad_norm": 1.3348649740219116, "learning_rate": 9.707142857142859e-06, "loss": 39.5631, "step": 7811 }, { "epoch": 186.0, "grad_norm": 1.3146597146987915, "learning_rate": 9.705952380952382e-06, "loss": 33.6458, "step": 7812 }, { "epoch": 186.02388059701494, "grad_norm": 1.3513083457946777, "learning_rate": 9.704761904761906e-06, "loss": 40.3473, "step": 7813 }, { "epoch": 186.04776119402985, "grad_norm": 1.3351701498031616, "learning_rate": 9.70357142857143e-06, "loss": 39.8729, "step": 7814 }, { "epoch": 186.07164179104478, "grad_norm": 1.3340144157409668, "learning_rate": 9.702380952380953e-06, "loss": 40.1869, "step": 7815 }, { "epoch": 186.0955223880597, "grad_norm": 1.3248921632766724, "learning_rate": 9.701190476190477e-06, "loss": 38.6531, "step": 7816 }, { "epoch": 186.11940298507463, "grad_norm": 1.359162449836731, "learning_rate": 9.7e-06, "loss": 39.5863, "step": 7817 }, { "epoch": 186.14328358208957, "grad_norm": 1.3346376419067383, "learning_rate": 9.698809523809526e-06, "loss": 40.0428, "step": 7818 }, { "epoch": 186.16716417910447, "grad_norm": 1.3426218032836914, "learning_rate": 9.697619047619048e-06, "loss": 39.5507, "step": 7819 }, { "epoch": 186.1910447761194, "grad_norm": 1.3662432432174683, "learning_rate": 9.696428571428573e-06, "loss": 41.4172, "step": 7820 }, { "epoch": 186.21492537313432, "grad_norm": 1.3642263412475586, "learning_rate": 9.695238095238096e-06, "loss": 39.9677, "step": 7821 }, { "epoch": 186.23880597014926, "grad_norm": 1.3355430364608765, "learning_rate": 9.69404761904762e-06, "loss": 39.5065, "step": 7822 }, { "epoch": 186.26268656716417, "grad_norm": 1.3937400579452515, "learning_rate": 9.692857142857144e-06, "loss": 39.6055, "step": 7823 }, { "epoch": 186.2865671641791, "grad_norm": 1.3605906963348389, "learning_rate": 9.691666666666667e-06, "loss": 40.0683, "step": 7824 }, { "epoch": 186.31044776119404, "grad_norm": 1.4008375406265259, "learning_rate": 9.690476190476191e-06, "loss": 39.1216, "step": 7825 }, { "epoch": 186.33432835820895, "grad_norm": 1.3750519752502441, "learning_rate": 9.689285714285716e-06, "loss": 40.5142, "step": 7826 }, { "epoch": 186.3582089552239, "grad_norm": 1.3450565338134766, "learning_rate": 9.688095238095238e-06, "loss": 39.9726, "step": 7827 }, { "epoch": 186.3820895522388, "grad_norm": 1.3473477363586426, "learning_rate": 9.686904761904764e-06, "loss": 40.276, "step": 7828 }, { "epoch": 186.40597014925373, "grad_norm": 1.33859384059906, "learning_rate": 9.685714285714285e-06, "loss": 39.3928, "step": 7829 }, { "epoch": 186.42985074626867, "grad_norm": 1.3450857400894165, "learning_rate": 9.68452380952381e-06, "loss": 40.6313, "step": 7830 }, { "epoch": 186.45373134328358, "grad_norm": 1.383236289024353, "learning_rate": 9.683333333333334e-06, "loss": 39.7702, "step": 7831 }, { "epoch": 186.47761194029852, "grad_norm": 1.3860677480697632, "learning_rate": 9.682142857142858e-06, "loss": 40.5862, "step": 7832 }, { "epoch": 186.50149253731342, "grad_norm": 1.3554397821426392, "learning_rate": 9.680952380952382e-06, "loss": 39.3825, "step": 7833 }, { "epoch": 186.52537313432836, "grad_norm": 1.3280023336410522, "learning_rate": 9.679761904761905e-06, "loss": 40.089, "step": 7834 }, { "epoch": 186.54925373134327, "grad_norm": 1.3673008680343628, "learning_rate": 9.678571428571429e-06, "loss": 39.4143, "step": 7835 }, { "epoch": 186.5731343283582, "grad_norm": 1.3212974071502686, "learning_rate": 9.677380952380954e-06, "loss": 39.2837, "step": 7836 }, { "epoch": 186.59701492537314, "grad_norm": 1.3630690574645996, "learning_rate": 9.676190476190476e-06, "loss": 40.8979, "step": 7837 }, { "epoch": 186.62089552238805, "grad_norm": 1.3461096286773682, "learning_rate": 9.675000000000001e-06, "loss": 40.3701, "step": 7838 }, { "epoch": 186.644776119403, "grad_norm": 1.3191614151000977, "learning_rate": 9.673809523809525e-06, "loss": 39.8343, "step": 7839 }, { "epoch": 186.6686567164179, "grad_norm": 1.344325065612793, "learning_rate": 9.672619047619049e-06, "loss": 40.4083, "step": 7840 }, { "epoch": 186.69253731343284, "grad_norm": 1.3655611276626587, "learning_rate": 9.671428571428572e-06, "loss": 39.2549, "step": 7841 }, { "epoch": 186.71641791044777, "grad_norm": 1.3680347204208374, "learning_rate": 9.670238095238096e-06, "loss": 38.3742, "step": 7842 }, { "epoch": 186.74029850746268, "grad_norm": 1.3389077186584473, "learning_rate": 9.66904761904762e-06, "loss": 40.5457, "step": 7843 }, { "epoch": 186.76417910447762, "grad_norm": 1.3387905359268188, "learning_rate": 9.667857142857143e-06, "loss": 39.6538, "step": 7844 }, { "epoch": 186.78805970149253, "grad_norm": 1.390548586845398, "learning_rate": 9.666666666666667e-06, "loss": 39.151, "step": 7845 }, { "epoch": 186.81194029850747, "grad_norm": 1.3522568941116333, "learning_rate": 9.665476190476192e-06, "loss": 41.1206, "step": 7846 }, { "epoch": 186.83582089552237, "grad_norm": 1.355774164199829, "learning_rate": 9.664285714285716e-06, "loss": 39.9971, "step": 7847 }, { "epoch": 186.8597014925373, "grad_norm": 1.4954625368118286, "learning_rate": 9.66309523809524e-06, "loss": 39.2659, "step": 7848 }, { "epoch": 186.88358208955225, "grad_norm": 1.3742986917495728, "learning_rate": 9.661904761904763e-06, "loss": 39.7538, "step": 7849 }, { "epoch": 186.90746268656716, "grad_norm": 1.3273199796676636, "learning_rate": 9.660714285714287e-06, "loss": 38.6917, "step": 7850 }, { "epoch": 186.9313432835821, "grad_norm": NaN, "learning_rate": 9.65952380952381e-06, "loss": 67.7802, "step": 7851 }, { "epoch": 186.955223880597, "grad_norm": 1.3542839288711548, "learning_rate": 9.65952380952381e-06, "loss": 39.4139, "step": 7852 }, { "epoch": 186.97910447761194, "grad_norm": 1.3380635976791382, "learning_rate": 9.658333333333334e-06, "loss": 40.162, "step": 7853 }, { "epoch": 187.0, "grad_norm": 1.3620200157165527, "learning_rate": 9.657142857142859e-06, "loss": 33.7088, "step": 7854 }, { "epoch": 187.02388059701494, "grad_norm": 1.361411690711975, "learning_rate": 9.655952380952381e-06, "loss": 39.8832, "step": 7855 }, { "epoch": 187.04776119402985, "grad_norm": NaN, "learning_rate": 9.654761904761906e-06, "loss": 58.7697, "step": 7856 }, { "epoch": 187.07164179104478, "grad_norm": 1.2959736585617065, "learning_rate": 9.654761904761906e-06, "loss": 38.9787, "step": 7857 }, { "epoch": 187.0955223880597, "grad_norm": 1.382960557937622, "learning_rate": 9.653571428571428e-06, "loss": 39.0562, "step": 7858 }, { "epoch": 187.11940298507463, "grad_norm": 1.3307896852493286, "learning_rate": 9.652380952380954e-06, "loss": 40.2733, "step": 7859 }, { "epoch": 187.14328358208957, "grad_norm": 1.398577094078064, "learning_rate": 9.651190476190477e-06, "loss": 39.1492, "step": 7860 }, { "epoch": 187.16716417910447, "grad_norm": 1.3510396480560303, "learning_rate": 9.65e-06, "loss": 38.8411, "step": 7861 }, { "epoch": 187.1910447761194, "grad_norm": 1.334363341331482, "learning_rate": 9.648809523809524e-06, "loss": 40.1834, "step": 7862 }, { "epoch": 187.21492537313432, "grad_norm": 1.3405667543411255, "learning_rate": 9.647619047619048e-06, "loss": 39.4658, "step": 7863 }, { "epoch": 187.23880597014926, "grad_norm": 1.3481136560440063, "learning_rate": 9.646428571428572e-06, "loss": 40.2951, "step": 7864 }, { "epoch": 187.26268656716417, "grad_norm": 1.3307167291641235, "learning_rate": 9.645238095238097e-06, "loss": 39.8486, "step": 7865 }, { "epoch": 187.2865671641791, "grad_norm": 1.3630717992782593, "learning_rate": 9.644047619047619e-06, "loss": 40.2901, "step": 7866 }, { "epoch": 187.31044776119404, "grad_norm": 1.3555333614349365, "learning_rate": 9.642857142857144e-06, "loss": 39.1419, "step": 7867 }, { "epoch": 187.33432835820895, "grad_norm": 1.331682801246643, "learning_rate": 9.641666666666666e-06, "loss": 40.2512, "step": 7868 }, { "epoch": 187.3582089552239, "grad_norm": 1.4857771396636963, "learning_rate": 9.640476190476191e-06, "loss": 40.0353, "step": 7869 }, { "epoch": 187.3820895522388, "grad_norm": 1.3247640132904053, "learning_rate": 9.639285714285715e-06, "loss": 39.877, "step": 7870 }, { "epoch": 187.40597014925373, "grad_norm": 1.33641517162323, "learning_rate": 9.638095238095239e-06, "loss": 40.0425, "step": 7871 }, { "epoch": 187.42985074626867, "grad_norm": 1.3539186716079712, "learning_rate": 9.636904761904762e-06, "loss": 40.7108, "step": 7872 }, { "epoch": 187.45373134328358, "grad_norm": 1.3171097040176392, "learning_rate": 9.635714285714286e-06, "loss": 38.7593, "step": 7873 }, { "epoch": 187.47761194029852, "grad_norm": 1.3663147687911987, "learning_rate": 9.63452380952381e-06, "loss": 39.5839, "step": 7874 }, { "epoch": 187.50149253731342, "grad_norm": 1.3358383178710938, "learning_rate": 9.633333333333335e-06, "loss": 40.0806, "step": 7875 }, { "epoch": 187.52537313432836, "grad_norm": 1.3321752548217773, "learning_rate": 9.632142857142858e-06, "loss": 38.8618, "step": 7876 }, { "epoch": 187.54925373134327, "grad_norm": 1.3412487506866455, "learning_rate": 9.630952380952382e-06, "loss": 39.9958, "step": 7877 }, { "epoch": 187.5731343283582, "grad_norm": 1.3582993745803833, "learning_rate": 9.629761904761906e-06, "loss": 39.8273, "step": 7878 }, { "epoch": 187.59701492537314, "grad_norm": 1.3583983182907104, "learning_rate": 9.62857142857143e-06, "loss": 40.0015, "step": 7879 }, { "epoch": 187.62089552238805, "grad_norm": 1.355983018875122, "learning_rate": 9.627380952380953e-06, "loss": 39.6311, "step": 7880 }, { "epoch": 187.644776119403, "grad_norm": 1.3607032299041748, "learning_rate": 9.626190476190477e-06, "loss": 40.6084, "step": 7881 }, { "epoch": 187.6686567164179, "grad_norm": 1.3526500463485718, "learning_rate": 9.625e-06, "loss": 40.325, "step": 7882 }, { "epoch": 187.69253731343284, "grad_norm": 1.3157875537872314, "learning_rate": 9.623809523809524e-06, "loss": 40.6745, "step": 7883 }, { "epoch": 187.71641791044777, "grad_norm": 1.3640722036361694, "learning_rate": 9.622619047619049e-06, "loss": 39.1251, "step": 7884 }, { "epoch": 187.74029850746268, "grad_norm": 1.355539083480835, "learning_rate": 9.621428571428573e-06, "loss": 39.6818, "step": 7885 }, { "epoch": 187.76417910447762, "grad_norm": 1.3666335344314575, "learning_rate": 9.620238095238096e-06, "loss": 39.7654, "step": 7886 }, { "epoch": 187.78805970149253, "grad_norm": 1.344766616821289, "learning_rate": 9.61904761904762e-06, "loss": 40.2216, "step": 7887 }, { "epoch": 187.81194029850747, "grad_norm": 1.3368803262710571, "learning_rate": 9.617857142857144e-06, "loss": 41.2177, "step": 7888 }, { "epoch": 187.83582089552237, "grad_norm": 1.3445661067962646, "learning_rate": 9.616666666666667e-06, "loss": 39.1916, "step": 7889 }, { "epoch": 187.8597014925373, "grad_norm": 1.4862818717956543, "learning_rate": 9.615476190476193e-06, "loss": 40.0184, "step": 7890 }, { "epoch": 187.88358208955225, "grad_norm": 1.3438193798065186, "learning_rate": 9.614285714285714e-06, "loss": 39.6909, "step": 7891 }, { "epoch": 187.90746268656716, "grad_norm": 1.3423206806182861, "learning_rate": 9.61309523809524e-06, "loss": 39.938, "step": 7892 }, { "epoch": 187.9313432835821, "grad_norm": 1.8416774272918701, "learning_rate": 9.611904761904762e-06, "loss": 38.828, "step": 7893 }, { "epoch": 187.955223880597, "grad_norm": 1.3728396892547607, "learning_rate": 9.610714285714287e-06, "loss": 39.939, "step": 7894 }, { "epoch": 187.97910447761194, "grad_norm": 1.3345282077789307, "learning_rate": 9.60952380952381e-06, "loss": 39.8171, "step": 7895 }, { "epoch": 188.0, "grad_norm": 1.313852071762085, "learning_rate": 9.608333333333334e-06, "loss": 35.3763, "step": 7896 }, { "epoch": 188.02388059701494, "grad_norm": 1.3263553380966187, "learning_rate": 9.607142857142858e-06, "loss": 39.5158, "step": 7897 }, { "epoch": 188.04776119402985, "grad_norm": 1.3483853340148926, "learning_rate": 9.605952380952381e-06, "loss": 38.9402, "step": 7898 }, { "epoch": 188.07164179104478, "grad_norm": 1.3501253128051758, "learning_rate": 9.604761904761905e-06, "loss": 40.0367, "step": 7899 }, { "epoch": 188.0955223880597, "grad_norm": 1.3761786222457886, "learning_rate": 9.60357142857143e-06, "loss": 39.1418, "step": 7900 }, { "epoch": 188.11940298507463, "grad_norm": 1.3482613563537598, "learning_rate": 9.602380952380952e-06, "loss": 38.7221, "step": 7901 }, { "epoch": 188.14328358208957, "grad_norm": 1.3728978633880615, "learning_rate": 9.601190476190478e-06, "loss": 39.572, "step": 7902 }, { "epoch": 188.16716417910447, "grad_norm": 1.3547227382659912, "learning_rate": 9.600000000000001e-06, "loss": 40.605, "step": 7903 }, { "epoch": 188.1910447761194, "grad_norm": 1.3476436138153076, "learning_rate": 9.598809523809525e-06, "loss": 39.845, "step": 7904 }, { "epoch": 188.21492537313432, "grad_norm": 1.3198156356811523, "learning_rate": 9.597619047619048e-06, "loss": 38.5138, "step": 7905 }, { "epoch": 188.23880597014926, "grad_norm": 1.4031002521514893, "learning_rate": 9.596428571428572e-06, "loss": 38.7777, "step": 7906 }, { "epoch": 188.26268656716417, "grad_norm": 1.3373370170593262, "learning_rate": 9.595238095238096e-06, "loss": 41.3109, "step": 7907 }, { "epoch": 188.2865671641791, "grad_norm": 1.3407784700393677, "learning_rate": 9.59404761904762e-06, "loss": 39.113, "step": 7908 }, { "epoch": 188.31044776119404, "grad_norm": 1.2993460893630981, "learning_rate": 9.592857142857143e-06, "loss": 39.5795, "step": 7909 }, { "epoch": 188.33432835820895, "grad_norm": 1.3487869501113892, "learning_rate": 9.591666666666667e-06, "loss": 39.13, "step": 7910 }, { "epoch": 188.3582089552239, "grad_norm": 1.3610813617706299, "learning_rate": 9.590476190476192e-06, "loss": 40.4148, "step": 7911 }, { "epoch": 188.3820895522388, "grad_norm": 1.2994141578674316, "learning_rate": 9.589285714285716e-06, "loss": 38.7277, "step": 7912 }, { "epoch": 188.40597014925373, "grad_norm": 1.3491326570510864, "learning_rate": 9.588095238095239e-06, "loss": 40.0697, "step": 7913 }, { "epoch": 188.42985074626867, "grad_norm": 1.371491551399231, "learning_rate": 9.586904761904763e-06, "loss": 40.0035, "step": 7914 }, { "epoch": 188.45373134328358, "grad_norm": 1.323358178138733, "learning_rate": 9.585714285714286e-06, "loss": 40.046, "step": 7915 }, { "epoch": 188.47761194029852, "grad_norm": 1.3738892078399658, "learning_rate": 9.58452380952381e-06, "loss": 40.3131, "step": 7916 }, { "epoch": 188.50149253731342, "grad_norm": 1.3278961181640625, "learning_rate": 9.583333333333335e-06, "loss": 39.1732, "step": 7917 }, { "epoch": 188.52537313432836, "grad_norm": 1.4351946115493774, "learning_rate": 9.582142857142857e-06, "loss": 39.7018, "step": 7918 }, { "epoch": 188.54925373134327, "grad_norm": 1.395856261253357, "learning_rate": 9.580952380952383e-06, "loss": 38.2338, "step": 7919 }, { "epoch": 188.5731343283582, "grad_norm": 1.319776177406311, "learning_rate": 9.579761904761904e-06, "loss": 40.2578, "step": 7920 }, { "epoch": 188.59701492537314, "grad_norm": 1.3342655897140503, "learning_rate": 9.57857142857143e-06, "loss": 39.1729, "step": 7921 }, { "epoch": 188.62089552238805, "grad_norm": 1.3415793180465698, "learning_rate": 9.577380952380953e-06, "loss": 39.8557, "step": 7922 }, { "epoch": 188.644776119403, "grad_norm": 1.3423092365264893, "learning_rate": 9.576190476190477e-06, "loss": 39.6797, "step": 7923 }, { "epoch": 188.6686567164179, "grad_norm": 1.356002688407898, "learning_rate": 9.575e-06, "loss": 40.7329, "step": 7924 }, { "epoch": 188.69253731343284, "grad_norm": 1.3410885334014893, "learning_rate": 9.573809523809524e-06, "loss": 39.4478, "step": 7925 }, { "epoch": 188.71641791044777, "grad_norm": 1.3469687700271606, "learning_rate": 9.572619047619048e-06, "loss": 41.1341, "step": 7926 }, { "epoch": 188.74029850746268, "grad_norm": 1.3851624727249146, "learning_rate": 9.571428571428573e-06, "loss": 40.8802, "step": 7927 }, { "epoch": 188.76417910447762, "grad_norm": 1.344628095626831, "learning_rate": 9.570238095238095e-06, "loss": 39.3251, "step": 7928 }, { "epoch": 188.78805970149253, "grad_norm": 1.3693400621414185, "learning_rate": 9.56904761904762e-06, "loss": 40.7977, "step": 7929 }, { "epoch": 188.81194029850747, "grad_norm": 1.471144437789917, "learning_rate": 9.567857142857142e-06, "loss": 39.2367, "step": 7930 }, { "epoch": 188.83582089552237, "grad_norm": 1.363493800163269, "learning_rate": 9.566666666666668e-06, "loss": 38.9286, "step": 7931 }, { "epoch": 188.8597014925373, "grad_norm": 1.3429893255233765, "learning_rate": 9.565476190476191e-06, "loss": 41.3516, "step": 7932 }, { "epoch": 188.88358208955225, "grad_norm": 1.3769652843475342, "learning_rate": 9.564285714285715e-06, "loss": 40.5142, "step": 7933 }, { "epoch": 188.90746268656716, "grad_norm": 1.3317351341247559, "learning_rate": 9.563095238095239e-06, "loss": 39.6428, "step": 7934 }, { "epoch": 188.9313432835821, "grad_norm": 1.344335913658142, "learning_rate": 9.561904761904762e-06, "loss": 39.5352, "step": 7935 }, { "epoch": 188.955223880597, "grad_norm": 1.345728874206543, "learning_rate": 9.560714285714286e-06, "loss": 41.4293, "step": 7936 }, { "epoch": 188.97910447761194, "grad_norm": 1.3956395387649536, "learning_rate": 9.559523809523811e-06, "loss": 40.1532, "step": 7937 }, { "epoch": 189.0, "grad_norm": 1.3657138347625732, "learning_rate": 9.558333333333335e-06, "loss": 34.7132, "step": 7938 }, { "epoch": 189.02388059701494, "grad_norm": 1.3575364351272583, "learning_rate": 9.557142857142858e-06, "loss": 39.2288, "step": 7939 }, { "epoch": 189.04776119402985, "grad_norm": 1.3288277387619019, "learning_rate": 9.555952380952382e-06, "loss": 39.5385, "step": 7940 }, { "epoch": 189.07164179104478, "grad_norm": 1.325917363166809, "learning_rate": 9.554761904761906e-06, "loss": 41.0261, "step": 7941 }, { "epoch": 189.0955223880597, "grad_norm": 1.3414726257324219, "learning_rate": 9.55357142857143e-06, "loss": 39.4504, "step": 7942 }, { "epoch": 189.11940298507463, "grad_norm": 1.33698570728302, "learning_rate": 9.552380952380953e-06, "loss": 39.6444, "step": 7943 }, { "epoch": 189.14328358208957, "grad_norm": 1.3750137090682983, "learning_rate": 9.551190476190476e-06, "loss": 39.6394, "step": 7944 }, { "epoch": 189.16716417910447, "grad_norm": 1.4785301685333252, "learning_rate": 9.55e-06, "loss": 40.7058, "step": 7945 }, { "epoch": 189.1910447761194, "grad_norm": 1.3463925123214722, "learning_rate": 9.548809523809525e-06, "loss": 40.0316, "step": 7946 }, { "epoch": 189.21492537313432, "grad_norm": 1.3283019065856934, "learning_rate": 9.547619047619049e-06, "loss": 40.0483, "step": 7947 }, { "epoch": 189.23880597014926, "grad_norm": 1.3414371013641357, "learning_rate": 9.546428571428573e-06, "loss": 40.2106, "step": 7948 }, { "epoch": 189.26268656716417, "grad_norm": 1.3282434940338135, "learning_rate": 9.545238095238096e-06, "loss": 39.8131, "step": 7949 }, { "epoch": 189.2865671641791, "grad_norm": 1.383959174156189, "learning_rate": 9.54404761904762e-06, "loss": 39.0248, "step": 7950 }, { "epoch": 189.31044776119404, "grad_norm": 1.327483057975769, "learning_rate": 9.542857142857143e-06, "loss": 38.7495, "step": 7951 }, { "epoch": 189.33432835820895, "grad_norm": 1.3496366739273071, "learning_rate": 9.541666666666669e-06, "loss": 39.2862, "step": 7952 }, { "epoch": 189.3582089552239, "grad_norm": 1.3664982318878174, "learning_rate": 9.54047619047619e-06, "loss": 40.3035, "step": 7953 }, { "epoch": 189.3820895522388, "grad_norm": 1.3399566411972046, "learning_rate": 9.539285714285716e-06, "loss": 39.0506, "step": 7954 }, { "epoch": 189.40597014925373, "grad_norm": 1.3157318830490112, "learning_rate": 9.538095238095238e-06, "loss": 41.8778, "step": 7955 }, { "epoch": 189.42985074626867, "grad_norm": 1.3023029565811157, "learning_rate": 9.536904761904763e-06, "loss": 39.848, "step": 7956 }, { "epoch": 189.45373134328358, "grad_norm": 1.3733853101730347, "learning_rate": 9.535714285714287e-06, "loss": 40.004, "step": 7957 }, { "epoch": 189.47761194029852, "grad_norm": 1.3721355199813843, "learning_rate": 9.53452380952381e-06, "loss": 40.072, "step": 7958 }, { "epoch": 189.50149253731342, "grad_norm": 1.3396120071411133, "learning_rate": 9.533333333333334e-06, "loss": 39.9072, "step": 7959 }, { "epoch": 189.52537313432836, "grad_norm": 1.3481887578964233, "learning_rate": 9.532142857142858e-06, "loss": 39.0959, "step": 7960 }, { "epoch": 189.54925373134327, "grad_norm": 1.3990442752838135, "learning_rate": 9.530952380952381e-06, "loss": 39.4471, "step": 7961 }, { "epoch": 189.5731343283582, "grad_norm": 1.334368109703064, "learning_rate": 9.529761904761905e-06, "loss": 39.6361, "step": 7962 }, { "epoch": 189.59701492537314, "grad_norm": 1.3110425472259521, "learning_rate": 9.528571428571429e-06, "loss": 39.7735, "step": 7963 }, { "epoch": 189.62089552238805, "grad_norm": 1.3738718032836914, "learning_rate": 9.527380952380954e-06, "loss": 40.5615, "step": 7964 }, { "epoch": 189.644776119403, "grad_norm": 1.3932819366455078, "learning_rate": 9.526190476190476e-06, "loss": 39.4725, "step": 7965 }, { "epoch": 189.6686567164179, "grad_norm": 1.4250450134277344, "learning_rate": 9.525000000000001e-06, "loss": 39.3881, "step": 7966 }, { "epoch": 189.69253731343284, "grad_norm": 1.3645894527435303, "learning_rate": 9.523809523809525e-06, "loss": 38.9167, "step": 7967 }, { "epoch": 189.71641791044777, "grad_norm": 1.357110619544983, "learning_rate": 9.522619047619048e-06, "loss": 39.4212, "step": 7968 }, { "epoch": 189.74029850746268, "grad_norm": 1.5733661651611328, "learning_rate": 9.521428571428572e-06, "loss": 41.1723, "step": 7969 }, { "epoch": 189.76417910447762, "grad_norm": 1.4033374786376953, "learning_rate": 9.520238095238096e-06, "loss": 38.8742, "step": 7970 }, { "epoch": 189.78805970149253, "grad_norm": 1.3562182188034058, "learning_rate": 9.51904761904762e-06, "loss": 40.4984, "step": 7971 }, { "epoch": 189.81194029850747, "grad_norm": 1.3570888042449951, "learning_rate": 9.517857142857143e-06, "loss": 39.0115, "step": 7972 }, { "epoch": 189.83582089552237, "grad_norm": 1.3477377891540527, "learning_rate": 9.516666666666668e-06, "loss": 39.1035, "step": 7973 }, { "epoch": 189.8597014925373, "grad_norm": 1.3158197402954102, "learning_rate": 9.515476190476192e-06, "loss": 39.1848, "step": 7974 }, { "epoch": 189.88358208955225, "grad_norm": 1.3726009130477905, "learning_rate": 9.514285714285715e-06, "loss": 39.3252, "step": 7975 }, { "epoch": 189.90746268656716, "grad_norm": 1.355769395828247, "learning_rate": 9.513095238095239e-06, "loss": 39.8979, "step": 7976 }, { "epoch": 189.9313432835821, "grad_norm": 1.3282418251037598, "learning_rate": 9.511904761904763e-06, "loss": 40.2946, "step": 7977 }, { "epoch": 189.955223880597, "grad_norm": 1.3433393239974976, "learning_rate": 9.510714285714286e-06, "loss": 39.9997, "step": 7978 }, { "epoch": 189.97910447761194, "grad_norm": 1.361036777496338, "learning_rate": 9.50952380952381e-06, "loss": 40.1119, "step": 7979 }, { "epoch": 190.0, "grad_norm": 1.3404619693756104, "learning_rate": 9.508333333333333e-06, "loss": 35.3702, "step": 7980 }, { "epoch": 190.02388059701494, "grad_norm": 1.2960070371627808, "learning_rate": 9.507142857142859e-06, "loss": 40.2256, "step": 7981 }, { "epoch": 190.04776119402985, "grad_norm": 1.331105351448059, "learning_rate": 9.50595238095238e-06, "loss": 40.7262, "step": 7982 }, { "epoch": 190.07164179104478, "grad_norm": 1.3313236236572266, "learning_rate": 9.504761904761906e-06, "loss": 39.4228, "step": 7983 }, { "epoch": 190.0955223880597, "grad_norm": 1.3793481588363647, "learning_rate": 9.50357142857143e-06, "loss": 39.9808, "step": 7984 }, { "epoch": 190.11940298507463, "grad_norm": 1.304244041442871, "learning_rate": 9.502380952380953e-06, "loss": 39.5987, "step": 7985 }, { "epoch": 190.14328358208957, "grad_norm": 1.4000884294509888, "learning_rate": 9.501190476190477e-06, "loss": 38.7156, "step": 7986 }, { "epoch": 190.16716417910447, "grad_norm": 1.320894479751587, "learning_rate": 9.5e-06, "loss": 39.6314, "step": 7987 }, { "epoch": 190.1910447761194, "grad_norm": 1.3235328197479248, "learning_rate": 9.498809523809524e-06, "loss": 40.9279, "step": 7988 }, { "epoch": 190.21492537313432, "grad_norm": 1.3750410079956055, "learning_rate": 9.49761904761905e-06, "loss": 38.5321, "step": 7989 }, { "epoch": 190.23880597014926, "grad_norm": 1.356196403503418, "learning_rate": 9.496428571428571e-06, "loss": 41.2405, "step": 7990 }, { "epoch": 190.26268656716417, "grad_norm": 1.32931387424469, "learning_rate": 9.495238095238097e-06, "loss": 39.1872, "step": 7991 }, { "epoch": 190.2865671641791, "grad_norm": 1.3788326978683472, "learning_rate": 9.494047619047619e-06, "loss": 40.2905, "step": 7992 }, { "epoch": 190.31044776119404, "grad_norm": 1.3658127784729004, "learning_rate": 9.492857142857144e-06, "loss": 39.5931, "step": 7993 }, { "epoch": 190.33432835820895, "grad_norm": 1.3256090879440308, "learning_rate": 9.491666666666668e-06, "loss": 39.6655, "step": 7994 }, { "epoch": 190.3582089552239, "grad_norm": 1.3138411045074463, "learning_rate": 9.490476190476191e-06, "loss": 40.7032, "step": 7995 }, { "epoch": 190.3820895522388, "grad_norm": 1.3769841194152832, "learning_rate": 9.489285714285715e-06, "loss": 39.6528, "step": 7996 }, { "epoch": 190.40597014925373, "grad_norm": 1.3702908754348755, "learning_rate": 9.488095238095238e-06, "loss": 39.8254, "step": 7997 }, { "epoch": 190.42985074626867, "grad_norm": 1.3650037050247192, "learning_rate": 9.486904761904762e-06, "loss": 40.5366, "step": 7998 }, { "epoch": 190.45373134328358, "grad_norm": 1.3124290704727173, "learning_rate": 9.485714285714287e-06, "loss": 40.4331, "step": 7999 }, { "epoch": 190.47761194029852, "grad_norm": 1.385108232498169, "learning_rate": 9.484523809523811e-06, "loss": 40.3711, "step": 8000 }, { "epoch": 190.50149253731342, "grad_norm": 1.3207087516784668, "learning_rate": 9.483333333333335e-06, "loss": 40.5207, "step": 8001 }, { "epoch": 190.52537313432836, "grad_norm": 1.3777023553848267, "learning_rate": 9.482142857142858e-06, "loss": 37.995, "step": 8002 }, { "epoch": 190.54925373134327, "grad_norm": 1.3449172973632812, "learning_rate": 9.480952380952382e-06, "loss": 39.0541, "step": 8003 }, { "epoch": 190.5731343283582, "grad_norm": 1.331497073173523, "learning_rate": 9.479761904761905e-06, "loss": 38.6272, "step": 8004 }, { "epoch": 190.59701492537314, "grad_norm": 1.4274811744689941, "learning_rate": 9.478571428571429e-06, "loss": 38.7971, "step": 8005 }, { "epoch": 190.62089552238805, "grad_norm": 1.3587981462478638, "learning_rate": 9.477380952380953e-06, "loss": 39.8261, "step": 8006 }, { "epoch": 190.644776119403, "grad_norm": 1.3601469993591309, "learning_rate": 9.476190476190476e-06, "loss": 39.9195, "step": 8007 }, { "epoch": 190.6686567164179, "grad_norm": 1.3465371131896973, "learning_rate": 9.475000000000002e-06, "loss": 38.5457, "step": 8008 }, { "epoch": 190.69253731343284, "grad_norm": 1.3322174549102783, "learning_rate": 9.473809523809525e-06, "loss": 41.2676, "step": 8009 }, { "epoch": 190.71641791044777, "grad_norm": 1.3771815299987793, "learning_rate": 9.472619047619049e-06, "loss": 39.2349, "step": 8010 }, { "epoch": 190.74029850746268, "grad_norm": 1.3361351490020752, "learning_rate": 9.471428571428572e-06, "loss": 40.0625, "step": 8011 }, { "epoch": 190.76417910447762, "grad_norm": 1.353151798248291, "learning_rate": 9.470238095238096e-06, "loss": 40.9578, "step": 8012 }, { "epoch": 190.78805970149253, "grad_norm": 1.342685580253601, "learning_rate": 9.46904761904762e-06, "loss": 41.1633, "step": 8013 }, { "epoch": 190.81194029850747, "grad_norm": 1.3385871648788452, "learning_rate": 9.467857142857143e-06, "loss": 39.2725, "step": 8014 }, { "epoch": 190.83582089552237, "grad_norm": 1.3806015253067017, "learning_rate": 9.466666666666667e-06, "loss": 39.2081, "step": 8015 }, { "epoch": 190.8597014925373, "grad_norm": 1.3438597917556763, "learning_rate": 9.465476190476192e-06, "loss": 39.4107, "step": 8016 }, { "epoch": 190.88358208955225, "grad_norm": 1.401416540145874, "learning_rate": 9.464285714285714e-06, "loss": 38.9786, "step": 8017 }, { "epoch": 190.90746268656716, "grad_norm": 1.4244006872177124, "learning_rate": 9.46309523809524e-06, "loss": 40.5079, "step": 8018 }, { "epoch": 190.9313432835821, "grad_norm": 1.3439393043518066, "learning_rate": 9.461904761904761e-06, "loss": 38.9927, "step": 8019 }, { "epoch": 190.955223880597, "grad_norm": 1.3794993162155151, "learning_rate": 9.460714285714287e-06, "loss": 40.6916, "step": 8020 }, { "epoch": 190.97910447761194, "grad_norm": 1.3528650999069214, "learning_rate": 9.45952380952381e-06, "loss": 39.6387, "step": 8021 }, { "epoch": 191.0, "grad_norm": 1.3337764739990234, "learning_rate": 9.458333333333334e-06, "loss": 33.5169, "step": 8022 }, { "epoch": 191.02388059701494, "grad_norm": 1.3296302556991577, "learning_rate": 9.457142857142858e-06, "loss": 39.9291, "step": 8023 }, { "epoch": 191.04776119402985, "grad_norm": 1.3452187776565552, "learning_rate": 9.455952380952381e-06, "loss": 39.5024, "step": 8024 }, { "epoch": 191.07164179104478, "grad_norm": 1.3460960388183594, "learning_rate": 9.454761904761905e-06, "loss": 39.8409, "step": 8025 }, { "epoch": 191.0955223880597, "grad_norm": 1.3481632471084595, "learning_rate": 9.45357142857143e-06, "loss": 40.4573, "step": 8026 }, { "epoch": 191.11940298507463, "grad_norm": 1.3564794063568115, "learning_rate": 9.452380952380952e-06, "loss": 39.3951, "step": 8027 }, { "epoch": 191.14328358208957, "grad_norm": 1.3434679508209229, "learning_rate": 9.451190476190477e-06, "loss": 40.5594, "step": 8028 }, { "epoch": 191.16716417910447, "grad_norm": 1.3565998077392578, "learning_rate": 9.450000000000001e-06, "loss": 39.2461, "step": 8029 }, { "epoch": 191.1910447761194, "grad_norm": 1.3339287042617798, "learning_rate": 9.448809523809525e-06, "loss": 39.2729, "step": 8030 }, { "epoch": 191.21492537313432, "grad_norm": 1.3689452409744263, "learning_rate": 9.447619047619048e-06, "loss": 39.5538, "step": 8031 }, { "epoch": 191.23880597014926, "grad_norm": 1.3419914245605469, "learning_rate": 9.446428571428572e-06, "loss": 39.3452, "step": 8032 }, { "epoch": 191.26268656716417, "grad_norm": 1.3562124967575073, "learning_rate": 9.445238095238095e-06, "loss": 40.3475, "step": 8033 }, { "epoch": 191.2865671641791, "grad_norm": 1.367467999458313, "learning_rate": 9.444047619047619e-06, "loss": 39.5169, "step": 8034 }, { "epoch": 191.31044776119404, "grad_norm": 1.3371708393096924, "learning_rate": 9.442857142857144e-06, "loss": 39.6813, "step": 8035 }, { "epoch": 191.33432835820895, "grad_norm": 1.325134038925171, "learning_rate": 9.441666666666668e-06, "loss": 38.6339, "step": 8036 }, { "epoch": 191.3582089552239, "grad_norm": 1.362094759941101, "learning_rate": 9.440476190476192e-06, "loss": 37.3554, "step": 8037 }, { "epoch": 191.3820895522388, "grad_norm": 1.339093804359436, "learning_rate": 9.439285714285715e-06, "loss": 39.9558, "step": 8038 }, { "epoch": 191.40597014925373, "grad_norm": 1.362038254737854, "learning_rate": 9.438095238095239e-06, "loss": 39.339, "step": 8039 }, { "epoch": 191.42985074626867, "grad_norm": 1.308975100517273, "learning_rate": 9.436904761904763e-06, "loss": 39.0685, "step": 8040 }, { "epoch": 191.45373134328358, "grad_norm": 1.3050904273986816, "learning_rate": 9.435714285714286e-06, "loss": 41.2991, "step": 8041 }, { "epoch": 191.47761194029852, "grad_norm": 1.3439526557922363, "learning_rate": 9.43452380952381e-06, "loss": 39.6562, "step": 8042 }, { "epoch": 191.50149253731342, "grad_norm": 1.3768411874771118, "learning_rate": 9.433333333333335e-06, "loss": 39.8082, "step": 8043 }, { "epoch": 191.52537313432836, "grad_norm": 1.3955868482589722, "learning_rate": 9.432142857142857e-06, "loss": 40.4883, "step": 8044 }, { "epoch": 191.54925373134327, "grad_norm": 1.3432313203811646, "learning_rate": 9.430952380952382e-06, "loss": 39.6283, "step": 8045 }, { "epoch": 191.5731343283582, "grad_norm": 1.3450462818145752, "learning_rate": 9.429761904761906e-06, "loss": 39.9974, "step": 8046 }, { "epoch": 191.59701492537314, "grad_norm": 1.3745976686477661, "learning_rate": 9.42857142857143e-06, "loss": 39.8627, "step": 8047 }, { "epoch": 191.62089552238805, "grad_norm": 1.3047659397125244, "learning_rate": 9.427380952380953e-06, "loss": 40.7895, "step": 8048 }, { "epoch": 191.644776119403, "grad_norm": 1.3221369981765747, "learning_rate": 9.426190476190477e-06, "loss": 41.2135, "step": 8049 }, { "epoch": 191.6686567164179, "grad_norm": 1.3959829807281494, "learning_rate": 9.425e-06, "loss": 40.1096, "step": 8050 }, { "epoch": 191.69253731343284, "grad_norm": 1.3417184352874756, "learning_rate": 9.423809523809526e-06, "loss": 39.687, "step": 8051 }, { "epoch": 191.71641791044777, "grad_norm": 1.391257643699646, "learning_rate": 9.422619047619048e-06, "loss": 40.0735, "step": 8052 }, { "epoch": 191.74029850746268, "grad_norm": 1.5898792743682861, "learning_rate": 9.421428571428573e-06, "loss": 39.8425, "step": 8053 }, { "epoch": 191.76417910447762, "grad_norm": 1.3367646932601929, "learning_rate": 9.420238095238095e-06, "loss": 39.8922, "step": 8054 }, { "epoch": 191.78805970149253, "grad_norm": 1.3344682455062866, "learning_rate": 9.41904761904762e-06, "loss": 40.3156, "step": 8055 }, { "epoch": 191.81194029850747, "grad_norm": 1.3712769746780396, "learning_rate": 9.417857142857144e-06, "loss": 39.7956, "step": 8056 }, { "epoch": 191.83582089552237, "grad_norm": 1.310329794883728, "learning_rate": 9.416666666666667e-06, "loss": 37.9843, "step": 8057 }, { "epoch": 191.8597014925373, "grad_norm": 1.4192101955413818, "learning_rate": 9.415476190476191e-06, "loss": 39.6497, "step": 8058 }, { "epoch": 191.88358208955225, "grad_norm": 1.3332245349884033, "learning_rate": 9.414285714285715e-06, "loss": 40.4634, "step": 8059 }, { "epoch": 191.90746268656716, "grad_norm": 1.3799422979354858, "learning_rate": 9.413095238095238e-06, "loss": 39.6259, "step": 8060 }, { "epoch": 191.9313432835821, "grad_norm": 1.326279640197754, "learning_rate": 9.411904761904764e-06, "loss": 39.127, "step": 8061 }, { "epoch": 191.955223880597, "grad_norm": 1.364497184753418, "learning_rate": 9.410714285714286e-06, "loss": 39.7196, "step": 8062 }, { "epoch": 191.97910447761194, "grad_norm": 1.2999094724655151, "learning_rate": 9.40952380952381e-06, "loss": 39.7599, "step": 8063 }, { "epoch": 192.0, "grad_norm": 1.361376404762268, "learning_rate": 9.408333333333334e-06, "loss": 35.5815, "step": 8064 }, { "epoch": 192.02388059701494, "grad_norm": 1.3672561645507812, "learning_rate": 9.407142857142858e-06, "loss": 39.999, "step": 8065 }, { "epoch": 192.04776119402985, "grad_norm": 1.3349528312683105, "learning_rate": 9.405952380952382e-06, "loss": 39.3769, "step": 8066 }, { "epoch": 192.07164179104478, "grad_norm": 1.3391177654266357, "learning_rate": 9.404761904761905e-06, "loss": 40.4139, "step": 8067 }, { "epoch": 192.0955223880597, "grad_norm": 1.3258390426635742, "learning_rate": 9.403571428571429e-06, "loss": 38.8765, "step": 8068 }, { "epoch": 192.11940298507463, "grad_norm": 1.3227555751800537, "learning_rate": 9.402380952380953e-06, "loss": 40.2979, "step": 8069 }, { "epoch": 192.14328358208957, "grad_norm": 1.4296211004257202, "learning_rate": 9.401190476190478e-06, "loss": 40.4707, "step": 8070 }, { "epoch": 192.16716417910447, "grad_norm": 1.3604408502578735, "learning_rate": 9.4e-06, "loss": 39.0604, "step": 8071 }, { "epoch": 192.1910447761194, "grad_norm": 1.3694298267364502, "learning_rate": 9.398809523809525e-06, "loss": 40.8131, "step": 8072 }, { "epoch": 192.21492537313432, "grad_norm": 1.3569613695144653, "learning_rate": 9.397619047619049e-06, "loss": 40.2263, "step": 8073 }, { "epoch": 192.23880597014926, "grad_norm": 1.3534624576568604, "learning_rate": 9.396428571428572e-06, "loss": 40.4984, "step": 8074 }, { "epoch": 192.26268656716417, "grad_norm": NaN, "learning_rate": 9.395238095238096e-06, "loss": 65.3698, "step": 8075 }, { "epoch": 192.2865671641791, "grad_norm": 1.453345537185669, "learning_rate": 9.395238095238096e-06, "loss": 39.063, "step": 8076 }, { "epoch": 192.31044776119404, "grad_norm": 1.3553715944290161, "learning_rate": 9.39404761904762e-06, "loss": 40.0514, "step": 8077 }, { "epoch": 192.33432835820895, "grad_norm": 1.342347264289856, "learning_rate": 9.392857142857143e-06, "loss": 40.2913, "step": 8078 }, { "epoch": 192.3582089552239, "grad_norm": 1.2990400791168213, "learning_rate": 9.391666666666669e-06, "loss": 38.8053, "step": 8079 }, { "epoch": 192.3820895522388, "grad_norm": 1.3598767518997192, "learning_rate": 9.39047619047619e-06, "loss": 40.0949, "step": 8080 }, { "epoch": 192.40597014925373, "grad_norm": 1.3865834474563599, "learning_rate": 9.389285714285716e-06, "loss": 39.6753, "step": 8081 }, { "epoch": 192.42985074626867, "grad_norm": 1.3983739614486694, "learning_rate": 9.388095238095238e-06, "loss": 40.4096, "step": 8082 }, { "epoch": 192.45373134328358, "grad_norm": 1.3374706506729126, "learning_rate": 9.386904761904763e-06, "loss": 39.1414, "step": 8083 }, { "epoch": 192.47761194029852, "grad_norm": 1.375590205192566, "learning_rate": 9.385714285714287e-06, "loss": 39.2927, "step": 8084 }, { "epoch": 192.50149253731342, "grad_norm": 1.325024127960205, "learning_rate": 9.38452380952381e-06, "loss": 40.0753, "step": 8085 }, { "epoch": 192.52537313432836, "grad_norm": 1.338007926940918, "learning_rate": 9.383333333333334e-06, "loss": 38.8173, "step": 8086 }, { "epoch": 192.54925373134327, "grad_norm": 1.3460127115249634, "learning_rate": 9.382142857142857e-06, "loss": 39.6836, "step": 8087 }, { "epoch": 192.5731343283582, "grad_norm": 1.3944281339645386, "learning_rate": 9.380952380952381e-06, "loss": 39.6512, "step": 8088 }, { "epoch": 192.59701492537314, "grad_norm": 1.3501447439193726, "learning_rate": 9.379761904761906e-06, "loss": 39.0982, "step": 8089 }, { "epoch": 192.62089552238805, "grad_norm": 1.40170419216156, "learning_rate": 9.378571428571428e-06, "loss": 38.7099, "step": 8090 }, { "epoch": 192.644776119403, "grad_norm": 1.359175682067871, "learning_rate": 9.377380952380954e-06, "loss": 40.2111, "step": 8091 }, { "epoch": 192.6686567164179, "grad_norm": 1.37232506275177, "learning_rate": 9.376190476190477e-06, "loss": 38.6715, "step": 8092 }, { "epoch": 192.69253731343284, "grad_norm": 1.3884413242340088, "learning_rate": 9.375000000000001e-06, "loss": 40.2473, "step": 8093 }, { "epoch": 192.71641791044777, "grad_norm": 1.3383928537368774, "learning_rate": 9.373809523809524e-06, "loss": 40.1194, "step": 8094 }, { "epoch": 192.74029850746268, "grad_norm": 1.3051388263702393, "learning_rate": 9.372619047619048e-06, "loss": 40.0183, "step": 8095 }, { "epoch": 192.76417910447762, "grad_norm": 1.3376734256744385, "learning_rate": 9.371428571428572e-06, "loss": 40.0003, "step": 8096 }, { "epoch": 192.78805970149253, "grad_norm": 1.3837220668792725, "learning_rate": 9.370238095238095e-06, "loss": 39.0458, "step": 8097 }, { "epoch": 192.81194029850747, "grad_norm": 1.3860543966293335, "learning_rate": 9.36904761904762e-06, "loss": 39.5186, "step": 8098 }, { "epoch": 192.83582089552237, "grad_norm": 1.343321442604065, "learning_rate": 9.367857142857144e-06, "loss": 39.2526, "step": 8099 }, { "epoch": 192.8597014925373, "grad_norm": 1.3903098106384277, "learning_rate": 9.366666666666668e-06, "loss": 39.9155, "step": 8100 }, { "epoch": 192.88358208955225, "grad_norm": 1.331059217453003, "learning_rate": 9.365476190476192e-06, "loss": 39.5596, "step": 8101 }, { "epoch": 192.90746268656716, "grad_norm": 1.3600807189941406, "learning_rate": 9.364285714285715e-06, "loss": 40.0414, "step": 8102 }, { "epoch": 192.9313432835821, "grad_norm": 1.336927056312561, "learning_rate": 9.363095238095239e-06, "loss": 39.8047, "step": 8103 }, { "epoch": 192.955223880597, "grad_norm": 1.3732855319976807, "learning_rate": 9.361904761904762e-06, "loss": 40.4838, "step": 8104 }, { "epoch": 192.97910447761194, "grad_norm": 1.3323813676834106, "learning_rate": 9.360714285714286e-06, "loss": 39.87, "step": 8105 }, { "epoch": 193.0, "grad_norm": 1.3134664297103882, "learning_rate": 9.359523809523811e-06, "loss": 35.6234, "step": 8106 }, { "epoch": 193.02388059701494, "grad_norm": 1.3526980876922607, "learning_rate": 9.358333333333333e-06, "loss": 39.453, "step": 8107 }, { "epoch": 193.04776119402985, "grad_norm": 1.3201533555984497, "learning_rate": 9.357142857142859e-06, "loss": 40.9205, "step": 8108 }, { "epoch": 193.07164179104478, "grad_norm": 1.3156428337097168, "learning_rate": 9.355952380952382e-06, "loss": 39.9638, "step": 8109 }, { "epoch": 193.0955223880597, "grad_norm": 1.4106032848358154, "learning_rate": 9.354761904761906e-06, "loss": 39.3973, "step": 8110 }, { "epoch": 193.11940298507463, "grad_norm": 1.3563669919967651, "learning_rate": 9.35357142857143e-06, "loss": 40.1596, "step": 8111 }, { "epoch": 193.14328358208957, "grad_norm": 1.379704236984253, "learning_rate": 9.352380952380953e-06, "loss": 39.6401, "step": 8112 }, { "epoch": 193.16716417910447, "grad_norm": 1.4319480657577515, "learning_rate": 9.351190476190477e-06, "loss": 39.5203, "step": 8113 }, { "epoch": 193.1910447761194, "grad_norm": 1.3336199522018433, "learning_rate": 9.350000000000002e-06, "loss": 39.9124, "step": 8114 }, { "epoch": 193.21492537313432, "grad_norm": 1.3507353067398071, "learning_rate": 9.348809523809524e-06, "loss": 40.8551, "step": 8115 }, { "epoch": 193.23880597014926, "grad_norm": 1.3296079635620117, "learning_rate": 9.34761904761905e-06, "loss": 39.1483, "step": 8116 }, { "epoch": 193.26268656716417, "grad_norm": 1.3255174160003662, "learning_rate": 9.346428571428571e-06, "loss": 36.8879, "step": 8117 }, { "epoch": 193.2865671641791, "grad_norm": 1.7781307697296143, "learning_rate": 9.345238095238096e-06, "loss": 41.0785, "step": 8118 }, { "epoch": 193.31044776119404, "grad_norm": 1.374171495437622, "learning_rate": 9.34404761904762e-06, "loss": 39.0034, "step": 8119 }, { "epoch": 193.33432835820895, "grad_norm": 1.300121784210205, "learning_rate": 9.342857142857144e-06, "loss": 39.4635, "step": 8120 }, { "epoch": 193.3582089552239, "grad_norm": 1.3290036916732788, "learning_rate": 9.341666666666667e-06, "loss": 40.5266, "step": 8121 }, { "epoch": 193.3820895522388, "grad_norm": 1.3366600275039673, "learning_rate": 9.340476190476191e-06, "loss": 39.9913, "step": 8122 }, { "epoch": 193.40597014925373, "grad_norm": 1.3336387872695923, "learning_rate": 9.339285714285715e-06, "loss": 39.787, "step": 8123 }, { "epoch": 193.42985074626867, "grad_norm": 1.363978385925293, "learning_rate": 9.338095238095238e-06, "loss": 39.1851, "step": 8124 }, { "epoch": 193.45373134328358, "grad_norm": 1.3782750368118286, "learning_rate": 9.336904761904762e-06, "loss": 40.015, "step": 8125 }, { "epoch": 193.47761194029852, "grad_norm": 1.3785141706466675, "learning_rate": 9.335714285714287e-06, "loss": 39.7636, "step": 8126 }, { "epoch": 193.50149253731342, "grad_norm": 1.3970105648040771, "learning_rate": 9.33452380952381e-06, "loss": 39.6458, "step": 8127 }, { "epoch": 193.52537313432836, "grad_norm": 1.3425569534301758, "learning_rate": 9.333333333333334e-06, "loss": 39.5634, "step": 8128 }, { "epoch": 193.54925373134327, "grad_norm": 1.3226897716522217, "learning_rate": 9.332142857142858e-06, "loss": 40.3706, "step": 8129 }, { "epoch": 193.5731343283582, "grad_norm": 1.345410704612732, "learning_rate": 9.330952380952382e-06, "loss": 40.9153, "step": 8130 }, { "epoch": 193.59701492537314, "grad_norm": 1.3438916206359863, "learning_rate": 9.329761904761905e-06, "loss": 39.0958, "step": 8131 }, { "epoch": 193.62089552238805, "grad_norm": 1.3111969232559204, "learning_rate": 9.328571428571429e-06, "loss": 38.5729, "step": 8132 }, { "epoch": 193.644776119403, "grad_norm": 1.33928382396698, "learning_rate": 9.327380952380954e-06, "loss": 40.2366, "step": 8133 }, { "epoch": 193.6686567164179, "grad_norm": 1.3600127696990967, "learning_rate": 9.326190476190476e-06, "loss": 39.174, "step": 8134 }, { "epoch": 193.69253731343284, "grad_norm": 1.3434395790100098, "learning_rate": 9.325000000000001e-06, "loss": 38.5761, "step": 8135 }, { "epoch": 193.71641791044777, "grad_norm": 1.3799177408218384, "learning_rate": 9.323809523809525e-06, "loss": 39.8227, "step": 8136 }, { "epoch": 193.74029850746268, "grad_norm": 1.3420696258544922, "learning_rate": 9.322619047619049e-06, "loss": 40.3774, "step": 8137 }, { "epoch": 193.76417910447762, "grad_norm": 1.3548200130462646, "learning_rate": 9.321428571428572e-06, "loss": 40.512, "step": 8138 }, { "epoch": 193.78805970149253, "grad_norm": 1.364455223083496, "learning_rate": 9.320238095238096e-06, "loss": 39.0398, "step": 8139 }, { "epoch": 193.81194029850747, "grad_norm": 1.3866287469863892, "learning_rate": 9.31904761904762e-06, "loss": 39.7179, "step": 8140 }, { "epoch": 193.83582089552237, "grad_norm": 1.5081520080566406, "learning_rate": 9.317857142857145e-06, "loss": 38.7731, "step": 8141 }, { "epoch": 193.8597014925373, "grad_norm": 1.3429768085479736, "learning_rate": 9.316666666666667e-06, "loss": 40.0998, "step": 8142 }, { "epoch": 193.88358208955225, "grad_norm": 1.3729416131973267, "learning_rate": 9.315476190476192e-06, "loss": 40.5674, "step": 8143 }, { "epoch": 193.90746268656716, "grad_norm": 1.3227925300598145, "learning_rate": 9.314285714285714e-06, "loss": 40.4387, "step": 8144 }, { "epoch": 193.9313432835821, "grad_norm": 1.3253285884857178, "learning_rate": 9.31309523809524e-06, "loss": 40.1297, "step": 8145 }, { "epoch": 193.955223880597, "grad_norm": 1.365182876586914, "learning_rate": 9.311904761904763e-06, "loss": 40.0677, "step": 8146 }, { "epoch": 193.97910447761194, "grad_norm": 1.3866856098175049, "learning_rate": 9.310714285714286e-06, "loss": 39.1155, "step": 8147 }, { "epoch": 194.0, "grad_norm": 1.3522495031356812, "learning_rate": 9.30952380952381e-06, "loss": 35.7775, "step": 8148 }, { "epoch": 194.02388059701494, "grad_norm": 1.3418184518814087, "learning_rate": 9.308333333333334e-06, "loss": 38.4443, "step": 8149 }, { "epoch": 194.04776119402985, "grad_norm": 1.3108323812484741, "learning_rate": 9.307142857142857e-06, "loss": 40.2513, "step": 8150 }, { "epoch": 194.07164179104478, "grad_norm": 1.3105860948562622, "learning_rate": 9.305952380952383e-06, "loss": 40.5818, "step": 8151 }, { "epoch": 194.0955223880597, "grad_norm": 1.3254317045211792, "learning_rate": 9.304761904761905e-06, "loss": 38.3013, "step": 8152 }, { "epoch": 194.11940298507463, "grad_norm": 1.3543351888656616, "learning_rate": 9.30357142857143e-06, "loss": 40.8127, "step": 8153 }, { "epoch": 194.14328358208957, "grad_norm": 1.3476797342300415, "learning_rate": 9.302380952380954e-06, "loss": 40.57, "step": 8154 }, { "epoch": 194.16716417910447, "grad_norm": 1.3536100387573242, "learning_rate": 9.301190476190477e-06, "loss": 40.0478, "step": 8155 }, { "epoch": 194.1910447761194, "grad_norm": 1.3780878782272339, "learning_rate": 9.3e-06, "loss": 39.122, "step": 8156 }, { "epoch": 194.21492537313432, "grad_norm": 1.3358213901519775, "learning_rate": 9.298809523809524e-06, "loss": 41.3692, "step": 8157 }, { "epoch": 194.23880597014926, "grad_norm": 1.4232157468795776, "learning_rate": 9.297619047619048e-06, "loss": 39.7234, "step": 8158 }, { "epoch": 194.26268656716417, "grad_norm": 1.3235212564468384, "learning_rate": 9.296428571428572e-06, "loss": 39.5406, "step": 8159 }, { "epoch": 194.2865671641791, "grad_norm": 1.355603575706482, "learning_rate": 9.295238095238095e-06, "loss": 38.9169, "step": 8160 }, { "epoch": 194.31044776119404, "grad_norm": 1.4014873504638672, "learning_rate": 9.29404761904762e-06, "loss": 39.1834, "step": 8161 }, { "epoch": 194.33432835820895, "grad_norm": 1.3355145454406738, "learning_rate": 9.292857142857144e-06, "loss": 39.5318, "step": 8162 }, { "epoch": 194.3582089552239, "grad_norm": 1.4020789861679077, "learning_rate": 9.291666666666668e-06, "loss": 40.0597, "step": 8163 }, { "epoch": 194.3820895522388, "grad_norm": 1.3575400114059448, "learning_rate": 9.290476190476191e-06, "loss": 38.4982, "step": 8164 }, { "epoch": 194.40597014925373, "grad_norm": 1.354243516921997, "learning_rate": 9.289285714285715e-06, "loss": 39.9332, "step": 8165 }, { "epoch": 194.42985074626867, "grad_norm": 1.3607856035232544, "learning_rate": 9.288095238095239e-06, "loss": 40.1748, "step": 8166 }, { "epoch": 194.45373134328358, "grad_norm": 1.3424760103225708, "learning_rate": 9.286904761904762e-06, "loss": 40.8282, "step": 8167 }, { "epoch": 194.47761194029852, "grad_norm": 1.3654613494873047, "learning_rate": 9.285714285714288e-06, "loss": 39.5047, "step": 8168 }, { "epoch": 194.50149253731342, "grad_norm": 1.3189622163772583, "learning_rate": 9.28452380952381e-06, "loss": 40.113, "step": 8169 }, { "epoch": 194.52537313432836, "grad_norm": 1.368929147720337, "learning_rate": 9.283333333333335e-06, "loss": 39.7598, "step": 8170 }, { "epoch": 194.54925373134327, "grad_norm": 1.3727912902832031, "learning_rate": 9.282142857142858e-06, "loss": 39.9266, "step": 8171 }, { "epoch": 194.5731343283582, "grad_norm": 1.3644814491271973, "learning_rate": 9.280952380952382e-06, "loss": 40.4944, "step": 8172 }, { "epoch": 194.59701492537314, "grad_norm": 1.3572221994400024, "learning_rate": 9.279761904761906e-06, "loss": 40.5248, "step": 8173 }, { "epoch": 194.62089552238805, "grad_norm": 1.390061855316162, "learning_rate": 9.27857142857143e-06, "loss": 39.8376, "step": 8174 }, { "epoch": 194.644776119403, "grad_norm": NaN, "learning_rate": 9.277380952380953e-06, "loss": 59.6181, "step": 8175 }, { "epoch": 194.6686567164179, "grad_norm": 1.3391075134277344, "learning_rate": 9.277380952380953e-06, "loss": 40.1326, "step": 8176 }, { "epoch": 194.69253731343284, "grad_norm": 1.3510384559631348, "learning_rate": 9.276190476190477e-06, "loss": 40.2491, "step": 8177 }, { "epoch": 194.71641791044777, "grad_norm": 1.3332641124725342, "learning_rate": 9.275e-06, "loss": 39.5267, "step": 8178 }, { "epoch": 194.74029850746268, "grad_norm": 1.3420401811599731, "learning_rate": 9.273809523809525e-06, "loss": 38.8612, "step": 8179 }, { "epoch": 194.76417910447762, "grad_norm": 1.3343944549560547, "learning_rate": 9.272619047619047e-06, "loss": 39.1501, "step": 8180 }, { "epoch": 194.78805970149253, "grad_norm": 1.3221231698989868, "learning_rate": 9.271428571428573e-06, "loss": 40.7765, "step": 8181 }, { "epoch": 194.81194029850747, "grad_norm": 1.3822109699249268, "learning_rate": 9.270238095238096e-06, "loss": 38.8485, "step": 8182 }, { "epoch": 194.83582089552237, "grad_norm": 1.3701359033584595, "learning_rate": 9.26904761904762e-06, "loss": 40.6096, "step": 8183 }, { "epoch": 194.8597014925373, "grad_norm": 1.3813859224319458, "learning_rate": 9.267857142857144e-06, "loss": 38.79, "step": 8184 }, { "epoch": 194.88358208955225, "grad_norm": 1.362526535987854, "learning_rate": 9.266666666666667e-06, "loss": 39.7218, "step": 8185 }, { "epoch": 194.90746268656716, "grad_norm": 1.3570263385772705, "learning_rate": 9.26547619047619e-06, "loss": 39.4134, "step": 8186 }, { "epoch": 194.9313432835821, "grad_norm": 1.347283124923706, "learning_rate": 9.264285714285714e-06, "loss": 40.0555, "step": 8187 }, { "epoch": 194.955223880597, "grad_norm": 1.3138185739517212, "learning_rate": 9.263095238095238e-06, "loss": 40.0463, "step": 8188 }, { "epoch": 194.97910447761194, "grad_norm": 1.3552470207214355, "learning_rate": 9.261904761904763e-06, "loss": 38.7693, "step": 8189 }, { "epoch": 195.0, "grad_norm": 1.326535940170288, "learning_rate": 9.260714285714287e-06, "loss": 34.4675, "step": 8190 }, { "epoch": 195.02388059701494, "grad_norm": 1.3612662553787231, "learning_rate": 9.25952380952381e-06, "loss": 40.3441, "step": 8191 }, { "epoch": 195.04776119402985, "grad_norm": 1.3380229473114014, "learning_rate": 9.258333333333334e-06, "loss": 40.3796, "step": 8192 }, { "epoch": 195.07164179104478, "grad_norm": 1.389438271522522, "learning_rate": 9.257142857142858e-06, "loss": 39.2167, "step": 8193 }, { "epoch": 195.0955223880597, "grad_norm": 1.3744789361953735, "learning_rate": 9.255952380952381e-06, "loss": 39.5257, "step": 8194 }, { "epoch": 195.11940298507463, "grad_norm": 1.339280128479004, "learning_rate": 9.254761904761905e-06, "loss": 39.6349, "step": 8195 }, { "epoch": 195.14328358208957, "grad_norm": 1.3727983236312866, "learning_rate": 9.25357142857143e-06, "loss": 39.5139, "step": 8196 }, { "epoch": 195.16716417910447, "grad_norm": 1.3416467905044556, "learning_rate": 9.252380952380952e-06, "loss": 40.4896, "step": 8197 }, { "epoch": 195.1910447761194, "grad_norm": 1.3142980337142944, "learning_rate": 9.251190476190478e-06, "loss": 39.7604, "step": 8198 }, { "epoch": 195.21492537313432, "grad_norm": 1.4115710258483887, "learning_rate": 9.250000000000001e-06, "loss": 39.2725, "step": 8199 }, { "epoch": 195.23880597014926, "grad_norm": 1.3778796195983887, "learning_rate": 9.248809523809525e-06, "loss": 39.8278, "step": 8200 }, { "epoch": 195.26268656716417, "grad_norm": 1.3621126413345337, "learning_rate": 9.247619047619048e-06, "loss": 40.0275, "step": 8201 }, { "epoch": 195.2865671641791, "grad_norm": 1.3476710319519043, "learning_rate": 9.246428571428572e-06, "loss": 39.8234, "step": 8202 }, { "epoch": 195.31044776119404, "grad_norm": 1.5457059144973755, "learning_rate": 9.245238095238096e-06, "loss": 40.0556, "step": 8203 }, { "epoch": 195.33432835820895, "grad_norm": 1.3735928535461426, "learning_rate": 9.244047619047621e-06, "loss": 39.904, "step": 8204 }, { "epoch": 195.3582089552239, "grad_norm": 1.413859248161316, "learning_rate": 9.242857142857143e-06, "loss": 40.9064, "step": 8205 }, { "epoch": 195.3820895522388, "grad_norm": 1.3153880834579468, "learning_rate": 9.241666666666668e-06, "loss": 38.6149, "step": 8206 }, { "epoch": 195.40597014925373, "grad_norm": 1.3246228694915771, "learning_rate": 9.24047619047619e-06, "loss": 39.3915, "step": 8207 }, { "epoch": 195.42985074626867, "grad_norm": 1.4108952283859253, "learning_rate": 9.239285714285715e-06, "loss": 39.2928, "step": 8208 }, { "epoch": 195.45373134328358, "grad_norm": 1.376838207244873, "learning_rate": 9.238095238095239e-06, "loss": 39.0629, "step": 8209 }, { "epoch": 195.47761194029852, "grad_norm": 1.3088308572769165, "learning_rate": 9.236904761904763e-06, "loss": 40.2166, "step": 8210 }, { "epoch": 195.50149253731342, "grad_norm": 1.3641176223754883, "learning_rate": 9.235714285714286e-06, "loss": 39.2443, "step": 8211 }, { "epoch": 195.52537313432836, "grad_norm": 1.4035718441009521, "learning_rate": 9.23452380952381e-06, "loss": 40.3314, "step": 8212 }, { "epoch": 195.54925373134327, "grad_norm": 1.3205360174179077, "learning_rate": 9.233333333333334e-06, "loss": 39.1679, "step": 8213 }, { "epoch": 195.5731343283582, "grad_norm": 1.3699171543121338, "learning_rate": 9.232142857142859e-06, "loss": 39.75, "step": 8214 }, { "epoch": 195.59701492537314, "grad_norm": 1.342581033706665, "learning_rate": 9.23095238095238e-06, "loss": 39.0574, "step": 8215 }, { "epoch": 195.62089552238805, "grad_norm": 1.4127804040908813, "learning_rate": 9.229761904761906e-06, "loss": 39.4427, "step": 8216 }, { "epoch": 195.644776119403, "grad_norm": 1.3483479022979736, "learning_rate": 9.22857142857143e-06, "loss": 38.1351, "step": 8217 }, { "epoch": 195.6686567164179, "grad_norm": 1.3519258499145508, "learning_rate": 9.227380952380953e-06, "loss": 39.5632, "step": 8218 }, { "epoch": 195.69253731343284, "grad_norm": 1.3018907308578491, "learning_rate": 9.226190476190477e-06, "loss": 40.4779, "step": 8219 }, { "epoch": 195.71641791044777, "grad_norm": 1.3546749353408813, "learning_rate": 9.225e-06, "loss": 38.959, "step": 8220 }, { "epoch": 195.74029850746268, "grad_norm": 1.3474795818328857, "learning_rate": 9.223809523809524e-06, "loss": 39.362, "step": 8221 }, { "epoch": 195.76417910447762, "grad_norm": 1.3714686632156372, "learning_rate": 9.222619047619048e-06, "loss": 40.0137, "step": 8222 }, { "epoch": 195.78805970149253, "grad_norm": 1.3710826635360718, "learning_rate": 9.221428571428571e-06, "loss": 40.8866, "step": 8223 }, { "epoch": 195.81194029850747, "grad_norm": 1.3635708093643188, "learning_rate": 9.220238095238097e-06, "loss": 40.2457, "step": 8224 }, { "epoch": 195.83582089552237, "grad_norm": 1.3579343557357788, "learning_rate": 9.21904761904762e-06, "loss": 38.873, "step": 8225 }, { "epoch": 195.8597014925373, "grad_norm": 1.398798942565918, "learning_rate": 9.217857142857144e-06, "loss": 39.082, "step": 8226 }, { "epoch": 195.88358208955225, "grad_norm": 1.3268498182296753, "learning_rate": 9.216666666666668e-06, "loss": 41.091, "step": 8227 }, { "epoch": 195.90746268656716, "grad_norm": 1.3873074054718018, "learning_rate": 9.215476190476191e-06, "loss": 40.6406, "step": 8228 }, { "epoch": 195.9313432835821, "grad_norm": 1.3314863443374634, "learning_rate": 9.214285714285715e-06, "loss": 40.1914, "step": 8229 }, { "epoch": 195.955223880597, "grad_norm": 1.4354932308197021, "learning_rate": 9.213095238095239e-06, "loss": 40.3664, "step": 8230 }, { "epoch": 195.97910447761194, "grad_norm": 1.3203027248382568, "learning_rate": 9.211904761904764e-06, "loss": 39.538, "step": 8231 }, { "epoch": 196.0, "grad_norm": 1.2899844646453857, "learning_rate": 9.210714285714286e-06, "loss": 34.6047, "step": 8232 }, { "epoch": 196.02388059701494, "grad_norm": 1.3357369899749756, "learning_rate": 9.209523809523811e-06, "loss": 39.1338, "step": 8233 }, { "epoch": 196.04776119402985, "grad_norm": 1.359092354774475, "learning_rate": 9.208333333333333e-06, "loss": 38.0999, "step": 8234 }, { "epoch": 196.07164179104478, "grad_norm": 1.3306421041488647, "learning_rate": 9.207142857142858e-06, "loss": 39.7781, "step": 8235 }, { "epoch": 196.0955223880597, "grad_norm": 1.3126306533813477, "learning_rate": 9.205952380952382e-06, "loss": 39.3158, "step": 8236 }, { "epoch": 196.11940298507463, "grad_norm": 1.3966485261917114, "learning_rate": 9.204761904761906e-06, "loss": 39.3702, "step": 8237 }, { "epoch": 196.14328358208957, "grad_norm": 1.3523187637329102, "learning_rate": 9.203571428571429e-06, "loss": 39.6424, "step": 8238 }, { "epoch": 196.16716417910447, "grad_norm": 1.4099583625793457, "learning_rate": 9.202380952380953e-06, "loss": 39.8263, "step": 8239 }, { "epoch": 196.1910447761194, "grad_norm": 1.3975635766983032, "learning_rate": 9.201190476190476e-06, "loss": 39.3018, "step": 8240 }, { "epoch": 196.21492537313432, "grad_norm": 1.3303102254867554, "learning_rate": 9.200000000000002e-06, "loss": 40.7612, "step": 8241 }, { "epoch": 196.23880597014926, "grad_norm": 1.3592562675476074, "learning_rate": 9.198809523809524e-06, "loss": 41.427, "step": 8242 }, { "epoch": 196.26268656716417, "grad_norm": 1.3766870498657227, "learning_rate": 9.197619047619049e-06, "loss": 37.9711, "step": 8243 }, { "epoch": 196.2865671641791, "grad_norm": 1.3568012714385986, "learning_rate": 9.196428571428571e-06, "loss": 39.77, "step": 8244 }, { "epoch": 196.31044776119404, "grad_norm": 1.3565465211868286, "learning_rate": 9.195238095238096e-06, "loss": 39.5824, "step": 8245 }, { "epoch": 196.33432835820895, "grad_norm": 1.3645588159561157, "learning_rate": 9.19404761904762e-06, "loss": 40.1668, "step": 8246 }, { "epoch": 196.3582089552239, "grad_norm": 1.4874681234359741, "learning_rate": 9.192857142857143e-06, "loss": 38.7246, "step": 8247 }, { "epoch": 196.3820895522388, "grad_norm": 1.362986445426941, "learning_rate": 9.191666666666667e-06, "loss": 40.0761, "step": 8248 }, { "epoch": 196.40597014925373, "grad_norm": 1.3685634136199951, "learning_rate": 9.19047619047619e-06, "loss": 38.1625, "step": 8249 }, { "epoch": 196.42985074626867, "grad_norm": 1.33214271068573, "learning_rate": 9.189285714285714e-06, "loss": 40.6355, "step": 8250 }, { "epoch": 196.45373134328358, "grad_norm": 1.3138313293457031, "learning_rate": 9.18809523809524e-06, "loss": 40.3995, "step": 8251 }, { "epoch": 196.47761194029852, "grad_norm": 1.370100736618042, "learning_rate": 9.186904761904763e-06, "loss": 40.377, "step": 8252 }, { "epoch": 196.50149253731342, "grad_norm": 1.3368659019470215, "learning_rate": 9.185714285714287e-06, "loss": 41.3038, "step": 8253 }, { "epoch": 196.52537313432836, "grad_norm": 1.366429090499878, "learning_rate": 9.18452380952381e-06, "loss": 40.0049, "step": 8254 }, { "epoch": 196.54925373134327, "grad_norm": 1.3697006702423096, "learning_rate": 9.183333333333334e-06, "loss": 40.1348, "step": 8255 }, { "epoch": 196.5731343283582, "grad_norm": 1.3494455814361572, "learning_rate": 9.182142857142858e-06, "loss": 39.7134, "step": 8256 }, { "epoch": 196.59701492537314, "grad_norm": 1.3436124324798584, "learning_rate": 9.180952380952381e-06, "loss": 39.9956, "step": 8257 }, { "epoch": 196.62089552238805, "grad_norm": 1.3270702362060547, "learning_rate": 9.179761904761905e-06, "loss": 39.9917, "step": 8258 }, { "epoch": 196.644776119403, "grad_norm": 1.33135986328125, "learning_rate": 9.178571428571429e-06, "loss": 39.8372, "step": 8259 }, { "epoch": 196.6686567164179, "grad_norm": 1.3677103519439697, "learning_rate": 9.177380952380954e-06, "loss": 39.7359, "step": 8260 }, { "epoch": 196.69253731343284, "grad_norm": 1.3504973649978638, "learning_rate": 9.176190476190477e-06, "loss": 39.5585, "step": 8261 }, { "epoch": 196.71641791044777, "grad_norm": 1.3181111812591553, "learning_rate": 9.175000000000001e-06, "loss": 40.7897, "step": 8262 }, { "epoch": 196.74029850746268, "grad_norm": 1.396393060684204, "learning_rate": 9.173809523809525e-06, "loss": 39.7682, "step": 8263 }, { "epoch": 196.76417910447762, "grad_norm": 1.3868110179901123, "learning_rate": 9.172619047619048e-06, "loss": 38.6021, "step": 8264 }, { "epoch": 196.78805970149253, "grad_norm": 1.2942819595336914, "learning_rate": 9.171428571428572e-06, "loss": 39.7091, "step": 8265 }, { "epoch": 196.81194029850747, "grad_norm": 1.3444409370422363, "learning_rate": 9.170238095238097e-06, "loss": 39.56, "step": 8266 }, { "epoch": 196.83582089552237, "grad_norm": 1.3316888809204102, "learning_rate": 9.16904761904762e-06, "loss": 40.4136, "step": 8267 }, { "epoch": 196.8597014925373, "grad_norm": 1.3198986053466797, "learning_rate": 9.167857142857145e-06, "loss": 39.762, "step": 8268 }, { "epoch": 196.88358208955225, "grad_norm": 1.3658802509307861, "learning_rate": 9.166666666666666e-06, "loss": 40.1539, "step": 8269 }, { "epoch": 196.90746268656716, "grad_norm": 1.4427595138549805, "learning_rate": 9.165476190476192e-06, "loss": 39.5781, "step": 8270 }, { "epoch": 196.9313432835821, "grad_norm": 1.3726118803024292, "learning_rate": 9.164285714285715e-06, "loss": 40.7175, "step": 8271 }, { "epoch": 196.955223880597, "grad_norm": 1.3595292568206787, "learning_rate": 9.163095238095239e-06, "loss": 38.7659, "step": 8272 }, { "epoch": 196.97910447761194, "grad_norm": 1.36148202419281, "learning_rate": 9.161904761904763e-06, "loss": 39.6148, "step": 8273 }, { "epoch": 197.0, "grad_norm": 1.4080588817596436, "learning_rate": 9.160714285714286e-06, "loss": 33.9565, "step": 8274 }, { "epoch": 197.02388059701494, "grad_norm": 1.3503855466842651, "learning_rate": 9.15952380952381e-06, "loss": 40.0289, "step": 8275 }, { "epoch": 197.04776119402985, "grad_norm": 1.3526374101638794, "learning_rate": 9.158333333333335e-06, "loss": 39.6021, "step": 8276 }, { "epoch": 197.07164179104478, "grad_norm": 1.3841989040374756, "learning_rate": 9.157142857142857e-06, "loss": 38.6472, "step": 8277 }, { "epoch": 197.0955223880597, "grad_norm": 1.393977403640747, "learning_rate": 9.155952380952382e-06, "loss": 40.6484, "step": 8278 }, { "epoch": 197.11940298507463, "grad_norm": 1.3296873569488525, "learning_rate": 9.154761904761906e-06, "loss": 38.5409, "step": 8279 }, { "epoch": 197.14328358208957, "grad_norm": 1.3447153568267822, "learning_rate": 9.15357142857143e-06, "loss": 40.7826, "step": 8280 }, { "epoch": 197.16716417910447, "grad_norm": 1.3146417140960693, "learning_rate": 9.152380952380953e-06, "loss": 39.2244, "step": 8281 }, { "epoch": 197.1910447761194, "grad_norm": 1.3483967781066895, "learning_rate": 9.151190476190477e-06, "loss": 38.5204, "step": 8282 }, { "epoch": 197.21492537313432, "grad_norm": 1.3146545886993408, "learning_rate": 9.15e-06, "loss": 38.5432, "step": 8283 }, { "epoch": 197.23880597014926, "grad_norm": 1.360947847366333, "learning_rate": 9.148809523809524e-06, "loss": 39.5052, "step": 8284 }, { "epoch": 197.26268656716417, "grad_norm": 1.4010241031646729, "learning_rate": 9.147619047619048e-06, "loss": 38.9758, "step": 8285 }, { "epoch": 197.2865671641791, "grad_norm": 1.3133169412612915, "learning_rate": 9.146428571428571e-06, "loss": 40.1573, "step": 8286 }, { "epoch": 197.31044776119404, "grad_norm": 1.4743196964263916, "learning_rate": 9.145238095238097e-06, "loss": 40.1331, "step": 8287 }, { "epoch": 197.33432835820895, "grad_norm": 1.3996152877807617, "learning_rate": 9.14404761904762e-06, "loss": 39.0051, "step": 8288 }, { "epoch": 197.3582089552239, "grad_norm": 1.343434453010559, "learning_rate": 9.142857142857144e-06, "loss": 40.1605, "step": 8289 }, { "epoch": 197.3820895522388, "grad_norm": 1.3281519412994385, "learning_rate": 9.141666666666668e-06, "loss": 39.8787, "step": 8290 }, { "epoch": 197.40597014925373, "grad_norm": 1.3565195798873901, "learning_rate": 9.140476190476191e-06, "loss": 39.2663, "step": 8291 }, { "epoch": 197.42985074626867, "grad_norm": 1.3518437147140503, "learning_rate": 9.139285714285715e-06, "loss": 39.369, "step": 8292 }, { "epoch": 197.45373134328358, "grad_norm": 1.3787649869918823, "learning_rate": 9.13809523809524e-06, "loss": 40.0642, "step": 8293 }, { "epoch": 197.47761194029852, "grad_norm": 1.357149600982666, "learning_rate": 9.136904761904762e-06, "loss": 40.3977, "step": 8294 }, { "epoch": 197.50149253731342, "grad_norm": 1.334930181503296, "learning_rate": 9.135714285714287e-06, "loss": 39.8463, "step": 8295 }, { "epoch": 197.52537313432836, "grad_norm": 1.3275251388549805, "learning_rate": 9.13452380952381e-06, "loss": 38.8637, "step": 8296 }, { "epoch": 197.54925373134327, "grad_norm": 1.3726164102554321, "learning_rate": 9.133333333333335e-06, "loss": 40.0314, "step": 8297 }, { "epoch": 197.5731343283582, "grad_norm": 1.3171132802963257, "learning_rate": 9.132142857142858e-06, "loss": 40.5282, "step": 8298 }, { "epoch": 197.59701492537314, "grad_norm": 1.3664679527282715, "learning_rate": 9.130952380952382e-06, "loss": 40.7994, "step": 8299 }, { "epoch": 197.62089552238805, "grad_norm": 1.369454026222229, "learning_rate": 9.129761904761905e-06, "loss": 39.6448, "step": 8300 }, { "epoch": 197.644776119403, "grad_norm": 1.3281992673873901, "learning_rate": 9.128571428571429e-06, "loss": 40.0708, "step": 8301 }, { "epoch": 197.6686567164179, "grad_norm": 1.3577313423156738, "learning_rate": 9.127380952380953e-06, "loss": 39.6333, "step": 8302 }, { "epoch": 197.69253731343284, "grad_norm": 1.3722410202026367, "learning_rate": 9.126190476190478e-06, "loss": 40.4437, "step": 8303 }, { "epoch": 197.71641791044777, "grad_norm": 1.3616948127746582, "learning_rate": 9.125e-06, "loss": 39.599, "step": 8304 }, { "epoch": 197.74029850746268, "grad_norm": 1.4231816530227661, "learning_rate": 9.123809523809525e-06, "loss": 41.2318, "step": 8305 }, { "epoch": 197.76417910447762, "grad_norm": 1.3079227209091187, "learning_rate": 9.122619047619047e-06, "loss": 40.5184, "step": 8306 }, { "epoch": 197.78805970149253, "grad_norm": 1.3400167226791382, "learning_rate": 9.121428571428572e-06, "loss": 39.7653, "step": 8307 }, { "epoch": 197.81194029850747, "grad_norm": 1.3582713603973389, "learning_rate": 9.120238095238096e-06, "loss": 39.7315, "step": 8308 }, { "epoch": 197.83582089552237, "grad_norm": 1.3395192623138428, "learning_rate": 9.11904761904762e-06, "loss": 40.5832, "step": 8309 }, { "epoch": 197.8597014925373, "grad_norm": 1.340584635734558, "learning_rate": 9.117857142857143e-06, "loss": 40.0862, "step": 8310 }, { "epoch": 197.88358208955225, "grad_norm": 1.345178246498108, "learning_rate": 9.116666666666667e-06, "loss": 40.2127, "step": 8311 }, { "epoch": 197.90746268656716, "grad_norm": 1.3527147769927979, "learning_rate": 9.11547619047619e-06, "loss": 38.5807, "step": 8312 }, { "epoch": 197.9313432835821, "grad_norm": 1.3597252368927002, "learning_rate": 9.114285714285716e-06, "loss": 38.2354, "step": 8313 }, { "epoch": 197.955223880597, "grad_norm": 1.357584834098816, "learning_rate": 9.11309523809524e-06, "loss": 39.2665, "step": 8314 }, { "epoch": 197.97910447761194, "grad_norm": 1.4052231311798096, "learning_rate": 9.111904761904763e-06, "loss": 40.5787, "step": 8315 }, { "epoch": 198.0, "grad_norm": 1.3361458778381348, "learning_rate": 9.110714285714287e-06, "loss": 34.6027, "step": 8316 }, { "epoch": 198.02388059701494, "grad_norm": 1.3552029132843018, "learning_rate": 9.10952380952381e-06, "loss": 39.7132, "step": 8317 }, { "epoch": 198.04776119402985, "grad_norm": 1.383323311805725, "learning_rate": 9.108333333333334e-06, "loss": 39.2879, "step": 8318 }, { "epoch": 198.07164179104478, "grad_norm": 1.3905277252197266, "learning_rate": 9.107142857142858e-06, "loss": 41.1733, "step": 8319 }, { "epoch": 198.0955223880597, "grad_norm": 1.3613609075546265, "learning_rate": 9.105952380952381e-06, "loss": 39.5596, "step": 8320 }, { "epoch": 198.11940298507463, "grad_norm": 1.3153300285339355, "learning_rate": 9.104761904761905e-06, "loss": 39.5083, "step": 8321 }, { "epoch": 198.14328358208957, "grad_norm": 1.3405272960662842, "learning_rate": 9.10357142857143e-06, "loss": 41.2833, "step": 8322 }, { "epoch": 198.16716417910447, "grad_norm": 1.4101898670196533, "learning_rate": 9.102380952380954e-06, "loss": 40.314, "step": 8323 }, { "epoch": 198.1910447761194, "grad_norm": 1.3642925024032593, "learning_rate": 9.101190476190477e-06, "loss": 38.5863, "step": 8324 }, { "epoch": 198.21492537313432, "grad_norm": 1.3379172086715698, "learning_rate": 9.100000000000001e-06, "loss": 40.1521, "step": 8325 }, { "epoch": 198.23880597014926, "grad_norm": 1.3321168422698975, "learning_rate": 9.098809523809525e-06, "loss": 38.4818, "step": 8326 }, { "epoch": 198.26268656716417, "grad_norm": 1.3324553966522217, "learning_rate": 9.097619047619048e-06, "loss": 40.1818, "step": 8327 }, { "epoch": 198.2865671641791, "grad_norm": 1.3881027698516846, "learning_rate": 9.096428571428574e-06, "loss": 40.4369, "step": 8328 }, { "epoch": 198.31044776119404, "grad_norm": 1.3308145999908447, "learning_rate": 9.095238095238095e-06, "loss": 39.1639, "step": 8329 }, { "epoch": 198.33432835820895, "grad_norm": 1.384558081626892, "learning_rate": 9.09404761904762e-06, "loss": 39.6796, "step": 8330 }, { "epoch": 198.3582089552239, "grad_norm": 1.375219702720642, "learning_rate": 9.092857142857143e-06, "loss": 38.7568, "step": 8331 }, { "epoch": 198.3820895522388, "grad_norm": 1.3610416650772095, "learning_rate": 9.091666666666668e-06, "loss": 39.1983, "step": 8332 }, { "epoch": 198.40597014925373, "grad_norm": 1.3643382787704468, "learning_rate": 9.09047619047619e-06, "loss": 37.8444, "step": 8333 }, { "epoch": 198.42985074626867, "grad_norm": 1.3735100030899048, "learning_rate": 9.089285714285715e-06, "loss": 40.8578, "step": 8334 }, { "epoch": 198.45373134328358, "grad_norm": 1.3649832010269165, "learning_rate": 9.088095238095239e-06, "loss": 39.4095, "step": 8335 }, { "epoch": 198.47761194029852, "grad_norm": 1.3614352941513062, "learning_rate": 9.086904761904762e-06, "loss": 40.1622, "step": 8336 }, { "epoch": 198.50149253731342, "grad_norm": 1.3643244504928589, "learning_rate": 9.085714285714286e-06, "loss": 40.0414, "step": 8337 }, { "epoch": 198.52537313432836, "grad_norm": 1.316392183303833, "learning_rate": 9.08452380952381e-06, "loss": 40.4454, "step": 8338 }, { "epoch": 198.54925373134327, "grad_norm": 1.3754719495773315, "learning_rate": 9.083333333333333e-06, "loss": 39.4144, "step": 8339 }, { "epoch": 198.5731343283582, "grad_norm": 1.4263947010040283, "learning_rate": 9.082142857142859e-06, "loss": 40.1828, "step": 8340 }, { "epoch": 198.59701492537314, "grad_norm": 1.3474528789520264, "learning_rate": 9.08095238095238e-06, "loss": 39.4598, "step": 8341 }, { "epoch": 198.62089552238805, "grad_norm": 1.3409900665283203, "learning_rate": 9.079761904761906e-06, "loss": 40.7827, "step": 8342 }, { "epoch": 198.644776119403, "grad_norm": 1.3676395416259766, "learning_rate": 9.07857142857143e-06, "loss": 38.9603, "step": 8343 }, { "epoch": 198.6686567164179, "grad_norm": 1.3087782859802246, "learning_rate": 9.077380952380953e-06, "loss": 38.6047, "step": 8344 }, { "epoch": 198.69253731343284, "grad_norm": 1.3479573726654053, "learning_rate": 9.076190476190477e-06, "loss": 40.4327, "step": 8345 }, { "epoch": 198.71641791044777, "grad_norm": 1.3801110982894897, "learning_rate": 9.075e-06, "loss": 39.1225, "step": 8346 }, { "epoch": 198.74029850746268, "grad_norm": 1.3416346311569214, "learning_rate": 9.073809523809524e-06, "loss": 40.1538, "step": 8347 }, { "epoch": 198.76417910447762, "grad_norm": 1.3497296571731567, "learning_rate": 9.072619047619048e-06, "loss": 39.7964, "step": 8348 }, { "epoch": 198.78805970149253, "grad_norm": 1.3584421873092651, "learning_rate": 9.071428571428573e-06, "loss": 39.7958, "step": 8349 }, { "epoch": 198.81194029850747, "grad_norm": 1.4215072393417358, "learning_rate": 9.070238095238097e-06, "loss": 40.9393, "step": 8350 }, { "epoch": 198.83582089552237, "grad_norm": 1.349091649055481, "learning_rate": 9.06904761904762e-06, "loss": 39.1963, "step": 8351 }, { "epoch": 198.8597014925373, "grad_norm": 1.3312218189239502, "learning_rate": 9.067857142857144e-06, "loss": 39.7593, "step": 8352 }, { "epoch": 198.88358208955225, "grad_norm": 1.3494644165039062, "learning_rate": 9.066666666666667e-06, "loss": 40.2969, "step": 8353 }, { "epoch": 198.90746268656716, "grad_norm": 1.3603371381759644, "learning_rate": 9.065476190476191e-06, "loss": 39.6535, "step": 8354 }, { "epoch": 198.9313432835821, "grad_norm": 1.3265267610549927, "learning_rate": 9.064285714285715e-06, "loss": 38.1547, "step": 8355 }, { "epoch": 198.955223880597, "grad_norm": 1.3299893140792847, "learning_rate": 9.063095238095238e-06, "loss": 39.6953, "step": 8356 }, { "epoch": 198.97910447761194, "grad_norm": 1.3722138404846191, "learning_rate": 9.061904761904764e-06, "loss": 40.255, "step": 8357 }, { "epoch": 199.0, "grad_norm": 1.3580138683319092, "learning_rate": 9.060714285714285e-06, "loss": 35.3742, "step": 8358 }, { "epoch": 199.02388059701494, "grad_norm": 1.3456823825836182, "learning_rate": 9.05952380952381e-06, "loss": 38.7693, "step": 8359 }, { "epoch": 199.04776119402985, "grad_norm": 1.3411715030670166, "learning_rate": 9.058333333333334e-06, "loss": 39.5728, "step": 8360 }, { "epoch": 199.07164179104478, "grad_norm": 1.275390625, "learning_rate": 9.057142857142858e-06, "loss": 39.8416, "step": 8361 }, { "epoch": 199.0955223880597, "grad_norm": 1.3219788074493408, "learning_rate": 9.055952380952382e-06, "loss": 40.2765, "step": 8362 }, { "epoch": 199.11940298507463, "grad_norm": 1.3847650289535522, "learning_rate": 9.054761904761905e-06, "loss": 40.0008, "step": 8363 }, { "epoch": 199.14328358208957, "grad_norm": 1.2939895391464233, "learning_rate": 9.053571428571429e-06, "loss": 39.6104, "step": 8364 }, { "epoch": 199.16716417910447, "grad_norm": 1.4034801721572876, "learning_rate": 9.052380952380954e-06, "loss": 40.3801, "step": 8365 }, { "epoch": 199.1910447761194, "grad_norm": 1.3517119884490967, "learning_rate": 9.051190476190476e-06, "loss": 40.5922, "step": 8366 }, { "epoch": 199.21492537313432, "grad_norm": 1.317785620689392, "learning_rate": 9.050000000000001e-06, "loss": 39.1358, "step": 8367 }, { "epoch": 199.23880597014926, "grad_norm": 1.3429865837097168, "learning_rate": 9.048809523809523e-06, "loss": 39.841, "step": 8368 }, { "epoch": 199.26268656716417, "grad_norm": 1.342489242553711, "learning_rate": 9.047619047619049e-06, "loss": 40.1997, "step": 8369 }, { "epoch": 199.2865671641791, "grad_norm": 1.438088059425354, "learning_rate": 9.046428571428572e-06, "loss": 39.1631, "step": 8370 }, { "epoch": 199.31044776119404, "grad_norm": 1.351151466369629, "learning_rate": 9.045238095238096e-06, "loss": 38.8649, "step": 8371 }, { "epoch": 199.33432835820895, "grad_norm": 1.3759918212890625, "learning_rate": 9.04404761904762e-06, "loss": 39.587, "step": 8372 }, { "epoch": 199.3582089552239, "grad_norm": 1.4590145349502563, "learning_rate": 9.042857142857143e-06, "loss": 38.9902, "step": 8373 }, { "epoch": 199.3820895522388, "grad_norm": 1.3828874826431274, "learning_rate": 9.041666666666667e-06, "loss": 39.7394, "step": 8374 }, { "epoch": 199.40597014925373, "grad_norm": 1.3808432817459106, "learning_rate": 9.040476190476192e-06, "loss": 38.8001, "step": 8375 }, { "epoch": 199.42985074626867, "grad_norm": 1.3405669927597046, "learning_rate": 9.039285714285716e-06, "loss": 39.8281, "step": 8376 }, { "epoch": 199.45373134328358, "grad_norm": 1.337307095527649, "learning_rate": 9.03809523809524e-06, "loss": 38.7834, "step": 8377 }, { "epoch": 199.47761194029852, "grad_norm": 1.323648452758789, "learning_rate": 9.036904761904763e-06, "loss": 40.0284, "step": 8378 }, { "epoch": 199.50149253731342, "grad_norm": 1.3775368928909302, "learning_rate": 9.035714285714287e-06, "loss": 38.9881, "step": 8379 }, { "epoch": 199.52537313432836, "grad_norm": 1.3781110048294067, "learning_rate": 9.03452380952381e-06, "loss": 40.994, "step": 8380 }, { "epoch": 199.54925373134327, "grad_norm": 1.3442286252975464, "learning_rate": 9.033333333333334e-06, "loss": 39.863, "step": 8381 }, { "epoch": 199.5731343283582, "grad_norm": 1.3574823141098022, "learning_rate": 9.032142857142857e-06, "loss": 39.3366, "step": 8382 }, { "epoch": 199.59701492537314, "grad_norm": 1.3684008121490479, "learning_rate": 9.030952380952381e-06, "loss": 40.1082, "step": 8383 }, { "epoch": 199.62089552238805, "grad_norm": 1.3860973119735718, "learning_rate": 9.029761904761906e-06, "loss": 39.2621, "step": 8384 }, { "epoch": 199.644776119403, "grad_norm": 1.3725820779800415, "learning_rate": 9.028571428571428e-06, "loss": 40.1811, "step": 8385 }, { "epoch": 199.6686567164179, "grad_norm": 1.3652878999710083, "learning_rate": 9.027380952380954e-06, "loss": 40.3604, "step": 8386 }, { "epoch": 199.69253731343284, "grad_norm": 1.3340288400650024, "learning_rate": 9.026190476190477e-06, "loss": 40.5679, "step": 8387 }, { "epoch": 199.71641791044777, "grad_norm": 1.361693263053894, "learning_rate": 9.025e-06, "loss": 40.5482, "step": 8388 }, { "epoch": 199.74029850746268, "grad_norm": 1.4040027856826782, "learning_rate": 9.023809523809524e-06, "loss": 40.3545, "step": 8389 }, { "epoch": 199.76417910447762, "grad_norm": 1.4308195114135742, "learning_rate": 9.022619047619048e-06, "loss": 37.2321, "step": 8390 }, { "epoch": 199.78805970149253, "grad_norm": 1.3773605823516846, "learning_rate": 9.021428571428572e-06, "loss": 40.1834, "step": 8391 }, { "epoch": 199.81194029850747, "grad_norm": 1.3263499736785889, "learning_rate": 9.020238095238097e-06, "loss": 41.6857, "step": 8392 }, { "epoch": 199.83582089552237, "grad_norm": 1.3643227815628052, "learning_rate": 9.019047619047619e-06, "loss": 40.4675, "step": 8393 }, { "epoch": 199.8597014925373, "grad_norm": 1.328350305557251, "learning_rate": 9.017857142857144e-06, "loss": 39.8179, "step": 8394 }, { "epoch": 199.88358208955225, "grad_norm": 1.3895118236541748, "learning_rate": 9.016666666666666e-06, "loss": 40.1798, "step": 8395 }, { "epoch": 199.90746268656716, "grad_norm": 1.3377916812896729, "learning_rate": 9.015476190476191e-06, "loss": 39.4406, "step": 8396 }, { "epoch": 199.9313432835821, "grad_norm": 1.368583083152771, "learning_rate": 9.014285714285715e-06, "loss": 38.8616, "step": 8397 }, { "epoch": 199.955223880597, "grad_norm": 1.3287698030471802, "learning_rate": 9.013095238095239e-06, "loss": 39.3278, "step": 8398 }, { "epoch": 199.97910447761194, "grad_norm": 1.3306357860565186, "learning_rate": 9.011904761904762e-06, "loss": 39.851, "step": 8399 }, { "epoch": 200.0, "grad_norm": 1.4013166427612305, "learning_rate": 9.010714285714286e-06, "loss": 34.3859, "step": 8400 }, { "epoch": 200.0, "step": 8400, "total_flos": 4.15342011963894e+17, "train_loss": 3.985629536764962, "train_runtime": 25254.4126, "train_samples_per_second": 42.385, "train_steps_per_second": 0.333 }, { "epoch": 200.02388059701494, "grad_norm": 2.1195502281188965, "learning_rate": 1e-05, "loss": 39.8959, "step": 8401 }, { "epoch": 200.04776119402985, "grad_norm": 2.1413254737854004, "learning_rate": 9.99891774891775e-06, "loss": 39.4558, "step": 8402 }, { "epoch": 200.07164179104478, "grad_norm": 1.877864122390747, "learning_rate": 9.997835497835499e-06, "loss": 40.9881, "step": 8403 }, { "epoch": 200.0955223880597, "grad_norm": 1.763649344444275, "learning_rate": 9.996753246753248e-06, "loss": 40.4186, "step": 8404 }, { "epoch": 200.11940298507463, "grad_norm": 1.6444029808044434, "learning_rate": 9.995670995670996e-06, "loss": 38.8818, "step": 8405 }, { "epoch": 200.14328358208957, "grad_norm": 1.4822683334350586, "learning_rate": 9.994588744588745e-06, "loss": 39.2411, "step": 8406 }, { "epoch": 200.16716417910447, "grad_norm": 1.4035753011703491, "learning_rate": 9.993506493506494e-06, "loss": 40.0047, "step": 8407 }, { "epoch": 200.1910447761194, "grad_norm": 1.3580434322357178, "learning_rate": 9.992424242424243e-06, "loss": 40.5325, "step": 8408 }, { "epoch": 200.21492537313432, "grad_norm": 1.4315625429153442, "learning_rate": 9.991341991341992e-06, "loss": 40.3679, "step": 8409 }, { "epoch": 200.23880597014926, "grad_norm": 1.419010877609253, "learning_rate": 9.990259740259741e-06, "loss": 39.4211, "step": 8410 }, { "epoch": 200.26268656716417, "grad_norm": 1.4835749864578247, "learning_rate": 9.98917748917749e-06, "loss": 40.2571, "step": 8411 }, { "epoch": 200.2865671641791, "grad_norm": 1.4399628639221191, "learning_rate": 9.988095238095239e-06, "loss": 41.1591, "step": 8412 }, { "epoch": 200.31044776119404, "grad_norm": 1.4762556552886963, "learning_rate": 9.987012987012988e-06, "loss": 39.6944, "step": 8413 }, { "epoch": 200.33432835820895, "grad_norm": 1.451470136642456, "learning_rate": 9.985930735930737e-06, "loss": 39.9064, "step": 8414 }, { "epoch": 200.3582089552239, "grad_norm": 1.4357372522354126, "learning_rate": 9.984848484848485e-06, "loss": 39.9746, "step": 8415 }, { "epoch": 200.3820895522388, "grad_norm": 1.511968970298767, "learning_rate": 9.983766233766234e-06, "loss": 39.6803, "step": 8416 }, { "epoch": 200.40597014925373, "grad_norm": 1.4524909257888794, "learning_rate": 9.982683982683983e-06, "loss": 39.3517, "step": 8417 }, { "epoch": 200.42985074626867, "grad_norm": 1.3423553705215454, "learning_rate": 9.981601731601732e-06, "loss": 39.8872, "step": 8418 }, { "epoch": 200.45373134328358, "grad_norm": 1.3117338418960571, "learning_rate": 9.980519480519481e-06, "loss": 40.6442, "step": 8419 }, { "epoch": 200.47761194029852, "grad_norm": 1.3659687042236328, "learning_rate": 9.97943722943723e-06, "loss": 39.1832, "step": 8420 }, { "epoch": 200.50149253731342, "grad_norm": 1.3573120832443237, "learning_rate": 9.978354978354979e-06, "loss": 39.757, "step": 8421 }, { "epoch": 200.52537313432836, "grad_norm": 1.3675850629806519, "learning_rate": 9.977272727272728e-06, "loss": 39.7137, "step": 8422 }, { "epoch": 200.54925373134327, "grad_norm": 1.373896837234497, "learning_rate": 9.976190476190477e-06, "loss": 40.5882, "step": 8423 }, { "epoch": 200.5731343283582, "grad_norm": 1.3613499402999878, "learning_rate": 9.975108225108225e-06, "loss": 39.6152, "step": 8424 }, { "epoch": 200.59701492537314, "grad_norm": 1.4053380489349365, "learning_rate": 9.974025974025974e-06, "loss": 40.276, "step": 8425 }, { "epoch": 200.62089552238805, "grad_norm": 1.4112203121185303, "learning_rate": 9.972943722943725e-06, "loss": 39.1157, "step": 8426 }, { "epoch": 200.644776119403, "grad_norm": 1.3629989624023438, "learning_rate": 9.971861471861472e-06, "loss": 39.5532, "step": 8427 }, { "epoch": 200.6686567164179, "grad_norm": 1.3748594522476196, "learning_rate": 9.970779220779223e-06, "loss": 40.9771, "step": 8428 }, { "epoch": 200.69253731343284, "grad_norm": 1.380418300628662, "learning_rate": 9.96969696969697e-06, "loss": 40.2732, "step": 8429 }, { "epoch": 200.71641791044777, "grad_norm": 1.3557655811309814, "learning_rate": 9.96861471861472e-06, "loss": 39.9824, "step": 8430 }, { "epoch": 200.74029850746268, "grad_norm": 1.4485446214675903, "learning_rate": 9.967532467532468e-06, "loss": 39.6923, "step": 8431 }, { "epoch": 200.76417910447762, "grad_norm": 1.354023814201355, "learning_rate": 9.966450216450217e-06, "loss": 40.3986, "step": 8432 }, { "epoch": 200.78805970149253, "grad_norm": NaN, "learning_rate": 9.965367965367966e-06, "loss": 34.6977, "step": 8433 }, { "epoch": 200.81194029850747, "grad_norm": 1.3842402696609497, "learning_rate": 9.965367965367966e-06, "loss": 38.3836, "step": 8434 }, { "epoch": 200.83582089552237, "grad_norm": 1.3209642171859741, "learning_rate": 9.964285714285714e-06, "loss": 39.0095, "step": 8435 }, { "epoch": 200.8597014925373, "grad_norm": 1.423038125038147, "learning_rate": 9.963203463203463e-06, "loss": 38.4559, "step": 8436 }, { "epoch": 200.88358208955225, "grad_norm": 1.3200688362121582, "learning_rate": 9.962121212121212e-06, "loss": 40.1882, "step": 8437 }, { "epoch": 200.90746268656716, "grad_norm": 1.4185880422592163, "learning_rate": 9.961038961038963e-06, "loss": 40.4153, "step": 8438 }, { "epoch": 200.9313432835821, "grad_norm": 1.327790379524231, "learning_rate": 9.95995670995671e-06, "loss": 40.4699, "step": 8439 }, { "epoch": 200.955223880597, "grad_norm": 1.3564465045928955, "learning_rate": 9.95887445887446e-06, "loss": 39.9767, "step": 8440 }, { "epoch": 200.97910447761194, "grad_norm": 1.3825603723526, "learning_rate": 9.957792207792208e-06, "loss": 39.2995, "step": 8441 }, { "epoch": 201.0, "grad_norm": 1.325448751449585, "learning_rate": 9.956709956709958e-06, "loss": 34.5396, "step": 8442 }, { "epoch": 201.02388059701494, "grad_norm": 1.3660399913787842, "learning_rate": 9.955627705627706e-06, "loss": 38.3011, "step": 8443 }, { "epoch": 201.04776119402985, "grad_norm": 1.4088799953460693, "learning_rate": 9.954545454545456e-06, "loss": 39.8577, "step": 8444 }, { "epoch": 201.07164179104478, "grad_norm": 1.3425579071044922, "learning_rate": 9.953463203463203e-06, "loss": 40.5846, "step": 8445 }, { "epoch": 201.0955223880597, "grad_norm": 1.355627417564392, "learning_rate": 9.952380952380954e-06, "loss": 40.545, "step": 8446 }, { "epoch": 201.11940298507463, "grad_norm": 1.3953248262405396, "learning_rate": 9.951298701298701e-06, "loss": 40.2857, "step": 8447 }, { "epoch": 201.14328358208957, "grad_norm": 1.3633506298065186, "learning_rate": 9.950216450216452e-06, "loss": 39.3903, "step": 8448 }, { "epoch": 201.16716417910447, "grad_norm": 1.3455333709716797, "learning_rate": 9.949134199134199e-06, "loss": 39.1299, "step": 8449 }, { "epoch": 201.1910447761194, "grad_norm": 1.3515082597732544, "learning_rate": 9.94805194805195e-06, "loss": 38.6539, "step": 8450 }, { "epoch": 201.21492537313432, "grad_norm": 1.380735158920288, "learning_rate": 9.946969696969699e-06, "loss": 39.7634, "step": 8451 }, { "epoch": 201.23880597014926, "grad_norm": 1.347655177116394, "learning_rate": 9.945887445887446e-06, "loss": 39.291, "step": 8452 }, { "epoch": 201.26268656716417, "grad_norm": 1.351773977279663, "learning_rate": 9.944805194805196e-06, "loss": 40.3377, "step": 8453 }, { "epoch": 201.2865671641791, "grad_norm": 1.3738620281219482, "learning_rate": 9.943722943722944e-06, "loss": 40.5934, "step": 8454 }, { "epoch": 201.31044776119404, "grad_norm": 1.3605024814605713, "learning_rate": 9.942640692640694e-06, "loss": 39.1694, "step": 8455 }, { "epoch": 201.33432835820895, "grad_norm": 1.4437203407287598, "learning_rate": 9.941558441558441e-06, "loss": 40.5104, "step": 8456 }, { "epoch": 201.3582089552239, "grad_norm": 1.358488917350769, "learning_rate": 9.940476190476192e-06, "loss": 38.9206, "step": 8457 }, { "epoch": 201.3820895522388, "grad_norm": 1.3274990320205688, "learning_rate": 9.939393939393939e-06, "loss": 39.1223, "step": 8458 }, { "epoch": 201.40597014925373, "grad_norm": 1.3880196809768677, "learning_rate": 9.93831168831169e-06, "loss": 39.3876, "step": 8459 }, { "epoch": 201.42985074626867, "grad_norm": 1.3826501369476318, "learning_rate": 9.937229437229437e-06, "loss": 39.8548, "step": 8460 }, { "epoch": 201.45373134328358, "grad_norm": NaN, "learning_rate": 9.936147186147188e-06, "loss": 39.2627, "step": 8461 }, { "epoch": 201.47761194029852, "grad_norm": 1.3543522357940674, "learning_rate": 9.936147186147188e-06, "loss": 39.2187, "step": 8462 }, { "epoch": 201.50149253731342, "grad_norm": 1.408271074295044, "learning_rate": 9.935064935064936e-06, "loss": 39.8992, "step": 8463 }, { "epoch": 201.52537313432836, "grad_norm": 1.3365343809127808, "learning_rate": 9.933982683982685e-06, "loss": 38.4601, "step": 8464 }, { "epoch": 201.54925373134327, "grad_norm": 1.3201212882995605, "learning_rate": 9.932900432900434e-06, "loss": 39.2924, "step": 8465 }, { "epoch": 201.5731343283582, "grad_norm": 1.3899401426315308, "learning_rate": 9.931818181818183e-06, "loss": 39.9999, "step": 8466 }, { "epoch": 201.59701492537314, "grad_norm": 1.3483387231826782, "learning_rate": 9.930735930735932e-06, "loss": 41.3389, "step": 8467 }, { "epoch": 201.62089552238805, "grad_norm": 1.3873690366744995, "learning_rate": 9.929653679653681e-06, "loss": 40.2397, "step": 8468 }, { "epoch": 201.644776119403, "grad_norm": 1.3776922225952148, "learning_rate": 9.92857142857143e-06, "loss": 40.5522, "step": 8469 }, { "epoch": 201.6686567164179, "grad_norm": 1.3371458053588867, "learning_rate": 9.927489177489179e-06, "loss": 39.8155, "step": 8470 }, { "epoch": 201.69253731343284, "grad_norm": 1.3374888896942139, "learning_rate": 9.926406926406928e-06, "loss": 39.5109, "step": 8471 }, { "epoch": 201.71641791044777, "grad_norm": 1.3666965961456299, "learning_rate": 9.925324675324677e-06, "loss": 40.4929, "step": 8472 }, { "epoch": 201.74029850746268, "grad_norm": 1.3574906587600708, "learning_rate": 9.924242424242425e-06, "loss": 39.7443, "step": 8473 }, { "epoch": 201.76417910447762, "grad_norm": 1.3354772329330444, "learning_rate": 9.923160173160173e-06, "loss": 40.7204, "step": 8474 }, { "epoch": 201.78805970149253, "grad_norm": 1.3584409952163696, "learning_rate": 9.922077922077923e-06, "loss": 39.6694, "step": 8475 }, { "epoch": 201.81194029850747, "grad_norm": 1.3279088735580444, "learning_rate": 9.920995670995672e-06, "loss": 40.2911, "step": 8476 }, { "epoch": 201.83582089552237, "grad_norm": 1.3389276266098022, "learning_rate": 9.919913419913421e-06, "loss": 40.2478, "step": 8477 }, { "epoch": 201.8597014925373, "grad_norm": 1.3502564430236816, "learning_rate": 9.91883116883117e-06, "loss": 38.2543, "step": 8478 }, { "epoch": 201.88358208955225, "grad_norm": 1.3324962854385376, "learning_rate": 9.917748917748919e-06, "loss": 39.5073, "step": 8479 }, { "epoch": 201.90746268656716, "grad_norm": 1.3794143199920654, "learning_rate": 9.916666666666668e-06, "loss": 39.5413, "step": 8480 }, { "epoch": 201.9313432835821, "grad_norm": 1.3096647262573242, "learning_rate": 9.915584415584417e-06, "loss": 40.2791, "step": 8481 }, { "epoch": 201.955223880597, "grad_norm": 1.40058434009552, "learning_rate": 9.914502164502166e-06, "loss": 40.9026, "step": 8482 }, { "epoch": 201.97910447761194, "grad_norm": 1.3045790195465088, "learning_rate": 9.913419913419914e-06, "loss": 41.8992, "step": 8483 }, { "epoch": 202.0, "grad_norm": 1.3812085390090942, "learning_rate": 9.912337662337663e-06, "loss": 34.8652, "step": 8484 }, { "epoch": 202.02388059701494, "grad_norm": 1.3392449617385864, "learning_rate": 9.911255411255412e-06, "loss": 40.4868, "step": 8485 }, { "epoch": 202.04776119402985, "grad_norm": 1.326598882675171, "learning_rate": 9.910173160173161e-06, "loss": 40.4001, "step": 8486 }, { "epoch": 202.07164179104478, "grad_norm": 1.344519019126892, "learning_rate": 9.90909090909091e-06, "loss": 39.542, "step": 8487 }, { "epoch": 202.0955223880597, "grad_norm": 1.403530240058899, "learning_rate": 9.908008658008659e-06, "loss": 39.7246, "step": 8488 }, { "epoch": 202.11940298507463, "grad_norm": 1.5372259616851807, "learning_rate": 9.906926406926408e-06, "loss": 39.6689, "step": 8489 }, { "epoch": 202.14328358208957, "grad_norm": 1.358682632446289, "learning_rate": 9.905844155844157e-06, "loss": 40.1358, "step": 8490 }, { "epoch": 202.16716417910447, "grad_norm": 1.3716994524002075, "learning_rate": 9.904761904761906e-06, "loss": 39.0199, "step": 8491 }, { "epoch": 202.1910447761194, "grad_norm": 1.4490586519241333, "learning_rate": 9.903679653679655e-06, "loss": 38.9125, "step": 8492 }, { "epoch": 202.21492537313432, "grad_norm": 1.329615831375122, "learning_rate": 9.902597402597403e-06, "loss": 39.2993, "step": 8493 }, { "epoch": 202.23880597014926, "grad_norm": 1.3572126626968384, "learning_rate": 9.901515151515152e-06, "loss": 39.6082, "step": 8494 }, { "epoch": 202.26268656716417, "grad_norm": 1.4243184328079224, "learning_rate": 9.900432900432901e-06, "loss": 39.5263, "step": 8495 }, { "epoch": 202.2865671641791, "grad_norm": 1.5305248498916626, "learning_rate": 9.89935064935065e-06, "loss": 38.2848, "step": 8496 }, { "epoch": 202.31044776119404, "grad_norm": 1.440354824066162, "learning_rate": 9.898268398268399e-06, "loss": 39.1987, "step": 8497 }, { "epoch": 202.33432835820895, "grad_norm": 1.4369511604309082, "learning_rate": 9.897186147186148e-06, "loss": 40.6514, "step": 8498 }, { "epoch": 202.3582089552239, "grad_norm": 1.34109365940094, "learning_rate": 9.896103896103897e-06, "loss": 40.742, "step": 8499 }, { "epoch": 202.3820895522388, "grad_norm": 1.3409417867660522, "learning_rate": 9.895021645021646e-06, "loss": 39.6949, "step": 8500 }, { "epoch": 202.40597014925373, "grad_norm": 1.4680659770965576, "learning_rate": 9.893939393939395e-06, "loss": 39.932, "step": 8501 }, { "epoch": 202.42985074626867, "grad_norm": 1.3301887512207031, "learning_rate": 9.892857142857143e-06, "loss": 39.748, "step": 8502 }, { "epoch": 202.45373134328358, "grad_norm": 1.350243091583252, "learning_rate": 9.891774891774892e-06, "loss": 39.3886, "step": 8503 }, { "epoch": 202.47761194029852, "grad_norm": 1.3358641862869263, "learning_rate": 9.890692640692641e-06, "loss": 39.8462, "step": 8504 }, { "epoch": 202.50149253731342, "grad_norm": 1.3334248065948486, "learning_rate": 9.88961038961039e-06, "loss": 40.3326, "step": 8505 }, { "epoch": 202.52537313432836, "grad_norm": 1.3392813205718994, "learning_rate": 9.888528138528139e-06, "loss": 39.2142, "step": 8506 }, { "epoch": 202.54925373134327, "grad_norm": 1.3868093490600586, "learning_rate": 9.887445887445888e-06, "loss": 39.7993, "step": 8507 }, { "epoch": 202.5731343283582, "grad_norm": 1.3549549579620361, "learning_rate": 9.886363636363637e-06, "loss": 39.9657, "step": 8508 }, { "epoch": 202.59701492537314, "grad_norm": 1.394365906715393, "learning_rate": 9.885281385281386e-06, "loss": 40.8488, "step": 8509 }, { "epoch": 202.62089552238805, "grad_norm": 1.3568087816238403, "learning_rate": 9.884199134199135e-06, "loss": 39.5929, "step": 8510 }, { "epoch": 202.644776119403, "grad_norm": 1.3304247856140137, "learning_rate": 9.883116883116885e-06, "loss": 40.6643, "step": 8511 }, { "epoch": 202.6686567164179, "grad_norm": 1.378134846687317, "learning_rate": 9.882034632034632e-06, "loss": 39.3606, "step": 8512 }, { "epoch": 202.69253731343284, "grad_norm": 1.3511158227920532, "learning_rate": 9.880952380952381e-06, "loss": 41.1181, "step": 8513 }, { "epoch": 202.71641791044777, "grad_norm": 1.3504726886749268, "learning_rate": 9.87987012987013e-06, "loss": 39.8274, "step": 8514 }, { "epoch": 202.74029850746268, "grad_norm": 1.3652353286743164, "learning_rate": 9.87878787878788e-06, "loss": 39.5689, "step": 8515 }, { "epoch": 202.76417910447762, "grad_norm": 1.3714081048965454, "learning_rate": 9.877705627705628e-06, "loss": 39.5625, "step": 8516 }, { "epoch": 202.78805970149253, "grad_norm": 1.3876492977142334, "learning_rate": 9.876623376623377e-06, "loss": 39.8309, "step": 8517 }, { "epoch": 202.81194029850747, "grad_norm": 1.3487988710403442, "learning_rate": 9.875541125541126e-06, "loss": 40.8116, "step": 8518 }, { "epoch": 202.83582089552237, "grad_norm": 1.3697420358657837, "learning_rate": 9.874458874458875e-06, "loss": 39.5705, "step": 8519 }, { "epoch": 202.8597014925373, "grad_norm": 1.34088134765625, "learning_rate": 9.873376623376624e-06, "loss": 39.9533, "step": 8520 }, { "epoch": 202.88358208955225, "grad_norm": 1.3623926639556885, "learning_rate": 9.872294372294373e-06, "loss": 39.7599, "step": 8521 }, { "epoch": 202.90746268656716, "grad_norm": 1.3572895526885986, "learning_rate": 9.871212121212121e-06, "loss": 39.5918, "step": 8522 }, { "epoch": 202.9313432835821, "grad_norm": 1.3670583963394165, "learning_rate": 9.87012987012987e-06, "loss": 39.2493, "step": 8523 }, { "epoch": 202.955223880597, "grad_norm": 1.372652292251587, "learning_rate": 9.869047619047621e-06, "loss": 40.7964, "step": 8524 }, { "epoch": 202.97910447761194, "grad_norm": 1.353052020072937, "learning_rate": 9.867965367965368e-06, "loss": 40.1276, "step": 8525 }, { "epoch": 203.0, "grad_norm": 1.3234151601791382, "learning_rate": 9.866883116883119e-06, "loss": 34.8646, "step": 8526 }, { "epoch": 203.02388059701494, "grad_norm": 1.368504285812378, "learning_rate": 9.865800865800866e-06, "loss": 40.0368, "step": 8527 }, { "epoch": 203.04776119402985, "grad_norm": 1.3239166736602783, "learning_rate": 9.864718614718617e-06, "loss": 40.2772, "step": 8528 }, { "epoch": 203.07164179104478, "grad_norm": 1.3166489601135254, "learning_rate": 9.863636363636364e-06, "loss": 39.4239, "step": 8529 }, { "epoch": 203.0955223880597, "grad_norm": 1.344531774520874, "learning_rate": 9.862554112554114e-06, "loss": 39.2395, "step": 8530 }, { "epoch": 203.11940298507463, "grad_norm": 1.3740757703781128, "learning_rate": 9.861471861471862e-06, "loss": 40.3153, "step": 8531 }, { "epoch": 203.14328358208957, "grad_norm": 1.340865135192871, "learning_rate": 9.86038961038961e-06, "loss": 40.859, "step": 8532 }, { "epoch": 203.16716417910447, "grad_norm": 1.335741400718689, "learning_rate": 9.85930735930736e-06, "loss": 39.337, "step": 8533 }, { "epoch": 203.1910447761194, "grad_norm": 1.3189759254455566, "learning_rate": 9.858225108225108e-06, "loss": 38.7484, "step": 8534 }, { "epoch": 203.21492537313432, "grad_norm": 1.347547173500061, "learning_rate": 9.857142857142859e-06, "loss": 39.6967, "step": 8535 }, { "epoch": 203.23880597014926, "grad_norm": NaN, "learning_rate": 9.856060606060606e-06, "loss": 33.438, "step": 8536 }, { "epoch": 203.26268656716417, "grad_norm": 1.3511691093444824, "learning_rate": 9.856060606060606e-06, "loss": 40.747, "step": 8537 }, { "epoch": 203.2865671641791, "grad_norm": 1.3571406602859497, "learning_rate": 9.854978354978357e-06, "loss": 39.8058, "step": 8538 }, { "epoch": 203.31044776119404, "grad_norm": 1.355405569076538, "learning_rate": 9.853896103896104e-06, "loss": 39.6239, "step": 8539 }, { "epoch": 203.33432835820895, "grad_norm": 1.3559458255767822, "learning_rate": 9.852813852813854e-06, "loss": 39.6681, "step": 8540 }, { "epoch": 203.3582089552239, "grad_norm": 1.3621097803115845, "learning_rate": 9.851731601731602e-06, "loss": 39.8051, "step": 8541 }, { "epoch": 203.3820895522388, "grad_norm": 1.350886583328247, "learning_rate": 9.850649350649352e-06, "loss": 40.5377, "step": 8542 }, { "epoch": 203.40597014925373, "grad_norm": 1.365471363067627, "learning_rate": 9.8495670995671e-06, "loss": 39.9006, "step": 8543 }, { "epoch": 203.42985074626867, "grad_norm": 1.3369276523590088, "learning_rate": 9.84848484848485e-06, "loss": 39.362, "step": 8544 }, { "epoch": 203.45373134328358, "grad_norm": 1.3444722890853882, "learning_rate": 9.847402597402597e-06, "loss": 39.7894, "step": 8545 }, { "epoch": 203.47761194029852, "grad_norm": 1.3551136255264282, "learning_rate": 9.846320346320348e-06, "loss": 39.8917, "step": 8546 }, { "epoch": 203.50149253731342, "grad_norm": 1.3875447511672974, "learning_rate": 9.845238095238097e-06, "loss": 38.9767, "step": 8547 }, { "epoch": 203.52537313432836, "grad_norm": 1.3281328678131104, "learning_rate": 9.844155844155846e-06, "loss": 40.1651, "step": 8548 }, { "epoch": 203.54925373134327, "grad_norm": 1.3125382661819458, "learning_rate": 9.843073593073595e-06, "loss": 38.477, "step": 8549 }, { "epoch": 203.5731343283582, "grad_norm": 1.3165040016174316, "learning_rate": 9.841991341991343e-06, "loss": 39.8648, "step": 8550 }, { "epoch": 203.59701492537314, "grad_norm": 1.355776071548462, "learning_rate": 9.840909090909092e-06, "loss": 40.5803, "step": 8551 }, { "epoch": 203.62089552238805, "grad_norm": 1.3388727903366089, "learning_rate": 9.839826839826841e-06, "loss": 40.3507, "step": 8552 }, { "epoch": 203.644776119403, "grad_norm": 1.3471029996871948, "learning_rate": 9.83874458874459e-06, "loss": 39.7409, "step": 8553 }, { "epoch": 203.6686567164179, "grad_norm": 1.3633403778076172, "learning_rate": 9.837662337662337e-06, "loss": 38.9814, "step": 8554 }, { "epoch": 203.69253731343284, "grad_norm": 1.3420095443725586, "learning_rate": 9.836580086580088e-06, "loss": 40.6614, "step": 8555 }, { "epoch": 203.71641791044777, "grad_norm": 1.3590494394302368, "learning_rate": 9.835497835497835e-06, "loss": 40.3171, "step": 8556 }, { "epoch": 203.74029850746268, "grad_norm": 1.3958531618118286, "learning_rate": 9.834415584415586e-06, "loss": 39.4059, "step": 8557 }, { "epoch": 203.76417910447762, "grad_norm": 1.3501266241073608, "learning_rate": 9.833333333333333e-06, "loss": 39.7017, "step": 8558 }, { "epoch": 203.78805970149253, "grad_norm": 1.392917275428772, "learning_rate": 9.832251082251084e-06, "loss": 39.5622, "step": 8559 }, { "epoch": 203.81194029850747, "grad_norm": 1.3739819526672363, "learning_rate": 9.831168831168832e-06, "loss": 39.7097, "step": 8560 }, { "epoch": 203.83582089552237, "grad_norm": 1.3471810817718506, "learning_rate": 9.830086580086581e-06, "loss": 40.016, "step": 8561 }, { "epoch": 203.8597014925373, "grad_norm": 1.3445953130722046, "learning_rate": 9.82900432900433e-06, "loss": 40.6338, "step": 8562 }, { "epoch": 203.88358208955225, "grad_norm": 1.3349745273590088, "learning_rate": 9.827922077922079e-06, "loss": 39.8669, "step": 8563 }, { "epoch": 203.90746268656716, "grad_norm": 1.313550353050232, "learning_rate": 9.826839826839828e-06, "loss": 39.9588, "step": 8564 }, { "epoch": 203.9313432835821, "grad_norm": 1.331763505935669, "learning_rate": 9.825757575757577e-06, "loss": 41.0201, "step": 8565 }, { "epoch": 203.955223880597, "grad_norm": 1.456132411956787, "learning_rate": 9.824675324675326e-06, "loss": 39.6592, "step": 8566 }, { "epoch": 203.97910447761194, "grad_norm": 1.3017507791519165, "learning_rate": 9.823593073593075e-06, "loss": 39.1343, "step": 8567 }, { "epoch": 204.0, "grad_norm": NaN, "learning_rate": 9.822510822510824e-06, "loss": 46.1762, "step": 8568 }, { "epoch": 204.02388059701494, "grad_norm": 1.3824049234390259, "learning_rate": 9.822510822510824e-06, "loss": 40.1781, "step": 8569 }, { "epoch": 204.04776119402985, "grad_norm": 1.3490878343582153, "learning_rate": 9.821428571428573e-06, "loss": 39.7684, "step": 8570 }, { "epoch": 204.07164179104478, "grad_norm": 1.311771273612976, "learning_rate": 9.820346320346321e-06, "loss": 40.1026, "step": 8571 }, { "epoch": 204.0955223880597, "grad_norm": 1.3553950786590576, "learning_rate": 9.81926406926407e-06, "loss": 40.3155, "step": 8572 }, { "epoch": 204.11940298507463, "grad_norm": 1.3495888710021973, "learning_rate": 9.81818181818182e-06, "loss": 39.5604, "step": 8573 }, { "epoch": 204.14328358208957, "grad_norm": 1.3362724781036377, "learning_rate": 9.817099567099568e-06, "loss": 39.8104, "step": 8574 }, { "epoch": 204.16716417910447, "grad_norm": 1.358236312866211, "learning_rate": 9.816017316017317e-06, "loss": 38.7443, "step": 8575 }, { "epoch": 204.1910447761194, "grad_norm": 1.398469090461731, "learning_rate": 9.814935064935066e-06, "loss": 39.7793, "step": 8576 }, { "epoch": 204.21492537313432, "grad_norm": 1.3483610153198242, "learning_rate": 9.813852813852815e-06, "loss": 39.0393, "step": 8577 }, { "epoch": 204.23880597014926, "grad_norm": 1.3800088167190552, "learning_rate": 9.812770562770564e-06, "loss": 41.3238, "step": 8578 }, { "epoch": 204.26268656716417, "grad_norm": 1.35890531539917, "learning_rate": 9.811688311688313e-06, "loss": 39.4917, "step": 8579 }, { "epoch": 204.2865671641791, "grad_norm": 1.357926845550537, "learning_rate": 9.810606060606061e-06, "loss": 39.982, "step": 8580 }, { "epoch": 204.31044776119404, "grad_norm": 1.4157592058181763, "learning_rate": 9.80952380952381e-06, "loss": 37.6176, "step": 8581 }, { "epoch": 204.33432835820895, "grad_norm": 1.391133427619934, "learning_rate": 9.80844155844156e-06, "loss": 38.9508, "step": 8582 }, { "epoch": 204.3582089552239, "grad_norm": 1.3329156637191772, "learning_rate": 9.807359307359308e-06, "loss": 39.8415, "step": 8583 }, { "epoch": 204.3820895522388, "grad_norm": 1.3923654556274414, "learning_rate": 9.806277056277057e-06, "loss": 39.7097, "step": 8584 }, { "epoch": 204.40597014925373, "grad_norm": 1.3117201328277588, "learning_rate": 9.805194805194806e-06, "loss": 38.7359, "step": 8585 }, { "epoch": 204.42985074626867, "grad_norm": 1.3279907703399658, "learning_rate": 9.804112554112555e-06, "loss": 39.8776, "step": 8586 }, { "epoch": 204.45373134328358, "grad_norm": 1.3501886129379272, "learning_rate": 9.803030303030304e-06, "loss": 38.9771, "step": 8587 }, { "epoch": 204.47761194029852, "grad_norm": 1.374982237815857, "learning_rate": 9.801948051948053e-06, "loss": 38.835, "step": 8588 }, { "epoch": 204.50149253731342, "grad_norm": 1.3740473985671997, "learning_rate": 9.800865800865802e-06, "loss": 39.1171, "step": 8589 }, { "epoch": 204.52537313432836, "grad_norm": 1.3658461570739746, "learning_rate": 9.79978354978355e-06, "loss": 40.4881, "step": 8590 }, { "epoch": 204.54925373134327, "grad_norm": 1.3756301403045654, "learning_rate": 9.7987012987013e-06, "loss": 40.0403, "step": 8591 }, { "epoch": 204.5731343283582, "grad_norm": 1.361266016960144, "learning_rate": 9.797619047619048e-06, "loss": 41.1347, "step": 8592 }, { "epoch": 204.59701492537314, "grad_norm": 1.344994068145752, "learning_rate": 9.796536796536797e-06, "loss": 39.7614, "step": 8593 }, { "epoch": 204.62089552238805, "grad_norm": 1.3442108631134033, "learning_rate": 9.795454545454546e-06, "loss": 40.7159, "step": 8594 }, { "epoch": 204.644776119403, "grad_norm": 1.4157758951187134, "learning_rate": 9.794372294372295e-06, "loss": 39.9855, "step": 8595 }, { "epoch": 204.6686567164179, "grad_norm": 1.3436305522918701, "learning_rate": 9.793290043290044e-06, "loss": 40.072, "step": 8596 }, { "epoch": 204.69253731343284, "grad_norm": 1.3241760730743408, "learning_rate": 9.792207792207793e-06, "loss": 40.8881, "step": 8597 }, { "epoch": 204.71641791044777, "grad_norm": 1.4925997257232666, "learning_rate": 9.791125541125542e-06, "loss": 40.4019, "step": 8598 }, { "epoch": 204.74029850746268, "grad_norm": 1.3798843622207642, "learning_rate": 9.79004329004329e-06, "loss": 40.6034, "step": 8599 }, { "epoch": 204.76417910447762, "grad_norm": 1.3690818548202515, "learning_rate": 9.78896103896104e-06, "loss": 40.8486, "step": 8600 }, { "epoch": 204.78805970149253, "grad_norm": 1.344929575920105, "learning_rate": 9.787878787878788e-06, "loss": 40.3295, "step": 8601 }, { "epoch": 204.81194029850747, "grad_norm": 1.355258584022522, "learning_rate": 9.786796536796537e-06, "loss": 40.4366, "step": 8602 }, { "epoch": 204.83582089552237, "grad_norm": 1.3434778451919556, "learning_rate": 9.785714285714286e-06, "loss": 39.4732, "step": 8603 }, { "epoch": 204.8597014925373, "grad_norm": 1.3295626640319824, "learning_rate": 9.784632034632035e-06, "loss": 38.3219, "step": 8604 }, { "epoch": 204.88358208955225, "grad_norm": 1.3565593957901, "learning_rate": 9.783549783549784e-06, "loss": 38.5006, "step": 8605 }, { "epoch": 204.90746268656716, "grad_norm": 1.409366250038147, "learning_rate": 9.782467532467533e-06, "loss": 40.5858, "step": 8606 }, { "epoch": 204.9313432835821, "grad_norm": 1.3263705968856812, "learning_rate": 9.781385281385282e-06, "loss": 38.8793, "step": 8607 }, { "epoch": 204.955223880597, "grad_norm": 1.3189215660095215, "learning_rate": 9.78030303030303e-06, "loss": 41.2919, "step": 8608 }, { "epoch": 204.97910447761194, "grad_norm": 1.3795719146728516, "learning_rate": 9.779220779220781e-06, "loss": 39.8344, "step": 8609 }, { "epoch": 205.0, "grad_norm": 1.5358713865280151, "learning_rate": 9.778138528138528e-06, "loss": 34.9038, "step": 8610 }, { "epoch": 205.02388059701494, "grad_norm": 1.365280032157898, "learning_rate": 9.777056277056279e-06, "loss": 39.5195, "step": 8611 }, { "epoch": 205.04776119402985, "grad_norm": 1.368942379951477, "learning_rate": 9.775974025974026e-06, "loss": 39.6803, "step": 8612 }, { "epoch": 205.07164179104478, "grad_norm": 1.3710911273956299, "learning_rate": 9.774891774891775e-06, "loss": 39.8404, "step": 8613 }, { "epoch": 205.0955223880597, "grad_norm": 1.3293559551239014, "learning_rate": 9.773809523809524e-06, "loss": 38.998, "step": 8614 }, { "epoch": 205.11940298507463, "grad_norm": 1.3549565076828003, "learning_rate": 9.772727272727273e-06, "loss": 40.0746, "step": 8615 }, { "epoch": 205.14328358208957, "grad_norm": 1.3354560136795044, "learning_rate": 9.771645021645022e-06, "loss": 41.08, "step": 8616 }, { "epoch": 205.16716417910447, "grad_norm": 1.3680152893066406, "learning_rate": 9.77056277056277e-06, "loss": 40.2446, "step": 8617 }, { "epoch": 205.1910447761194, "grad_norm": 1.3856617212295532, "learning_rate": 9.76948051948052e-06, "loss": 39.1995, "step": 8618 }, { "epoch": 205.21492537313432, "grad_norm": 1.380136251449585, "learning_rate": 9.768398268398269e-06, "loss": 40.3514, "step": 8619 }, { "epoch": 205.23880597014926, "grad_norm": 1.3951385021209717, "learning_rate": 9.767316017316019e-06, "loss": 39.2672, "step": 8620 }, { "epoch": 205.26268656716417, "grad_norm": 1.329370141029358, "learning_rate": 9.766233766233766e-06, "loss": 40.558, "step": 8621 }, { "epoch": 205.2865671641791, "grad_norm": 1.3303219079971313, "learning_rate": 9.765151515151517e-06, "loss": 40.4305, "step": 8622 }, { "epoch": 205.31044776119404, "grad_norm": 1.4346680641174316, "learning_rate": 9.764069264069264e-06, "loss": 40.303, "step": 8623 }, { "epoch": 205.33432835820895, "grad_norm": 1.3465746641159058, "learning_rate": 9.762987012987015e-06, "loss": 39.9316, "step": 8624 }, { "epoch": 205.3582089552239, "grad_norm": 1.3494256734848022, "learning_rate": 9.761904761904762e-06, "loss": 39.6773, "step": 8625 }, { "epoch": 205.3820895522388, "grad_norm": 1.3724424839019775, "learning_rate": 9.760822510822513e-06, "loss": 39.0758, "step": 8626 }, { "epoch": 205.40597014925373, "grad_norm": 1.322094440460205, "learning_rate": 9.75974025974026e-06, "loss": 38.5897, "step": 8627 }, { "epoch": 205.42985074626867, "grad_norm": 1.406888723373413, "learning_rate": 9.75865800865801e-06, "loss": 39.3751, "step": 8628 }, { "epoch": 205.45373134328358, "grad_norm": 1.3455674648284912, "learning_rate": 9.757575757575758e-06, "loss": 38.5558, "step": 8629 }, { "epoch": 205.47761194029852, "grad_norm": 1.318764090538025, "learning_rate": 9.756493506493508e-06, "loss": 39.8048, "step": 8630 }, { "epoch": 205.50149253731342, "grad_norm": 1.4132392406463623, "learning_rate": 9.755411255411255e-06, "loss": 39.2253, "step": 8631 }, { "epoch": 205.52537313432836, "grad_norm": 1.3565527200698853, "learning_rate": 9.754329004329006e-06, "loss": 38.8588, "step": 8632 }, { "epoch": 205.54925373134327, "grad_norm": 1.3433454036712646, "learning_rate": 9.753246753246755e-06, "loss": 39.0655, "step": 8633 }, { "epoch": 205.5731343283582, "grad_norm": 1.3427348136901855, "learning_rate": 9.752164502164502e-06, "loss": 40.3165, "step": 8634 }, { "epoch": 205.59701492537314, "grad_norm": 1.3323968648910522, "learning_rate": 9.751082251082253e-06, "loss": 40.4186, "step": 8635 }, { "epoch": 205.62089552238805, "grad_norm": 1.3863612413406372, "learning_rate": 9.75e-06, "loss": 38.9462, "step": 8636 }, { "epoch": 205.644776119403, "grad_norm": 1.382765531539917, "learning_rate": 9.74891774891775e-06, "loss": 39.3955, "step": 8637 }, { "epoch": 205.6686567164179, "grad_norm": 1.402635931968689, "learning_rate": 9.747835497835498e-06, "loss": 39.921, "step": 8638 }, { "epoch": 205.69253731343284, "grad_norm": 1.3341659307479858, "learning_rate": 9.746753246753248e-06, "loss": 39.7337, "step": 8639 }, { "epoch": 205.71641791044777, "grad_norm": 1.3422999382019043, "learning_rate": 9.745670995670995e-06, "loss": 41.1244, "step": 8640 }, { "epoch": 205.74029850746268, "grad_norm": 1.3724491596221924, "learning_rate": 9.744588744588746e-06, "loss": 40.4848, "step": 8641 }, { "epoch": 205.76417910447762, "grad_norm": 1.4453892707824707, "learning_rate": 9.743506493506493e-06, "loss": 38.882, "step": 8642 }, { "epoch": 205.78805970149253, "grad_norm": 1.382094144821167, "learning_rate": 9.742424242424244e-06, "loss": 40.74, "step": 8643 }, { "epoch": 205.81194029850747, "grad_norm": 1.3637057542800903, "learning_rate": 9.741341991341993e-06, "loss": 39.8377, "step": 8644 }, { "epoch": 205.83582089552237, "grad_norm": 1.3618139028549194, "learning_rate": 9.740259740259742e-06, "loss": 40.6805, "step": 8645 }, { "epoch": 205.8597014925373, "grad_norm": 1.367019772529602, "learning_rate": 9.73917748917749e-06, "loss": 40.7782, "step": 8646 }, { "epoch": 205.88358208955225, "grad_norm": 1.353763222694397, "learning_rate": 9.73809523809524e-06, "loss": 38.2164, "step": 8647 }, { "epoch": 205.90746268656716, "grad_norm": 1.4666216373443604, "learning_rate": 9.737012987012988e-06, "loss": 39.9627, "step": 8648 }, { "epoch": 205.9313432835821, "grad_norm": 1.3820366859436035, "learning_rate": 9.735930735930737e-06, "loss": 41.1619, "step": 8649 }, { "epoch": 205.955223880597, "grad_norm": 1.347941517829895, "learning_rate": 9.734848484848486e-06, "loss": 40.5167, "step": 8650 }, { "epoch": 205.97910447761194, "grad_norm": 1.3841211795806885, "learning_rate": 9.733766233766235e-06, "loss": 39.8646, "step": 8651 }, { "epoch": 206.0, "grad_norm": 1.3188380002975464, "learning_rate": 9.732683982683984e-06, "loss": 34.674, "step": 8652 }, { "epoch": 206.02388059701494, "grad_norm": 1.3909363746643066, "learning_rate": 9.731601731601731e-06, "loss": 40.2516, "step": 8653 }, { "epoch": 206.04776119402985, "grad_norm": 1.3141813278198242, "learning_rate": 9.730519480519482e-06, "loss": 39.6352, "step": 8654 }, { "epoch": 206.07164179104478, "grad_norm": 1.3523144721984863, "learning_rate": 9.729437229437229e-06, "loss": 39.7813, "step": 8655 }, { "epoch": 206.0955223880597, "grad_norm": 1.348853588104248, "learning_rate": 9.72835497835498e-06, "loss": 40.0055, "step": 8656 }, { "epoch": 206.11940298507463, "grad_norm": 1.3406504392623901, "learning_rate": 9.727272727272728e-06, "loss": 39.916, "step": 8657 }, { "epoch": 206.14328358208957, "grad_norm": 1.3132129907608032, "learning_rate": 9.726190476190477e-06, "loss": 40.579, "step": 8658 }, { "epoch": 206.16716417910447, "grad_norm": 1.3503813743591309, "learning_rate": 9.725108225108226e-06, "loss": 39.1113, "step": 8659 }, { "epoch": 206.1910447761194, "grad_norm": 1.3078677654266357, "learning_rate": 9.724025974025975e-06, "loss": 40.1852, "step": 8660 }, { "epoch": 206.21492537313432, "grad_norm": 1.3449079990386963, "learning_rate": 9.722943722943724e-06, "loss": 39.9687, "step": 8661 }, { "epoch": 206.23880597014926, "grad_norm": 1.3278062343597412, "learning_rate": 9.721861471861473e-06, "loss": 39.3812, "step": 8662 }, { "epoch": 206.26268656716417, "grad_norm": 1.3352363109588623, "learning_rate": 9.720779220779222e-06, "loss": 39.038, "step": 8663 }, { "epoch": 206.2865671641791, "grad_norm": 1.3638793230056763, "learning_rate": 9.71969696969697e-06, "loss": 39.4534, "step": 8664 }, { "epoch": 206.31044776119404, "grad_norm": 1.3547192811965942, "learning_rate": 9.71861471861472e-06, "loss": 39.7471, "step": 8665 }, { "epoch": 206.33432835820895, "grad_norm": 1.3610570430755615, "learning_rate": 9.717532467532468e-06, "loss": 40.6904, "step": 8666 }, { "epoch": 206.3582089552239, "grad_norm": 1.3772169351577759, "learning_rate": 9.716450216450217e-06, "loss": 40.6247, "step": 8667 }, { "epoch": 206.3820895522388, "grad_norm": NaN, "learning_rate": 9.715367965367966e-06, "loss": 49.4286, "step": 8668 }, { "epoch": 206.40597014925373, "grad_norm": 1.3889539241790771, "learning_rate": 9.715367965367966e-06, "loss": 40.3652, "step": 8669 }, { "epoch": 206.42985074626867, "grad_norm": 1.356130838394165, "learning_rate": 9.714285714285715e-06, "loss": 40.6128, "step": 8670 }, { "epoch": 206.45373134328358, "grad_norm": 1.3980751037597656, "learning_rate": 9.713203463203464e-06, "loss": 39.3439, "step": 8671 }, { "epoch": 206.47761194029852, "grad_norm": 1.3529024124145508, "learning_rate": 9.712121212121213e-06, "loss": 38.7063, "step": 8672 }, { "epoch": 206.50149253731342, "grad_norm": 1.3549461364746094, "learning_rate": 9.711038961038962e-06, "loss": 38.8398, "step": 8673 }, { "epoch": 206.52537313432836, "grad_norm": 1.314579963684082, "learning_rate": 9.70995670995671e-06, "loss": 38.9628, "step": 8674 }, { "epoch": 206.54925373134327, "grad_norm": 1.3566210269927979, "learning_rate": 9.70887445887446e-06, "loss": 40.113, "step": 8675 }, { "epoch": 206.5731343283582, "grad_norm": 1.3369402885437012, "learning_rate": 9.707792207792209e-06, "loss": 39.7786, "step": 8676 }, { "epoch": 206.59701492537314, "grad_norm": 1.3501322269439697, "learning_rate": 9.706709956709957e-06, "loss": 40.0657, "step": 8677 }, { "epoch": 206.62089552238805, "grad_norm": 1.432174563407898, "learning_rate": 9.705627705627706e-06, "loss": 39.9026, "step": 8678 }, { "epoch": 206.644776119403, "grad_norm": 1.36751389503479, "learning_rate": 9.704545454545455e-06, "loss": 39.6706, "step": 8679 }, { "epoch": 206.6686567164179, "grad_norm": 1.3049030303955078, "learning_rate": 9.703463203463204e-06, "loss": 39.7797, "step": 8680 }, { "epoch": 206.69253731343284, "grad_norm": 1.378965139389038, "learning_rate": 9.702380952380953e-06, "loss": 40.1596, "step": 8681 }, { "epoch": 206.71641791044777, "grad_norm": 1.4143418073654175, "learning_rate": 9.701298701298702e-06, "loss": 39.2847, "step": 8682 }, { "epoch": 206.74029850746268, "grad_norm": 1.3411129713058472, "learning_rate": 9.700216450216451e-06, "loss": 40.6895, "step": 8683 }, { "epoch": 206.76417910447762, "grad_norm": 1.3735721111297607, "learning_rate": 9.6991341991342e-06, "loss": 39.7038, "step": 8684 }, { "epoch": 206.78805970149253, "grad_norm": 1.3914451599121094, "learning_rate": 9.698051948051949e-06, "loss": 40.3893, "step": 8685 }, { "epoch": 206.81194029850747, "grad_norm": 1.3264738321304321, "learning_rate": 9.696969696969698e-06, "loss": 40.4376, "step": 8686 }, { "epoch": 206.83582089552237, "grad_norm": 1.3466116189956665, "learning_rate": 9.695887445887446e-06, "loss": 40.3116, "step": 8687 }, { "epoch": 206.8597014925373, "grad_norm": 1.380892038345337, "learning_rate": 9.694805194805195e-06, "loss": 39.5935, "step": 8688 }, { "epoch": 206.88358208955225, "grad_norm": 1.3395328521728516, "learning_rate": 9.693722943722944e-06, "loss": 40.6492, "step": 8689 }, { "epoch": 206.90746268656716, "grad_norm": 1.3381954431533813, "learning_rate": 9.692640692640693e-06, "loss": 38.9285, "step": 8690 }, { "epoch": 206.9313432835821, "grad_norm": 1.3869540691375732, "learning_rate": 9.691558441558442e-06, "loss": 37.2335, "step": 8691 }, { "epoch": 206.955223880597, "grad_norm": 1.3251712322235107, "learning_rate": 9.690476190476191e-06, "loss": 40.5073, "step": 8692 }, { "epoch": 206.97910447761194, "grad_norm": 1.4055966138839722, "learning_rate": 9.68939393939394e-06, "loss": 39.6808, "step": 8693 }, { "epoch": 207.0, "grad_norm": 1.3889753818511963, "learning_rate": 9.688311688311689e-06, "loss": 35.0025, "step": 8694 }, { "epoch": 207.02388059701494, "grad_norm": 1.3435806035995483, "learning_rate": 9.687229437229438e-06, "loss": 38.8303, "step": 8695 }, { "epoch": 207.04776119402985, "grad_norm": 1.370622992515564, "learning_rate": 9.686147186147187e-06, "loss": 38.4364, "step": 8696 }, { "epoch": 207.07164179104478, "grad_norm": 1.3753960132598877, "learning_rate": 9.685064935064935e-06, "loss": 39.7707, "step": 8697 }, { "epoch": 207.0955223880597, "grad_norm": 1.402347207069397, "learning_rate": 9.683982683982684e-06, "loss": 40.45, "step": 8698 }, { "epoch": 207.11940298507463, "grad_norm": 1.3224753141403198, "learning_rate": 9.682900432900433e-06, "loss": 40.5171, "step": 8699 }, { "epoch": 207.14328358208957, "grad_norm": 1.3706496953964233, "learning_rate": 9.681818181818182e-06, "loss": 40.2149, "step": 8700 }, { "epoch": 207.16716417910447, "grad_norm": 1.3176217079162598, "learning_rate": 9.680735930735931e-06, "loss": 39.039, "step": 8701 }, { "epoch": 207.1910447761194, "grad_norm": 1.3530744314193726, "learning_rate": 9.67965367965368e-06, "loss": 39.8632, "step": 8702 }, { "epoch": 207.21492537313432, "grad_norm": 1.3502708673477173, "learning_rate": 9.678571428571429e-06, "loss": 40.3208, "step": 8703 }, { "epoch": 207.23880597014926, "grad_norm": 1.3731478452682495, "learning_rate": 9.67748917748918e-06, "loss": 40.9124, "step": 8704 }, { "epoch": 207.26268656716417, "grad_norm": 1.378189206123352, "learning_rate": 9.676406926406927e-06, "loss": 39.811, "step": 8705 }, { "epoch": 207.2865671641791, "grad_norm": 1.354251742362976, "learning_rate": 9.675324675324677e-06, "loss": 39.3682, "step": 8706 }, { "epoch": 207.31044776119404, "grad_norm": 1.3198610544204712, "learning_rate": 9.674242424242424e-06, "loss": 39.3442, "step": 8707 }, { "epoch": 207.33432835820895, "grad_norm": 1.3535128831863403, "learning_rate": 9.673160173160175e-06, "loss": 39.1411, "step": 8708 }, { "epoch": 207.3582089552239, "grad_norm": 1.333244800567627, "learning_rate": 9.672077922077922e-06, "loss": 38.4222, "step": 8709 }, { "epoch": 207.3820895522388, "grad_norm": 1.3189555406570435, "learning_rate": 9.670995670995673e-06, "loss": 40.0595, "step": 8710 }, { "epoch": 207.40597014925373, "grad_norm": 1.3436378240585327, "learning_rate": 9.66991341991342e-06, "loss": 40.964, "step": 8711 }, { "epoch": 207.42985074626867, "grad_norm": 1.3455276489257812, "learning_rate": 9.66883116883117e-06, "loss": 40.1797, "step": 8712 }, { "epoch": 207.45373134328358, "grad_norm": 1.4199739694595337, "learning_rate": 9.667748917748918e-06, "loss": 40.5889, "step": 8713 }, { "epoch": 207.47761194029852, "grad_norm": 1.352252721786499, "learning_rate": 9.666666666666667e-06, "loss": 40.541, "step": 8714 }, { "epoch": 207.50149253731342, "grad_norm": 1.3966203927993774, "learning_rate": 9.665584415584416e-06, "loss": 40.1562, "step": 8715 }, { "epoch": 207.52537313432836, "grad_norm": 1.3588403463363647, "learning_rate": 9.664502164502165e-06, "loss": 39.4664, "step": 8716 }, { "epoch": 207.54925373134327, "grad_norm": 1.3538744449615479, "learning_rate": 9.663419913419915e-06, "loss": 40.1173, "step": 8717 }, { "epoch": 207.5731343283582, "grad_norm": 1.3268805742263794, "learning_rate": 9.662337662337662e-06, "loss": 40.5875, "step": 8718 }, { "epoch": 207.59701492537314, "grad_norm": 1.3957899808883667, "learning_rate": 9.661255411255413e-06, "loss": 40.0538, "step": 8719 }, { "epoch": 207.62089552238805, "grad_norm": 1.3438713550567627, "learning_rate": 9.66017316017316e-06, "loss": 38.8197, "step": 8720 }, { "epoch": 207.644776119403, "grad_norm": 1.3390988111495972, "learning_rate": 9.65909090909091e-06, "loss": 40.2805, "step": 8721 }, { "epoch": 207.6686567164179, "grad_norm": 1.345411777496338, "learning_rate": 9.658008658008658e-06, "loss": 39.5026, "step": 8722 }, { "epoch": 207.69253731343284, "grad_norm": 1.371526837348938, "learning_rate": 9.656926406926409e-06, "loss": 39.57, "step": 8723 }, { "epoch": 207.71641791044777, "grad_norm": 1.3679569959640503, "learning_rate": 9.655844155844156e-06, "loss": 38.7811, "step": 8724 }, { "epoch": 207.74029850746268, "grad_norm": 1.3378201723098755, "learning_rate": 9.654761904761906e-06, "loss": 39.4076, "step": 8725 }, { "epoch": 207.76417910447762, "grad_norm": 1.340839147567749, "learning_rate": 9.653679653679654e-06, "loss": 40.1136, "step": 8726 }, { "epoch": 207.78805970149253, "grad_norm": 1.369566559791565, "learning_rate": 9.652597402597404e-06, "loss": 39.1237, "step": 8727 }, { "epoch": 207.81194029850747, "grad_norm": 1.3828129768371582, "learning_rate": 9.651515151515153e-06, "loss": 39.5279, "step": 8728 }, { "epoch": 207.83582089552237, "grad_norm": 1.3299708366394043, "learning_rate": 9.650432900432902e-06, "loss": 39.7672, "step": 8729 }, { "epoch": 207.8597014925373, "grad_norm": 1.3245450258255005, "learning_rate": 9.64935064935065e-06, "loss": 40.0324, "step": 8730 }, { "epoch": 207.88358208955225, "grad_norm": 1.3664745092391968, "learning_rate": 9.6482683982684e-06, "loss": 40.0098, "step": 8731 }, { "epoch": 207.90746268656716, "grad_norm": 1.321441888809204, "learning_rate": 9.647186147186149e-06, "loss": 39.5387, "step": 8732 }, { "epoch": 207.9313432835821, "grad_norm": 1.3307889699935913, "learning_rate": 9.646103896103896e-06, "loss": 39.7263, "step": 8733 }, { "epoch": 207.955223880597, "grad_norm": 1.3961788415908813, "learning_rate": 9.645021645021646e-06, "loss": 39.2415, "step": 8734 }, { "epoch": 207.97910447761194, "grad_norm": 1.340364694595337, "learning_rate": 9.643939393939394e-06, "loss": 40.5661, "step": 8735 }, { "epoch": 208.0, "grad_norm": 1.3330281972885132, "learning_rate": 9.642857142857144e-06, "loss": 35.5022, "step": 8736 }, { "epoch": 208.02388059701494, "grad_norm": 1.3285889625549316, "learning_rate": 9.641774891774891e-06, "loss": 40.2238, "step": 8737 }, { "epoch": 208.04776119402985, "grad_norm": 1.3088833093643188, "learning_rate": 9.640692640692642e-06, "loss": 40.3751, "step": 8738 }, { "epoch": 208.07164179104478, "grad_norm": 1.3066315650939941, "learning_rate": 9.63961038961039e-06, "loss": 38.9733, "step": 8739 }, { "epoch": 208.0955223880597, "grad_norm": 1.3318415880203247, "learning_rate": 9.63852813852814e-06, "loss": 38.0453, "step": 8740 }, { "epoch": 208.11940298507463, "grad_norm": 1.37196683883667, "learning_rate": 9.637445887445889e-06, "loss": 40.6345, "step": 8741 }, { "epoch": 208.14328358208957, "grad_norm": 1.356888771057129, "learning_rate": 9.636363636363638e-06, "loss": 39.5827, "step": 8742 }, { "epoch": 208.16716417910447, "grad_norm": 1.3307679891586304, "learning_rate": 9.635281385281386e-06, "loss": 40.1479, "step": 8743 }, { "epoch": 208.1910447761194, "grad_norm": 1.3697056770324707, "learning_rate": 9.634199134199135e-06, "loss": 40.0388, "step": 8744 }, { "epoch": 208.21492537313432, "grad_norm": 1.3585140705108643, "learning_rate": 9.633116883116884e-06, "loss": 39.8867, "step": 8745 }, { "epoch": 208.23880597014926, "grad_norm": 1.3381097316741943, "learning_rate": 9.632034632034633e-06, "loss": 39.753, "step": 8746 }, { "epoch": 208.26268656716417, "grad_norm": 1.3888802528381348, "learning_rate": 9.630952380952382e-06, "loss": 40.4863, "step": 8747 }, { "epoch": 208.2865671641791, "grad_norm": 1.374106526374817, "learning_rate": 9.629870129870131e-06, "loss": 39.4346, "step": 8748 }, { "epoch": 208.31044776119404, "grad_norm": 1.3473783731460571, "learning_rate": 9.62878787878788e-06, "loss": 40.6447, "step": 8749 }, { "epoch": 208.33432835820895, "grad_norm": 1.3787063360214233, "learning_rate": 9.627705627705629e-06, "loss": 40.7623, "step": 8750 }, { "epoch": 208.3582089552239, "grad_norm": NaN, "learning_rate": 9.626623376623378e-06, "loss": 63.2268, "step": 8751 }, { "epoch": 208.3820895522388, "grad_norm": 1.31142258644104, "learning_rate": 9.626623376623378e-06, "loss": 38.3989, "step": 8752 }, { "epoch": 208.40597014925373, "grad_norm": 1.3436646461486816, "learning_rate": 9.625541125541127e-06, "loss": 40.2477, "step": 8753 }, { "epoch": 208.42985074626867, "grad_norm": 1.403708815574646, "learning_rate": 9.624458874458875e-06, "loss": 39.547, "step": 8754 }, { "epoch": 208.45373134328358, "grad_norm": 1.308201789855957, "learning_rate": 9.623376623376624e-06, "loss": 40.2019, "step": 8755 }, { "epoch": 208.47761194029852, "grad_norm": 1.3376202583312988, "learning_rate": 9.622294372294373e-06, "loss": 40.0219, "step": 8756 }, { "epoch": 208.50149253731342, "grad_norm": 1.338472604751587, "learning_rate": 9.621212121212122e-06, "loss": 40.5837, "step": 8757 }, { "epoch": 208.52537313432836, "grad_norm": 1.3346757888793945, "learning_rate": 9.620129870129871e-06, "loss": 39.6783, "step": 8758 }, { "epoch": 208.54925373134327, "grad_norm": 1.3821760416030884, "learning_rate": 9.61904761904762e-06, "loss": 37.8957, "step": 8759 }, { "epoch": 208.5731343283582, "grad_norm": 1.3510831594467163, "learning_rate": 9.617965367965369e-06, "loss": 40.0326, "step": 8760 }, { "epoch": 208.59701492537314, "grad_norm": 1.3700441122055054, "learning_rate": 9.616883116883118e-06, "loss": 39.6395, "step": 8761 }, { "epoch": 208.62089552238805, "grad_norm": 1.3780709505081177, "learning_rate": 9.615800865800867e-06, "loss": 38.7359, "step": 8762 }, { "epoch": 208.644776119403, "grad_norm": 1.3634003400802612, "learning_rate": 9.614718614718616e-06, "loss": 39.2743, "step": 8763 }, { "epoch": 208.6686567164179, "grad_norm": 1.3463929891586304, "learning_rate": 9.613636363636364e-06, "loss": 39.2786, "step": 8764 }, { "epoch": 208.69253731343284, "grad_norm": 1.3938487768173218, "learning_rate": 9.612554112554113e-06, "loss": 38.4887, "step": 8765 }, { "epoch": 208.71641791044777, "grad_norm": 1.3897905349731445, "learning_rate": 9.611471861471862e-06, "loss": 40.9641, "step": 8766 }, { "epoch": 208.74029850746268, "grad_norm": 1.3548675775527954, "learning_rate": 9.610389610389611e-06, "loss": 39.1566, "step": 8767 }, { "epoch": 208.76417910447762, "grad_norm": 1.4117032289505005, "learning_rate": 9.60930735930736e-06, "loss": 39.9359, "step": 8768 }, { "epoch": 208.78805970149253, "grad_norm": 1.4413483142852783, "learning_rate": 9.608225108225109e-06, "loss": 39.6836, "step": 8769 }, { "epoch": 208.81194029850747, "grad_norm": 1.3489794731140137, "learning_rate": 9.607142857142858e-06, "loss": 40.5621, "step": 8770 }, { "epoch": 208.83582089552237, "grad_norm": 1.371641755104065, "learning_rate": 9.606060606060607e-06, "loss": 39.7529, "step": 8771 }, { "epoch": 208.8597014925373, "grad_norm": 1.336719274520874, "learning_rate": 9.604978354978356e-06, "loss": 40.0171, "step": 8772 }, { "epoch": 208.88358208955225, "grad_norm": 1.3367419242858887, "learning_rate": 9.603896103896105e-06, "loss": 40.3346, "step": 8773 }, { "epoch": 208.90746268656716, "grad_norm": 1.3405991792678833, "learning_rate": 9.602813852813853e-06, "loss": 40.3723, "step": 8774 }, { "epoch": 208.9313432835821, "grad_norm": 1.3736621141433716, "learning_rate": 9.601731601731602e-06, "loss": 40.1811, "step": 8775 }, { "epoch": 208.955223880597, "grad_norm": 1.3476605415344238, "learning_rate": 9.600649350649351e-06, "loss": 39.8129, "step": 8776 }, { "epoch": 208.97910447761194, "grad_norm": 1.3514008522033691, "learning_rate": 9.5995670995671e-06, "loss": 39.8556, "step": 8777 }, { "epoch": 209.0, "grad_norm": 1.3359158039093018, "learning_rate": 9.598484848484849e-06, "loss": 35.7193, "step": 8778 }, { "epoch": 209.02388059701494, "grad_norm": 1.3604894876480103, "learning_rate": 9.597402597402598e-06, "loss": 38.5998, "step": 8779 }, { "epoch": 209.04776119402985, "grad_norm": 1.34781813621521, "learning_rate": 9.596320346320347e-06, "loss": 39.4375, "step": 8780 }, { "epoch": 209.07164179104478, "grad_norm": 1.3754634857177734, "learning_rate": 9.595238095238096e-06, "loss": 40.3925, "step": 8781 }, { "epoch": 209.0955223880597, "grad_norm": 1.3592923879623413, "learning_rate": 9.594155844155845e-06, "loss": 40.285, "step": 8782 }, { "epoch": 209.11940298507463, "grad_norm": 1.3091309070587158, "learning_rate": 9.593073593073594e-06, "loss": 39.0882, "step": 8783 }, { "epoch": 209.14328358208957, "grad_norm": 1.3650256395339966, "learning_rate": 9.591991341991342e-06, "loss": 39.2074, "step": 8784 }, { "epoch": 209.16716417910447, "grad_norm": 1.3189674615859985, "learning_rate": 9.590909090909091e-06, "loss": 39.2402, "step": 8785 }, { "epoch": 209.1910447761194, "grad_norm": 1.3583528995513916, "learning_rate": 9.58982683982684e-06, "loss": 38.1526, "step": 8786 }, { "epoch": 209.21492537313432, "grad_norm": 1.3276830911636353, "learning_rate": 9.588744588744589e-06, "loss": 40.3605, "step": 8787 }, { "epoch": 209.23880597014926, "grad_norm": 1.375235676765442, "learning_rate": 9.587662337662338e-06, "loss": 40.0383, "step": 8788 }, { "epoch": 209.26268656716417, "grad_norm": 1.3503100872039795, "learning_rate": 9.586580086580087e-06, "loss": 40.0166, "step": 8789 }, { "epoch": 209.2865671641791, "grad_norm": 1.3606221675872803, "learning_rate": 9.585497835497838e-06, "loss": 38.874, "step": 8790 }, { "epoch": 209.31044776119404, "grad_norm": 1.353880763053894, "learning_rate": 9.584415584415585e-06, "loss": 38.4007, "step": 8791 }, { "epoch": 209.33432835820895, "grad_norm": 1.3729524612426758, "learning_rate": 9.583333333333335e-06, "loss": 39.3229, "step": 8792 }, { "epoch": 209.3582089552239, "grad_norm": 1.358292818069458, "learning_rate": 9.582251082251083e-06, "loss": 40.0239, "step": 8793 }, { "epoch": 209.3820895522388, "grad_norm": 1.3045099973678589, "learning_rate": 9.581168831168831e-06, "loss": 39.432, "step": 8794 }, { "epoch": 209.40597014925373, "grad_norm": 1.3730512857437134, "learning_rate": 9.58008658008658e-06, "loss": 39.6634, "step": 8795 }, { "epoch": 209.42985074626867, "grad_norm": 1.3657894134521484, "learning_rate": 9.57900432900433e-06, "loss": 40.2598, "step": 8796 }, { "epoch": 209.45373134328358, "grad_norm": 1.328834891319275, "learning_rate": 9.577922077922078e-06, "loss": 40.0472, "step": 8797 }, { "epoch": 209.47761194029852, "grad_norm": 1.3449023962020874, "learning_rate": 9.576839826839827e-06, "loss": 40.5182, "step": 8798 }, { "epoch": 209.50149253731342, "grad_norm": 1.3250757455825806, "learning_rate": 9.575757575757576e-06, "loss": 40.2425, "step": 8799 }, { "epoch": 209.52537313432836, "grad_norm": 1.4324774742126465, "learning_rate": 9.574675324675325e-06, "loss": 39.2444, "step": 8800 }, { "epoch": 209.54925373134327, "grad_norm": 1.346042513847351, "learning_rate": 9.573593073593075e-06, "loss": 40.3867, "step": 8801 }, { "epoch": 209.5731343283582, "grad_norm": 1.3340811729431152, "learning_rate": 9.572510822510823e-06, "loss": 39.8862, "step": 8802 }, { "epoch": 209.59701492537314, "grad_norm": 1.4204115867614746, "learning_rate": 9.571428571428573e-06, "loss": 38.462, "step": 8803 }, { "epoch": 209.62089552238805, "grad_norm": 1.4842796325683594, "learning_rate": 9.57034632034632e-06, "loss": 38.9258, "step": 8804 }, { "epoch": 209.644776119403, "grad_norm": 1.3187850713729858, "learning_rate": 9.569264069264071e-06, "loss": 39.3658, "step": 8805 }, { "epoch": 209.6686567164179, "grad_norm": 1.361606478691101, "learning_rate": 9.568181818181818e-06, "loss": 39.6968, "step": 8806 }, { "epoch": 209.69253731343284, "grad_norm": 1.3819087743759155, "learning_rate": 9.567099567099569e-06, "loss": 40.7596, "step": 8807 }, { "epoch": 209.71641791044777, "grad_norm": 1.3528673648834229, "learning_rate": 9.566017316017316e-06, "loss": 40.2827, "step": 8808 }, { "epoch": 209.74029850746268, "grad_norm": 1.3310041427612305, "learning_rate": 9.564935064935067e-06, "loss": 40.1711, "step": 8809 }, { "epoch": 209.76417910447762, "grad_norm": 1.316280722618103, "learning_rate": 9.563852813852814e-06, "loss": 40.4871, "step": 8810 }, { "epoch": 209.78805970149253, "grad_norm": 1.38788902759552, "learning_rate": 9.562770562770564e-06, "loss": 39.0898, "step": 8811 }, { "epoch": 209.81194029850747, "grad_norm": 1.3461848497390747, "learning_rate": 9.561688311688313e-06, "loss": 40.7952, "step": 8812 }, { "epoch": 209.83582089552237, "grad_norm": 1.3517199754714966, "learning_rate": 9.56060606060606e-06, "loss": 39.9062, "step": 8813 }, { "epoch": 209.8597014925373, "grad_norm": 1.3253819942474365, "learning_rate": 9.559523809523811e-06, "loss": 40.2982, "step": 8814 }, { "epoch": 209.88358208955225, "grad_norm": 1.3833763599395752, "learning_rate": 9.558441558441558e-06, "loss": 40.4952, "step": 8815 }, { "epoch": 209.90746268656716, "grad_norm": 1.3875101804733276, "learning_rate": 9.557359307359309e-06, "loss": 39.8086, "step": 8816 }, { "epoch": 209.9313432835821, "grad_norm": 1.3631867170333862, "learning_rate": 9.556277056277056e-06, "loss": 40.0383, "step": 8817 }, { "epoch": 209.955223880597, "grad_norm": 1.3749154806137085, "learning_rate": 9.555194805194807e-06, "loss": 40.5161, "step": 8818 }, { "epoch": 209.97910447761194, "grad_norm": NaN, "learning_rate": 9.554112554112554e-06, "loss": 54.9479, "step": 8819 }, { "epoch": 210.0, "grad_norm": 1.497111439704895, "learning_rate": 9.554112554112554e-06, "loss": 35.3626, "step": 8820 }, { "epoch": 210.02388059701494, "grad_norm": 1.321395993232727, "learning_rate": 9.553030303030304e-06, "loss": 39.8756, "step": 8821 }, { "epoch": 210.04776119402985, "grad_norm": 1.3705570697784424, "learning_rate": 9.551948051948052e-06, "loss": 40.4907, "step": 8822 }, { "epoch": 210.07164179104478, "grad_norm": 1.3449461460113525, "learning_rate": 9.550865800865802e-06, "loss": 40.0747, "step": 8823 }, { "epoch": 210.0955223880597, "grad_norm": 1.3320589065551758, "learning_rate": 9.54978354978355e-06, "loss": 39.5201, "step": 8824 }, { "epoch": 210.11940298507463, "grad_norm": 1.3432953357696533, "learning_rate": 9.5487012987013e-06, "loss": 40.0954, "step": 8825 }, { "epoch": 210.14328358208957, "grad_norm": 1.3655306100845337, "learning_rate": 9.547619047619049e-06, "loss": 38.676, "step": 8826 }, { "epoch": 210.16716417910447, "grad_norm": 1.3497449159622192, "learning_rate": 9.546536796536798e-06, "loss": 40.2281, "step": 8827 }, { "epoch": 210.1910447761194, "grad_norm": NaN, "learning_rate": 9.545454545454547e-06, "loss": 68.638, "step": 8828 }, { "epoch": 210.21492537313432, "grad_norm": 1.3341877460479736, "learning_rate": 9.545454545454547e-06, "loss": 39.2952, "step": 8829 }, { "epoch": 210.23880597014926, "grad_norm": 1.3743181228637695, "learning_rate": 9.544372294372296e-06, "loss": 38.7101, "step": 8830 }, { "epoch": 210.26268656716417, "grad_norm": 1.3716835975646973, "learning_rate": 9.543290043290045e-06, "loss": 39.1246, "step": 8831 }, { "epoch": 210.2865671641791, "grad_norm": 1.3723886013031006, "learning_rate": 9.542207792207793e-06, "loss": 39.4671, "step": 8832 }, { "epoch": 210.31044776119404, "grad_norm": 1.3235583305358887, "learning_rate": 9.541125541125542e-06, "loss": 40.2564, "step": 8833 }, { "epoch": 210.33432835820895, "grad_norm": 1.3450580835342407, "learning_rate": 9.540043290043291e-06, "loss": 39.4135, "step": 8834 }, { "epoch": 210.3582089552239, "grad_norm": 1.345076560974121, "learning_rate": 9.53896103896104e-06, "loss": 40.7438, "step": 8835 }, { "epoch": 210.3820895522388, "grad_norm": 1.322695016860962, "learning_rate": 9.537878787878787e-06, "loss": 39.4731, "step": 8836 }, { "epoch": 210.40597014925373, "grad_norm": 1.3571319580078125, "learning_rate": 9.536796536796538e-06, "loss": 39.63, "step": 8837 }, { "epoch": 210.42985074626867, "grad_norm": 1.3598859310150146, "learning_rate": 9.535714285714287e-06, "loss": 38.3566, "step": 8838 }, { "epoch": 210.45373134328358, "grad_norm": 1.34986412525177, "learning_rate": 9.534632034632036e-06, "loss": 38.9364, "step": 8839 }, { "epoch": 210.47761194029852, "grad_norm": 1.3784576654434204, "learning_rate": 9.533549783549785e-06, "loss": 40.8526, "step": 8840 }, { "epoch": 210.50149253731342, "grad_norm": 1.3404110670089722, "learning_rate": 9.532467532467534e-06, "loss": 40.9763, "step": 8841 }, { "epoch": 210.52537313432836, "grad_norm": 1.3433525562286377, "learning_rate": 9.531385281385282e-06, "loss": 39.5431, "step": 8842 }, { "epoch": 210.54925373134327, "grad_norm": 1.3696935176849365, "learning_rate": 9.530303030303031e-06, "loss": 39.769, "step": 8843 }, { "epoch": 210.5731343283582, "grad_norm": 1.333480715751648, "learning_rate": 9.52922077922078e-06, "loss": 41.0464, "step": 8844 }, { "epoch": 210.59701492537314, "grad_norm": 1.3425663709640503, "learning_rate": 9.52813852813853e-06, "loss": 40.2951, "step": 8845 }, { "epoch": 210.62089552238805, "grad_norm": 1.350966453552246, "learning_rate": 9.527056277056278e-06, "loss": 39.768, "step": 8846 }, { "epoch": 210.644776119403, "grad_norm": 1.3416324853897095, "learning_rate": 9.525974025974027e-06, "loss": 39.6391, "step": 8847 }, { "epoch": 210.6686567164179, "grad_norm": 1.3381752967834473, "learning_rate": 9.524891774891776e-06, "loss": 39.6522, "step": 8848 }, { "epoch": 210.69253731343284, "grad_norm": 1.3394417762756348, "learning_rate": 9.523809523809525e-06, "loss": 39.1609, "step": 8849 }, { "epoch": 210.71641791044777, "grad_norm": 1.3334687948226929, "learning_rate": 9.522727272727274e-06, "loss": 40.1595, "step": 8850 }, { "epoch": 210.74029850746268, "grad_norm": 1.294854760169983, "learning_rate": 9.521645021645023e-06, "loss": 39.5768, "step": 8851 }, { "epoch": 210.76417910447762, "grad_norm": 1.3623311519622803, "learning_rate": 9.520562770562771e-06, "loss": 39.8312, "step": 8852 }, { "epoch": 210.78805970149253, "grad_norm": 1.3493179082870483, "learning_rate": 9.51948051948052e-06, "loss": 38.5004, "step": 8853 }, { "epoch": 210.81194029850747, "grad_norm": 1.4007699489593506, "learning_rate": 9.51839826839827e-06, "loss": 39.5357, "step": 8854 }, { "epoch": 210.83582089552237, "grad_norm": 1.391189455986023, "learning_rate": 9.517316017316018e-06, "loss": 39.8861, "step": 8855 }, { "epoch": 210.8597014925373, "grad_norm": 1.332085371017456, "learning_rate": 9.516233766233767e-06, "loss": 40.9401, "step": 8856 }, { "epoch": 210.88358208955225, "grad_norm": 1.3533538579940796, "learning_rate": 9.515151515151516e-06, "loss": 39.0814, "step": 8857 }, { "epoch": 210.90746268656716, "grad_norm": 1.3499486446380615, "learning_rate": 9.514069264069265e-06, "loss": 39.3856, "step": 8858 }, { "epoch": 210.9313432835821, "grad_norm": 1.371348261833191, "learning_rate": 9.512987012987014e-06, "loss": 41.0122, "step": 8859 }, { "epoch": 210.955223880597, "grad_norm": 1.3705040216445923, "learning_rate": 9.511904761904763e-06, "loss": 40.8792, "step": 8860 }, { "epoch": 210.97910447761194, "grad_norm": 1.3630342483520508, "learning_rate": 9.510822510822512e-06, "loss": 40.0277, "step": 8861 }, { "epoch": 211.0, "grad_norm": 1.3623372316360474, "learning_rate": 9.50974025974026e-06, "loss": 34.8121, "step": 8862 }, { "epoch": 211.02388059701494, "grad_norm": 1.3277981281280518, "learning_rate": 9.50865800865801e-06, "loss": 39.9114, "step": 8863 }, { "epoch": 211.04776119402985, "grad_norm": 1.3176239728927612, "learning_rate": 9.507575757575758e-06, "loss": 39.7839, "step": 8864 }, { "epoch": 211.07164179104478, "grad_norm": 1.340466856956482, "learning_rate": 9.506493506493507e-06, "loss": 40.7061, "step": 8865 }, { "epoch": 211.0955223880597, "grad_norm": 1.3701274394989014, "learning_rate": 9.505411255411256e-06, "loss": 39.8281, "step": 8866 }, { "epoch": 211.11940298507463, "grad_norm": 1.343354344367981, "learning_rate": 9.504329004329005e-06, "loss": 41.0104, "step": 8867 }, { "epoch": 211.14328358208957, "grad_norm": 1.3472039699554443, "learning_rate": 9.503246753246754e-06, "loss": 38.9, "step": 8868 }, { "epoch": 211.16716417910447, "grad_norm": 1.389875054359436, "learning_rate": 9.502164502164503e-06, "loss": 38.9415, "step": 8869 }, { "epoch": 211.1910447761194, "grad_norm": 1.3131377696990967, "learning_rate": 9.501082251082252e-06, "loss": 40.2387, "step": 8870 }, { "epoch": 211.21492537313432, "grad_norm": 1.386014461517334, "learning_rate": 9.5e-06, "loss": 39.9665, "step": 8871 }, { "epoch": 211.23880597014926, "grad_norm": 1.3277069330215454, "learning_rate": 9.49891774891775e-06, "loss": 40.865, "step": 8872 }, { "epoch": 211.26268656716417, "grad_norm": 1.354255199432373, "learning_rate": 9.497835497835498e-06, "loss": 38.0496, "step": 8873 }, { "epoch": 211.2865671641791, "grad_norm": 1.3520774841308594, "learning_rate": 9.496753246753247e-06, "loss": 40.5511, "step": 8874 }, { "epoch": 211.31044776119404, "grad_norm": 1.3289692401885986, "learning_rate": 9.495670995670996e-06, "loss": 39.1448, "step": 8875 }, { "epoch": 211.33432835820895, "grad_norm": 1.3787994384765625, "learning_rate": 9.494588744588745e-06, "loss": 39.8369, "step": 8876 }, { "epoch": 211.3582089552239, "grad_norm": 1.3559629917144775, "learning_rate": 9.493506493506494e-06, "loss": 39.0378, "step": 8877 }, { "epoch": 211.3820895522388, "grad_norm": 1.3328937292099, "learning_rate": 9.492424242424243e-06, "loss": 38.8956, "step": 8878 }, { "epoch": 211.40597014925373, "grad_norm": 1.3709808588027954, "learning_rate": 9.491341991341992e-06, "loss": 39.9465, "step": 8879 }, { "epoch": 211.42985074626867, "grad_norm": 1.34311044216156, "learning_rate": 9.49025974025974e-06, "loss": 39.2277, "step": 8880 }, { "epoch": 211.45373134328358, "grad_norm": 1.344923496246338, "learning_rate": 9.48917748917749e-06, "loss": 39.3847, "step": 8881 }, { "epoch": 211.47761194029852, "grad_norm": 1.3787468671798706, "learning_rate": 9.488095238095238e-06, "loss": 38.9383, "step": 8882 }, { "epoch": 211.50149253731342, "grad_norm": 1.327025055885315, "learning_rate": 9.487012987012987e-06, "loss": 40.8387, "step": 8883 }, { "epoch": 211.52537313432836, "grad_norm": 1.3563165664672852, "learning_rate": 9.485930735930736e-06, "loss": 40.1521, "step": 8884 }, { "epoch": 211.54925373134327, "grad_norm": 1.3156354427337646, "learning_rate": 9.484848484848485e-06, "loss": 38.5329, "step": 8885 }, { "epoch": 211.5731343283582, "grad_norm": 1.3025293350219727, "learning_rate": 9.483766233766236e-06, "loss": 40.4447, "step": 8886 }, { "epoch": 211.59701492537314, "grad_norm": 1.3447726964950562, "learning_rate": 9.482683982683983e-06, "loss": 40.1646, "step": 8887 }, { "epoch": 211.62089552238805, "grad_norm": 1.332624077796936, "learning_rate": 9.481601731601733e-06, "loss": 38.8376, "step": 8888 }, { "epoch": 211.644776119403, "grad_norm": 1.3637744188308716, "learning_rate": 9.48051948051948e-06, "loss": 41.4119, "step": 8889 }, { "epoch": 211.6686567164179, "grad_norm": 1.3037474155426025, "learning_rate": 9.479437229437231e-06, "loss": 39.9562, "step": 8890 }, { "epoch": 211.69253731343284, "grad_norm": 1.450317621231079, "learning_rate": 9.478354978354978e-06, "loss": 41.1502, "step": 8891 }, { "epoch": 211.71641791044777, "grad_norm": 1.3693903684616089, "learning_rate": 9.477272727272729e-06, "loss": 40.464, "step": 8892 }, { "epoch": 211.74029850746268, "grad_norm": 1.3114017248153687, "learning_rate": 9.476190476190476e-06, "loss": 40.5619, "step": 8893 }, { "epoch": 211.76417910447762, "grad_norm": 1.334392786026001, "learning_rate": 9.475108225108225e-06, "loss": 39.8774, "step": 8894 }, { "epoch": 211.78805970149253, "grad_norm": 1.3252685070037842, "learning_rate": 9.474025974025974e-06, "loss": 40.1257, "step": 8895 }, { "epoch": 211.81194029850747, "grad_norm": 1.3736448287963867, "learning_rate": 9.472943722943723e-06, "loss": 39.3607, "step": 8896 }, { "epoch": 211.83582089552237, "grad_norm": 1.3827797174453735, "learning_rate": 9.471861471861472e-06, "loss": 39.5292, "step": 8897 }, { "epoch": 211.8597014925373, "grad_norm": 1.366312861442566, "learning_rate": 9.47077922077922e-06, "loss": 39.9146, "step": 8898 }, { "epoch": 211.88358208955225, "grad_norm": NaN, "learning_rate": 9.469696969696971e-06, "loss": 63.362, "step": 8899 }, { "epoch": 211.90746268656716, "grad_norm": 1.3701595067977905, "learning_rate": 9.469696969696971e-06, "loss": 38.6732, "step": 8900 }, { "epoch": 211.9313432835821, "grad_norm": 1.3422905206680298, "learning_rate": 9.468614718614719e-06, "loss": 39.7972, "step": 8901 }, { "epoch": 211.955223880597, "grad_norm": 1.3426543474197388, "learning_rate": 9.46753246753247e-06, "loss": 39.2953, "step": 8902 }, { "epoch": 211.97910447761194, "grad_norm": 1.3068506717681885, "learning_rate": 9.466450216450216e-06, "loss": 40.3414, "step": 8903 }, { "epoch": 212.0, "grad_norm": 1.3434913158416748, "learning_rate": 9.465367965367967e-06, "loss": 33.4515, "step": 8904 }, { "epoch": 212.02388059701494, "grad_norm": 1.3355939388275146, "learning_rate": 9.464285714285714e-06, "loss": 39.0124, "step": 8905 }, { "epoch": 212.04776119402985, "grad_norm": 1.3453831672668457, "learning_rate": 9.463203463203465e-06, "loss": 38.8503, "step": 8906 }, { "epoch": 212.07164179104478, "grad_norm": 1.3010910749435425, "learning_rate": 9.462121212121212e-06, "loss": 39.5674, "step": 8907 }, { "epoch": 212.0955223880597, "grad_norm": 1.3236496448516846, "learning_rate": 9.461038961038963e-06, "loss": 38.6606, "step": 8908 }, { "epoch": 212.11940298507463, "grad_norm": 1.3276854753494263, "learning_rate": 9.45995670995671e-06, "loss": 39.1128, "step": 8909 }, { "epoch": 212.14328358208957, "grad_norm": 1.3068641424179077, "learning_rate": 9.45887445887446e-06, "loss": 40.4073, "step": 8910 }, { "epoch": 212.16716417910447, "grad_norm": 1.353392481803894, "learning_rate": 9.45779220779221e-06, "loss": 39.2665, "step": 8911 }, { "epoch": 212.1910447761194, "grad_norm": 1.3634825944900513, "learning_rate": 9.456709956709958e-06, "loss": 37.9814, "step": 8912 }, { "epoch": 212.21492537313432, "grad_norm": 1.3438831567764282, "learning_rate": 9.455627705627707e-06, "loss": 39.055, "step": 8913 }, { "epoch": 212.23880597014926, "grad_norm": 1.3813879489898682, "learning_rate": 9.454545454545456e-06, "loss": 40.424, "step": 8914 }, { "epoch": 212.26268656716417, "grad_norm": 1.334423303604126, "learning_rate": 9.453463203463205e-06, "loss": 39.0668, "step": 8915 }, { "epoch": 212.2865671641791, "grad_norm": 1.3723580837249756, "learning_rate": 9.452380952380952e-06, "loss": 39.0465, "step": 8916 }, { "epoch": 212.31044776119404, "grad_norm": 1.3792967796325684, "learning_rate": 9.451298701298703e-06, "loss": 39.9163, "step": 8917 }, { "epoch": 212.33432835820895, "grad_norm": 1.3295979499816895, "learning_rate": 9.45021645021645e-06, "loss": 40.1618, "step": 8918 }, { "epoch": 212.3582089552239, "grad_norm": 1.3429745435714722, "learning_rate": 9.4491341991342e-06, "loss": 40.606, "step": 8919 }, { "epoch": 212.3820895522388, "grad_norm": 1.3466758728027344, "learning_rate": 9.448051948051948e-06, "loss": 41.3839, "step": 8920 }, { "epoch": 212.40597014925373, "grad_norm": 1.3131448030471802, "learning_rate": 9.446969696969698e-06, "loss": 38.7256, "step": 8921 }, { "epoch": 212.42985074626867, "grad_norm": 1.3689318895339966, "learning_rate": 9.445887445887445e-06, "loss": 39.0087, "step": 8922 }, { "epoch": 212.45373134328358, "grad_norm": NaN, "learning_rate": 9.444805194805196e-06, "loss": 45.4252, "step": 8923 }, { "epoch": 212.47761194029852, "grad_norm": 1.3685146570205688, "learning_rate": 9.444805194805196e-06, "loss": 40.1791, "step": 8924 }, { "epoch": 212.50149253731342, "grad_norm": 1.4143842458724976, "learning_rate": 9.443722943722945e-06, "loss": 39.4412, "step": 8925 }, { "epoch": 212.52537313432836, "grad_norm": 1.282865047454834, "learning_rate": 9.442640692640694e-06, "loss": 39.7054, "step": 8926 }, { "epoch": 212.54925373134327, "grad_norm": 1.3511202335357666, "learning_rate": 9.441558441558443e-06, "loss": 40.4047, "step": 8927 }, { "epoch": 212.5731343283582, "grad_norm": 1.3146388530731201, "learning_rate": 9.440476190476192e-06, "loss": 39.8258, "step": 8928 }, { "epoch": 212.59701492537314, "grad_norm": 1.3665481805801392, "learning_rate": 9.43939393939394e-06, "loss": 39.0631, "step": 8929 }, { "epoch": 212.62089552238805, "grad_norm": 1.329295039176941, "learning_rate": 9.43831168831169e-06, "loss": 38.8771, "step": 8930 }, { "epoch": 212.644776119403, "grad_norm": 1.3332879543304443, "learning_rate": 9.437229437229438e-06, "loss": 41.3729, "step": 8931 }, { "epoch": 212.6686567164179, "grad_norm": 1.3403010368347168, "learning_rate": 9.436147186147187e-06, "loss": 40.6079, "step": 8932 }, { "epoch": 212.69253731343284, "grad_norm": 1.3525370359420776, "learning_rate": 9.435064935064936e-06, "loss": 39.8912, "step": 8933 }, { "epoch": 212.71641791044777, "grad_norm": 1.377712368965149, "learning_rate": 9.433982683982685e-06, "loss": 39.9188, "step": 8934 }, { "epoch": 212.74029850746268, "grad_norm": 1.360937476158142, "learning_rate": 9.432900432900434e-06, "loss": 40.7723, "step": 8935 }, { "epoch": 212.76417910447762, "grad_norm": 1.3387051820755005, "learning_rate": 9.431818181818183e-06, "loss": 40.34, "step": 8936 }, { "epoch": 212.78805970149253, "grad_norm": 1.408441185951233, "learning_rate": 9.430735930735932e-06, "loss": 38.885, "step": 8937 }, { "epoch": 212.81194029850747, "grad_norm": 1.3505593538284302, "learning_rate": 9.42965367965368e-06, "loss": 40.2979, "step": 8938 }, { "epoch": 212.83582089552237, "grad_norm": 1.3580641746520996, "learning_rate": 9.42857142857143e-06, "loss": 39.8609, "step": 8939 }, { "epoch": 212.8597014925373, "grad_norm": 1.3514484167099, "learning_rate": 9.427489177489178e-06, "loss": 40.2001, "step": 8940 }, { "epoch": 212.88358208955225, "grad_norm": 1.3772848844528198, "learning_rate": 9.426406926406927e-06, "loss": 40.0857, "step": 8941 }, { "epoch": 212.90746268656716, "grad_norm": 1.3272401094436646, "learning_rate": 9.425324675324676e-06, "loss": 40.5724, "step": 8942 }, { "epoch": 212.9313432835821, "grad_norm": 1.4338486194610596, "learning_rate": 9.424242424242425e-06, "loss": 39.7216, "step": 8943 }, { "epoch": 212.955223880597, "grad_norm": 1.412129521369934, "learning_rate": 9.423160173160174e-06, "loss": 40.3045, "step": 8944 }, { "epoch": 212.97910447761194, "grad_norm": 1.3190884590148926, "learning_rate": 9.422077922077923e-06, "loss": 40.5574, "step": 8945 }, { "epoch": 213.0, "grad_norm": 1.3599454164505005, "learning_rate": 9.420995670995672e-06, "loss": 34.3712, "step": 8946 }, { "epoch": 213.02388059701494, "grad_norm": 1.3559000492095947, "learning_rate": 9.41991341991342e-06, "loss": 40.4262, "step": 8947 }, { "epoch": 213.04776119402985, "grad_norm": 1.3577172756195068, "learning_rate": 9.41883116883117e-06, "loss": 39.6471, "step": 8948 }, { "epoch": 213.07164179104478, "grad_norm": 1.3207542896270752, "learning_rate": 9.417748917748919e-06, "loss": 39.7613, "step": 8949 }, { "epoch": 213.0955223880597, "grad_norm": 1.334489107131958, "learning_rate": 9.416666666666667e-06, "loss": 39.7505, "step": 8950 }, { "epoch": 213.11940298507463, "grad_norm": 1.3397247791290283, "learning_rate": 9.415584415584416e-06, "loss": 39.2873, "step": 8951 }, { "epoch": 213.14328358208957, "grad_norm": 1.3671300411224365, "learning_rate": 9.414502164502165e-06, "loss": 39.6956, "step": 8952 }, { "epoch": 213.16716417910447, "grad_norm": 1.3556796312332153, "learning_rate": 9.413419913419914e-06, "loss": 39.1597, "step": 8953 }, { "epoch": 213.1910447761194, "grad_norm": 1.3313400745391846, "learning_rate": 9.412337662337663e-06, "loss": 39.6383, "step": 8954 }, { "epoch": 213.21492537313432, "grad_norm": 1.3293853998184204, "learning_rate": 9.411255411255412e-06, "loss": 41.2624, "step": 8955 }, { "epoch": 213.23880597014926, "grad_norm": 1.3340810537338257, "learning_rate": 9.41017316017316e-06, "loss": 38.0156, "step": 8956 }, { "epoch": 213.26268656716417, "grad_norm": 1.300318956375122, "learning_rate": 9.40909090909091e-06, "loss": 39.5233, "step": 8957 }, { "epoch": 213.2865671641791, "grad_norm": 1.3536571264266968, "learning_rate": 9.408008658008659e-06, "loss": 37.9463, "step": 8958 }, { "epoch": 213.31044776119404, "grad_norm": 1.3359050750732422, "learning_rate": 9.406926406926408e-06, "loss": 40.3051, "step": 8959 }, { "epoch": 213.33432835820895, "grad_norm": 1.3663358688354492, "learning_rate": 9.405844155844156e-06, "loss": 39.1775, "step": 8960 }, { "epoch": 213.3582089552239, "grad_norm": 1.3484711647033691, "learning_rate": 9.404761904761905e-06, "loss": 40.0564, "step": 8961 }, { "epoch": 213.3820895522388, "grad_norm": 1.3027324676513672, "learning_rate": 9.403679653679654e-06, "loss": 40.568, "step": 8962 }, { "epoch": 213.40597014925373, "grad_norm": 1.318832516670227, "learning_rate": 9.402597402597403e-06, "loss": 39.9125, "step": 8963 }, { "epoch": 213.42985074626867, "grad_norm": 1.3286821842193604, "learning_rate": 9.401515151515152e-06, "loss": 40.126, "step": 8964 }, { "epoch": 213.45373134328358, "grad_norm": 1.3409137725830078, "learning_rate": 9.400432900432901e-06, "loss": 38.9455, "step": 8965 }, { "epoch": 213.47761194029852, "grad_norm": 1.3027654886245728, "learning_rate": 9.39935064935065e-06, "loss": 40.565, "step": 8966 }, { "epoch": 213.50149253731342, "grad_norm": 1.3529239892959595, "learning_rate": 9.398268398268399e-06, "loss": 39.7341, "step": 8967 }, { "epoch": 213.52537313432836, "grad_norm": 1.3772238492965698, "learning_rate": 9.397186147186148e-06, "loss": 39.9695, "step": 8968 }, { "epoch": 213.54925373134327, "grad_norm": 1.3500471115112305, "learning_rate": 9.396103896103896e-06, "loss": 39.8333, "step": 8969 }, { "epoch": 213.5731343283582, "grad_norm": 1.3577601909637451, "learning_rate": 9.395021645021645e-06, "loss": 40.4502, "step": 8970 }, { "epoch": 213.59701492537314, "grad_norm": 1.339919924736023, "learning_rate": 9.393939393939396e-06, "loss": 41.4176, "step": 8971 }, { "epoch": 213.62089552238805, "grad_norm": 1.3703076839447021, "learning_rate": 9.392857142857143e-06, "loss": 40.3812, "step": 8972 }, { "epoch": 213.644776119403, "grad_norm": 1.37937593460083, "learning_rate": 9.391774891774894e-06, "loss": 38.5018, "step": 8973 }, { "epoch": 213.6686567164179, "grad_norm": 1.3360378742218018, "learning_rate": 9.390692640692641e-06, "loss": 40.3603, "step": 8974 }, { "epoch": 213.69253731343284, "grad_norm": 1.3575552701950073, "learning_rate": 9.38961038961039e-06, "loss": 39.7386, "step": 8975 }, { "epoch": 213.71641791044777, "grad_norm": 1.8888208866119385, "learning_rate": 9.388528138528139e-06, "loss": 40.6066, "step": 8976 }, { "epoch": 213.74029850746268, "grad_norm": 1.3716565370559692, "learning_rate": 9.387445887445888e-06, "loss": 39.6671, "step": 8977 }, { "epoch": 213.76417910447762, "grad_norm": 1.3315191268920898, "learning_rate": 9.386363636363637e-06, "loss": 39.7819, "step": 8978 }, { "epoch": 213.78805970149253, "grad_norm": 1.3595314025878906, "learning_rate": 9.385281385281385e-06, "loss": 39.4238, "step": 8979 }, { "epoch": 213.81194029850747, "grad_norm": NaN, "learning_rate": 9.384199134199134e-06, "loss": 58.6407, "step": 8980 }, { "epoch": 213.83582089552237, "grad_norm": 1.3674556016921997, "learning_rate": 9.384199134199134e-06, "loss": 39.2648, "step": 8981 }, { "epoch": 213.8597014925373, "grad_norm": 1.3363852500915527, "learning_rate": 9.383116883116883e-06, "loss": 40.1371, "step": 8982 }, { "epoch": 213.88358208955225, "grad_norm": 1.3804627656936646, "learning_rate": 9.382034632034632e-06, "loss": 39.106, "step": 8983 }, { "epoch": 213.90746268656716, "grad_norm": 1.3442333936691284, "learning_rate": 9.380952380952381e-06, "loss": 39.6008, "step": 8984 }, { "epoch": 213.9313432835821, "grad_norm": NaN, "learning_rate": 9.379870129870132e-06, "loss": 44.658, "step": 8985 }, { "epoch": 213.955223880597, "grad_norm": 1.3218203783035278, "learning_rate": 9.379870129870132e-06, "loss": 39.019, "step": 8986 }, { "epoch": 213.97910447761194, "grad_norm": 1.302197813987732, "learning_rate": 9.378787878787879e-06, "loss": 40.7242, "step": 8987 }, { "epoch": 214.0, "grad_norm": 1.3489106893539429, "learning_rate": 9.37770562770563e-06, "loss": 34.9582, "step": 8988 }, { "epoch": 214.02388059701494, "grad_norm": 1.4915056228637695, "learning_rate": 9.376623376623377e-06, "loss": 39.4501, "step": 8989 }, { "epoch": 214.04776119402985, "grad_norm": 1.3133703470230103, "learning_rate": 9.375541125541127e-06, "loss": 38.315, "step": 8990 }, { "epoch": 214.07164179104478, "grad_norm": 1.3273446559906006, "learning_rate": 9.374458874458874e-06, "loss": 38.7395, "step": 8991 }, { "epoch": 214.0955223880597, "grad_norm": 1.329453706741333, "learning_rate": 9.373376623376625e-06, "loss": 39.7661, "step": 8992 }, { "epoch": 214.11940298507463, "grad_norm": 1.3508855104446411, "learning_rate": 9.372294372294372e-06, "loss": 40.2091, "step": 8993 }, { "epoch": 214.14328358208957, "grad_norm": 1.3705601692199707, "learning_rate": 9.371212121212123e-06, "loss": 39.9828, "step": 8994 }, { "epoch": 214.16716417910447, "grad_norm": 1.3605984449386597, "learning_rate": 9.37012987012987e-06, "loss": 40.7268, "step": 8995 }, { "epoch": 214.1910447761194, "grad_norm": 1.3213472366333008, "learning_rate": 9.36904761904762e-06, "loss": 39.4732, "step": 8996 }, { "epoch": 214.21492537313432, "grad_norm": 1.3174699544906616, "learning_rate": 9.36796536796537e-06, "loss": 40.7893, "step": 8997 }, { "epoch": 214.23880597014926, "grad_norm": 1.3328733444213867, "learning_rate": 9.366883116883117e-06, "loss": 39.2769, "step": 8998 }, { "epoch": 214.26268656716417, "grad_norm": 1.327453851699829, "learning_rate": 9.365800865800867e-06, "loss": 37.9806, "step": 8999 }, { "epoch": 214.2865671641791, "grad_norm": 1.337369680404663, "learning_rate": 9.364718614718615e-06, "loss": 40.0108, "step": 9000 }, { "epoch": 214.31044776119404, "grad_norm": 1.322012186050415, "learning_rate": 9.363636363636365e-06, "loss": 38.4517, "step": 9001 }, { "epoch": 214.33432835820895, "grad_norm": 1.3652749061584473, "learning_rate": 9.362554112554112e-06, "loss": 38.8118, "step": 9002 }, { "epoch": 214.3582089552239, "grad_norm": 1.4127944707870483, "learning_rate": 9.361471861471863e-06, "loss": 40.3012, "step": 9003 }, { "epoch": 214.3820895522388, "grad_norm": 1.338257908821106, "learning_rate": 9.36038961038961e-06, "loss": 40.0174, "step": 9004 }, { "epoch": 214.40597014925373, "grad_norm": 1.3670746088027954, "learning_rate": 9.35930735930736e-06, "loss": 39.2451, "step": 9005 }, { "epoch": 214.42985074626867, "grad_norm": 1.298861026763916, "learning_rate": 9.358225108225108e-06, "loss": 39.9266, "step": 9006 }, { "epoch": 214.45373134328358, "grad_norm": 1.3324276208877563, "learning_rate": 9.357142857142859e-06, "loss": 38.5087, "step": 9007 }, { "epoch": 214.47761194029852, "grad_norm": 1.343599796295166, "learning_rate": 9.356060606060606e-06, "loss": 40.8025, "step": 9008 }, { "epoch": 214.50149253731342, "grad_norm": 1.3826006650924683, "learning_rate": 9.354978354978356e-06, "loss": 40.533, "step": 9009 }, { "epoch": 214.52537313432836, "grad_norm": 1.3311599493026733, "learning_rate": 9.353896103896105e-06, "loss": 40.6142, "step": 9010 }, { "epoch": 214.54925373134327, "grad_norm": 1.327028512954712, "learning_rate": 9.352813852813854e-06, "loss": 39.9879, "step": 9011 }, { "epoch": 214.5731343283582, "grad_norm": 1.3517987728118896, "learning_rate": 9.351731601731603e-06, "loss": 40.1727, "step": 9012 }, { "epoch": 214.59701492537314, "grad_norm": 1.3386281728744507, "learning_rate": 9.350649350649352e-06, "loss": 39.5321, "step": 9013 }, { "epoch": 214.62089552238805, "grad_norm": 1.4054770469665527, "learning_rate": 9.3495670995671e-06, "loss": 39.9914, "step": 9014 }, { "epoch": 214.644776119403, "grad_norm": 1.34827721118927, "learning_rate": 9.34848484848485e-06, "loss": 40.3139, "step": 9015 }, { "epoch": 214.6686567164179, "grad_norm": 1.3876510858535767, "learning_rate": 9.347402597402599e-06, "loss": 39.4312, "step": 9016 }, { "epoch": 214.69253731343284, "grad_norm": 1.3400264978408813, "learning_rate": 9.346320346320346e-06, "loss": 40.8177, "step": 9017 }, { "epoch": 214.71641791044777, "grad_norm": 1.3003102540969849, "learning_rate": 9.345238095238096e-06, "loss": 39.4, "step": 9018 }, { "epoch": 214.74029850746268, "grad_norm": 1.3501209020614624, "learning_rate": 9.344155844155844e-06, "loss": 40.0755, "step": 9019 }, { "epoch": 214.76417910447762, "grad_norm": 1.3377944231033325, "learning_rate": 9.343073593073594e-06, "loss": 40.073, "step": 9020 }, { "epoch": 214.78805970149253, "grad_norm": 1.3108179569244385, "learning_rate": 9.341991341991343e-06, "loss": 40.8813, "step": 9021 }, { "epoch": 214.81194029850747, "grad_norm": 1.3006434440612793, "learning_rate": 9.340909090909092e-06, "loss": 37.5852, "step": 9022 }, { "epoch": 214.83582089552237, "grad_norm": 1.3525774478912354, "learning_rate": 9.339826839826841e-06, "loss": 39.5002, "step": 9023 }, { "epoch": 214.8597014925373, "grad_norm": 1.3206485509872437, "learning_rate": 9.33874458874459e-06, "loss": 40.4594, "step": 9024 }, { "epoch": 214.88358208955225, "grad_norm": 1.3458542823791504, "learning_rate": 9.337662337662339e-06, "loss": 39.7528, "step": 9025 }, { "epoch": 214.90746268656716, "grad_norm": 1.30648934841156, "learning_rate": 9.336580086580088e-06, "loss": 40.2317, "step": 9026 }, { "epoch": 214.9313432835821, "grad_norm": 1.3070499897003174, "learning_rate": 9.335497835497837e-06, "loss": 40.0978, "step": 9027 }, { "epoch": 214.955223880597, "grad_norm": 1.3488596677780151, "learning_rate": 9.334415584415585e-06, "loss": 40.3251, "step": 9028 }, { "epoch": 214.97910447761194, "grad_norm": 1.3354851007461548, "learning_rate": 9.333333333333334e-06, "loss": 40.6238, "step": 9029 }, { "epoch": 215.0, "grad_norm": 1.3880212306976318, "learning_rate": 9.332251082251083e-06, "loss": 33.8173, "step": 9030 }, { "epoch": 215.02388059701494, "grad_norm": 1.3306854963302612, "learning_rate": 9.331168831168832e-06, "loss": 40.2506, "step": 9031 }, { "epoch": 215.04776119402985, "grad_norm": 1.410836100578308, "learning_rate": 9.330086580086581e-06, "loss": 40.0879, "step": 9032 }, { "epoch": 215.07164179104478, "grad_norm": 1.3237667083740234, "learning_rate": 9.32900432900433e-06, "loss": 40.7321, "step": 9033 }, { "epoch": 215.0955223880597, "grad_norm": 1.3942846059799194, "learning_rate": 9.327922077922079e-06, "loss": 39.1465, "step": 9034 }, { "epoch": 215.11940298507463, "grad_norm": 1.393647313117981, "learning_rate": 9.326839826839828e-06, "loss": 39.1681, "step": 9035 }, { "epoch": 215.14328358208957, "grad_norm": 1.31729257106781, "learning_rate": 9.325757575757577e-06, "loss": 39.1958, "step": 9036 }, { "epoch": 215.16716417910447, "grad_norm": 1.3377078771591187, "learning_rate": 9.324675324675326e-06, "loss": 39.1978, "step": 9037 }, { "epoch": 215.1910447761194, "grad_norm": 1.339669942855835, "learning_rate": 9.323593073593074e-06, "loss": 39.1063, "step": 9038 }, { "epoch": 215.21492537313432, "grad_norm": 1.3493188619613647, "learning_rate": 9.322510822510823e-06, "loss": 39.8546, "step": 9039 }, { "epoch": 215.23880597014926, "grad_norm": 1.332404375076294, "learning_rate": 9.321428571428572e-06, "loss": 39.8477, "step": 9040 }, { "epoch": 215.26268656716417, "grad_norm": 1.3353917598724365, "learning_rate": 9.320346320346321e-06, "loss": 39.0215, "step": 9041 }, { "epoch": 215.2865671641791, "grad_norm": 1.3325833082199097, "learning_rate": 9.31926406926407e-06, "loss": 40.384, "step": 9042 }, { "epoch": 215.31044776119404, "grad_norm": 1.3510289192199707, "learning_rate": 9.318181818181819e-06, "loss": 40.7971, "step": 9043 }, { "epoch": 215.33432835820895, "grad_norm": 1.3564817905426025, "learning_rate": 9.317099567099568e-06, "loss": 39.9598, "step": 9044 }, { "epoch": 215.3582089552239, "grad_norm": 1.3814705610275269, "learning_rate": 9.316017316017317e-06, "loss": 40.4716, "step": 9045 }, { "epoch": 215.3820895522388, "grad_norm": 1.3852641582489014, "learning_rate": 9.314935064935066e-06, "loss": 39.6939, "step": 9046 }, { "epoch": 215.40597014925373, "grad_norm": 1.338158130645752, "learning_rate": 9.313852813852815e-06, "loss": 41.3239, "step": 9047 }, { "epoch": 215.42985074626867, "grad_norm": 1.3076518774032593, "learning_rate": 9.312770562770563e-06, "loss": 39.4471, "step": 9048 }, { "epoch": 215.45373134328358, "grad_norm": 1.3881725072860718, "learning_rate": 9.311688311688312e-06, "loss": 39.6732, "step": 9049 }, { "epoch": 215.47761194029852, "grad_norm": 1.3573355674743652, "learning_rate": 9.310606060606061e-06, "loss": 40.1852, "step": 9050 }, { "epoch": 215.50149253731342, "grad_norm": 1.3749816417694092, "learning_rate": 9.30952380952381e-06, "loss": 38.5196, "step": 9051 }, { "epoch": 215.52537313432836, "grad_norm": 1.326401710510254, "learning_rate": 9.308441558441559e-06, "loss": 40.6563, "step": 9052 }, { "epoch": 215.54925373134327, "grad_norm": 1.3568955659866333, "learning_rate": 9.307359307359308e-06, "loss": 38.9979, "step": 9053 }, { "epoch": 215.5731343283582, "grad_norm": 1.3469583988189697, "learning_rate": 9.306277056277057e-06, "loss": 38.9274, "step": 9054 }, { "epoch": 215.59701492537314, "grad_norm": 1.322344183921814, "learning_rate": 9.305194805194806e-06, "loss": 40.3017, "step": 9055 }, { "epoch": 215.62089552238805, "grad_norm": 1.384060025215149, "learning_rate": 9.304112554112555e-06, "loss": 39.2384, "step": 9056 }, { "epoch": 215.644776119403, "grad_norm": 1.362900972366333, "learning_rate": 9.303030303030303e-06, "loss": 40.0412, "step": 9057 }, { "epoch": 215.6686567164179, "grad_norm": 1.3315205574035645, "learning_rate": 9.301948051948052e-06, "loss": 40.2008, "step": 9058 }, { "epoch": 215.69253731343284, "grad_norm": 1.3460419178009033, "learning_rate": 9.300865800865801e-06, "loss": 39.9605, "step": 9059 }, { "epoch": 215.71641791044777, "grad_norm": 1.3318532705307007, "learning_rate": 9.29978354978355e-06, "loss": 38.7016, "step": 9060 }, { "epoch": 215.74029850746268, "grad_norm": 1.3352954387664795, "learning_rate": 9.298701298701299e-06, "loss": 38.7922, "step": 9061 }, { "epoch": 215.76417910447762, "grad_norm": 1.334065556526184, "learning_rate": 9.297619047619048e-06, "loss": 40.6281, "step": 9062 }, { "epoch": 215.78805970149253, "grad_norm": 1.451830267906189, "learning_rate": 9.296536796536797e-06, "loss": 39.4192, "step": 9063 }, { "epoch": 215.81194029850747, "grad_norm": 1.3362349271774292, "learning_rate": 9.295454545454546e-06, "loss": 39.2861, "step": 9064 }, { "epoch": 215.83582089552237, "grad_norm": 1.3368558883666992, "learning_rate": 9.294372294372295e-06, "loss": 39.7179, "step": 9065 }, { "epoch": 215.8597014925373, "grad_norm": 1.4828766584396362, "learning_rate": 9.293290043290044e-06, "loss": 38.8801, "step": 9066 }, { "epoch": 215.88358208955225, "grad_norm": 1.3434488773345947, "learning_rate": 9.292207792207792e-06, "loss": 40.8995, "step": 9067 }, { "epoch": 215.90746268656716, "grad_norm": 1.364312767982483, "learning_rate": 9.291125541125541e-06, "loss": 39.3966, "step": 9068 }, { "epoch": 215.9313432835821, "grad_norm": 1.3534233570098877, "learning_rate": 9.290043290043292e-06, "loss": 40.3243, "step": 9069 }, { "epoch": 215.955223880597, "grad_norm": 1.3646320104599, "learning_rate": 9.28896103896104e-06, "loss": 41.0206, "step": 9070 }, { "epoch": 215.97910447761194, "grad_norm": 1.3334481716156006, "learning_rate": 9.28787878787879e-06, "loss": 39.4332, "step": 9071 }, { "epoch": 216.0, "grad_norm": 1.3559198379516602, "learning_rate": 9.286796536796537e-06, "loss": 33.8881, "step": 9072 }, { "epoch": 216.02388059701494, "grad_norm": 1.336771011352539, "learning_rate": 9.285714285714288e-06, "loss": 39.8469, "step": 9073 }, { "epoch": 216.04776119402985, "grad_norm": 1.3626152276992798, "learning_rate": 9.284632034632035e-06, "loss": 40.1127, "step": 9074 }, { "epoch": 216.07164179104478, "grad_norm": 1.391513705253601, "learning_rate": 9.283549783549785e-06, "loss": 40.5782, "step": 9075 }, { "epoch": 216.0955223880597, "grad_norm": NaN, "learning_rate": 9.282467532467533e-06, "loss": 53.6119, "step": 9076 }, { "epoch": 216.11940298507463, "grad_norm": 1.3934030532836914, "learning_rate": 9.282467532467533e-06, "loss": 39.4224, "step": 9077 }, { "epoch": 216.14328358208957, "grad_norm": 1.3931070566177368, "learning_rate": 9.281385281385281e-06, "loss": 39.8738, "step": 9078 }, { "epoch": 216.16716417910447, "grad_norm": 1.3415019512176514, "learning_rate": 9.28030303030303e-06, "loss": 39.4451, "step": 9079 }, { "epoch": 216.1910447761194, "grad_norm": 1.3595831394195557, "learning_rate": 9.27922077922078e-06, "loss": 40.0157, "step": 9080 }, { "epoch": 216.21492537313432, "grad_norm": 1.336120367050171, "learning_rate": 9.27813852813853e-06, "loss": 40.0802, "step": 9081 }, { "epoch": 216.23880597014926, "grad_norm": 1.3023812770843506, "learning_rate": 9.277056277056277e-06, "loss": 39.6208, "step": 9082 }, { "epoch": 216.26268656716417, "grad_norm": 1.3892810344696045, "learning_rate": 9.275974025974028e-06, "loss": 40.0269, "step": 9083 }, { "epoch": 216.2865671641791, "grad_norm": 1.3825849294662476, "learning_rate": 9.274891774891775e-06, "loss": 40.3472, "step": 9084 }, { "epoch": 216.31044776119404, "grad_norm": 1.406028389930725, "learning_rate": 9.273809523809525e-06, "loss": 38.5797, "step": 9085 }, { "epoch": 216.33432835820895, "grad_norm": 1.4058095216751099, "learning_rate": 9.272727272727273e-06, "loss": 39.7803, "step": 9086 }, { "epoch": 216.3582089552239, "grad_norm": 1.3649135828018188, "learning_rate": 9.271645021645023e-06, "loss": 39.4718, "step": 9087 }, { "epoch": 216.3820895522388, "grad_norm": 1.3512439727783203, "learning_rate": 9.27056277056277e-06, "loss": 39.4745, "step": 9088 }, { "epoch": 216.40597014925373, "grad_norm": 1.353190541267395, "learning_rate": 9.269480519480521e-06, "loss": 39.3095, "step": 9089 }, { "epoch": 216.42985074626867, "grad_norm": 1.4016547203063965, "learning_rate": 9.268398268398268e-06, "loss": 40.3309, "step": 9090 }, { "epoch": 216.45373134328358, "grad_norm": 1.3306682109832764, "learning_rate": 9.267316017316019e-06, "loss": 39.9745, "step": 9091 }, { "epoch": 216.47761194029852, "grad_norm": 1.352518916130066, "learning_rate": 9.266233766233766e-06, "loss": 37.9271, "step": 9092 }, { "epoch": 216.50149253731342, "grad_norm": 1.4220489263534546, "learning_rate": 9.265151515151517e-06, "loss": 39.7268, "step": 9093 }, { "epoch": 216.52537313432836, "grad_norm": 1.4446529150009155, "learning_rate": 9.264069264069266e-06, "loss": 39.3724, "step": 9094 }, { "epoch": 216.54925373134327, "grad_norm": 1.4269300699234009, "learning_rate": 9.262987012987014e-06, "loss": 39.63, "step": 9095 }, { "epoch": 216.5731343283582, "grad_norm": 1.3725675344467163, "learning_rate": 9.261904761904763e-06, "loss": 39.482, "step": 9096 }, { "epoch": 216.59701492537314, "grad_norm": 1.366041660308838, "learning_rate": 9.26082251082251e-06, "loss": 40.6056, "step": 9097 }, { "epoch": 216.62089552238805, "grad_norm": 1.3656901121139526, "learning_rate": 9.259740259740261e-06, "loss": 39.6143, "step": 9098 }, { "epoch": 216.644776119403, "grad_norm": 3.4700894355773926, "learning_rate": 9.258658008658008e-06, "loss": 41.5866, "step": 9099 }, { "epoch": 216.6686567164179, "grad_norm": 1.403554916381836, "learning_rate": 9.257575757575759e-06, "loss": 39.8252, "step": 9100 }, { "epoch": 216.69253731343284, "grad_norm": 1.3879982233047485, "learning_rate": 9.256493506493506e-06, "loss": 40.3014, "step": 9101 }, { "epoch": 216.71641791044777, "grad_norm": 1.3849835395812988, "learning_rate": 9.255411255411257e-06, "loss": 39.4989, "step": 9102 }, { "epoch": 216.74029850746268, "grad_norm": 1.4224824905395508, "learning_rate": 9.254329004329004e-06, "loss": 40.3796, "step": 9103 }, { "epoch": 216.76417910447762, "grad_norm": 1.4047346115112305, "learning_rate": 9.253246753246755e-06, "loss": 39.3522, "step": 9104 }, { "epoch": 216.78805970149253, "grad_norm": 1.4233030080795288, "learning_rate": 9.252164502164503e-06, "loss": 40.6143, "step": 9105 }, { "epoch": 216.81194029850747, "grad_norm": 1.4060536623001099, "learning_rate": 9.251082251082252e-06, "loss": 38.9633, "step": 9106 }, { "epoch": 216.83582089552237, "grad_norm": 1.547845721244812, "learning_rate": 9.250000000000001e-06, "loss": 39.4946, "step": 9107 }, { "epoch": 216.8597014925373, "grad_norm": 1.3757669925689697, "learning_rate": 9.24891774891775e-06, "loss": 38.9133, "step": 9108 }, { "epoch": 216.88358208955225, "grad_norm": 1.3963185548782349, "learning_rate": 9.247835497835499e-06, "loss": 40.5793, "step": 9109 }, { "epoch": 216.90746268656716, "grad_norm": 1.4279513359069824, "learning_rate": 9.246753246753248e-06, "loss": 40.159, "step": 9110 }, { "epoch": 216.9313432835821, "grad_norm": 1.3721662759780884, "learning_rate": 9.245670995670997e-06, "loss": 39.876, "step": 9111 }, { "epoch": 216.955223880597, "grad_norm": 1.3789608478546143, "learning_rate": 9.244588744588746e-06, "loss": 38.8399, "step": 9112 }, { "epoch": 216.97910447761194, "grad_norm": 1.3438913822174072, "learning_rate": 9.243506493506495e-06, "loss": 39.0955, "step": 9113 }, { "epoch": 217.0, "grad_norm": 1.3944742679595947, "learning_rate": 9.242424242424244e-06, "loss": 35.9464, "step": 9114 }, { "epoch": 217.02388059701494, "grad_norm": 1.329647183418274, "learning_rate": 9.241341991341992e-06, "loss": 39.4925, "step": 9115 }, { "epoch": 217.04776119402985, "grad_norm": 1.3586174249649048, "learning_rate": 9.240259740259741e-06, "loss": 39.9323, "step": 9116 }, { "epoch": 217.07164179104478, "grad_norm": 1.4471688270568848, "learning_rate": 9.23917748917749e-06, "loss": 38.7313, "step": 9117 }, { "epoch": 217.0955223880597, "grad_norm": 1.4640713930130005, "learning_rate": 9.238095238095239e-06, "loss": 39.5009, "step": 9118 }, { "epoch": 217.11940298507463, "grad_norm": 1.3718169927597046, "learning_rate": 9.237012987012988e-06, "loss": 40.5599, "step": 9119 }, { "epoch": 217.14328358208957, "grad_norm": 1.5333696603775024, "learning_rate": 9.235930735930737e-06, "loss": 39.5873, "step": 9120 }, { "epoch": 217.16716417910447, "grad_norm": 1.3867549896240234, "learning_rate": 9.234848484848486e-06, "loss": 38.9422, "step": 9121 }, { "epoch": 217.1910447761194, "grad_norm": 1.366132378578186, "learning_rate": 9.233766233766235e-06, "loss": 39.7261, "step": 9122 }, { "epoch": 217.21492537313432, "grad_norm": 1.3905044794082642, "learning_rate": 9.232683982683984e-06, "loss": 40.404, "step": 9123 }, { "epoch": 217.23880597014926, "grad_norm": NaN, "learning_rate": 9.231601731601733e-06, "loss": 64.3719, "step": 9124 }, { "epoch": 217.26268656716417, "grad_norm": 1.5411043167114258, "learning_rate": 9.231601731601733e-06, "loss": 39.1581, "step": 9125 }, { "epoch": 217.2865671641791, "grad_norm": 1.622114658355713, "learning_rate": 9.230519480519481e-06, "loss": 39.7668, "step": 9126 }, { "epoch": 217.31044776119404, "grad_norm": 1.581152081489563, "learning_rate": 9.22943722943723e-06, "loss": 39.1766, "step": 9127 }, { "epoch": 217.33432835820895, "grad_norm": 1.409558892250061, "learning_rate": 9.22835497835498e-06, "loss": 38.6151, "step": 9128 }, { "epoch": 217.3582089552239, "grad_norm": 1.444053053855896, "learning_rate": 9.227272727272728e-06, "loss": 40.7099, "step": 9129 }, { "epoch": 217.3820895522388, "grad_norm": 1.426204800605774, "learning_rate": 9.226190476190477e-06, "loss": 39.7897, "step": 9130 }, { "epoch": 217.40597014925373, "grad_norm": 1.483691692352295, "learning_rate": 9.225108225108226e-06, "loss": 40.2674, "step": 9131 }, { "epoch": 217.42985074626867, "grad_norm": 1.6373711824417114, "learning_rate": 9.224025974025975e-06, "loss": 40.3815, "step": 9132 }, { "epoch": 217.45373134328358, "grad_norm": 1.3852986097335815, "learning_rate": 9.222943722943724e-06, "loss": 38.9465, "step": 9133 }, { "epoch": 217.47761194029852, "grad_norm": 1.4581992626190186, "learning_rate": 9.221861471861473e-06, "loss": 41.1057, "step": 9134 }, { "epoch": 217.50149253731342, "grad_norm": 1.4802358150482178, "learning_rate": 9.220779220779221e-06, "loss": 40.3395, "step": 9135 }, { "epoch": 217.52537313432836, "grad_norm": 1.4292584657669067, "learning_rate": 9.21969696969697e-06, "loss": 40.0737, "step": 9136 }, { "epoch": 217.54925373134327, "grad_norm": 1.46249258518219, "learning_rate": 9.21861471861472e-06, "loss": 39.956, "step": 9137 }, { "epoch": 217.5731343283582, "grad_norm": 1.3715879917144775, "learning_rate": 9.217532467532468e-06, "loss": 39.9268, "step": 9138 }, { "epoch": 217.59701492537314, "grad_norm": 1.5566140413284302, "learning_rate": 9.216450216450217e-06, "loss": 40.5994, "step": 9139 }, { "epoch": 217.62089552238805, "grad_norm": 1.5534056425094604, "learning_rate": 9.215367965367966e-06, "loss": 39.2361, "step": 9140 }, { "epoch": 217.644776119403, "grad_norm": 1.4296181201934814, "learning_rate": 9.214285714285715e-06, "loss": 39.1562, "step": 9141 }, { "epoch": 217.6686567164179, "grad_norm": 1.4214874505996704, "learning_rate": 9.213203463203464e-06, "loss": 38.5081, "step": 9142 }, { "epoch": 217.69253731343284, "grad_norm": 1.4804039001464844, "learning_rate": 9.212121212121213e-06, "loss": 39.3875, "step": 9143 }, { "epoch": 217.71641791044777, "grad_norm": 1.4534378051757812, "learning_rate": 9.211038961038962e-06, "loss": 39.9466, "step": 9144 }, { "epoch": 217.74029850746268, "grad_norm": 1.4959384202957153, "learning_rate": 9.20995670995671e-06, "loss": 40.3325, "step": 9145 }, { "epoch": 217.76417910447762, "grad_norm": 1.525162935256958, "learning_rate": 9.20887445887446e-06, "loss": 40.5685, "step": 9146 }, { "epoch": 217.78805970149253, "grad_norm": 1.61495840549469, "learning_rate": 9.207792207792208e-06, "loss": 38.3835, "step": 9147 }, { "epoch": 217.81194029850747, "grad_norm": 1.4879366159439087, "learning_rate": 9.206709956709957e-06, "loss": 40.3265, "step": 9148 }, { "epoch": 217.83582089552237, "grad_norm": 1.4466155767440796, "learning_rate": 9.205627705627706e-06, "loss": 38.7716, "step": 9149 }, { "epoch": 217.8597014925373, "grad_norm": 1.445427417755127, "learning_rate": 9.204545454545455e-06, "loss": 40.4317, "step": 9150 }, { "epoch": 217.88358208955225, "grad_norm": 1.435113549232483, "learning_rate": 9.203463203463204e-06, "loss": 40.5428, "step": 9151 }, { "epoch": 217.90746268656716, "grad_norm": 1.497413992881775, "learning_rate": 9.202380952380953e-06, "loss": 39.6474, "step": 9152 }, { "epoch": 217.9313432835821, "grad_norm": 1.5198593139648438, "learning_rate": 9.201298701298702e-06, "loss": 40.1925, "step": 9153 }, { "epoch": 217.955223880597, "grad_norm": 1.4224162101745605, "learning_rate": 9.200216450216452e-06, "loss": 40.4922, "step": 9154 }, { "epoch": 217.97910447761194, "grad_norm": NaN, "learning_rate": 9.1991341991342e-06, "loss": 48.693, "step": 9155 }, { "epoch": 218.0, "grad_norm": 2.4985740184783936, "learning_rate": 9.1991341991342e-06, "loss": 35.4938, "step": 9156 }, { "epoch": 218.02388059701494, "grad_norm": 2.175889015197754, "learning_rate": 9.19805194805195e-06, "loss": 37.9161, "step": 9157 }, { "epoch": 218.04776119402985, "grad_norm": 1.9953263998031616, "learning_rate": 9.196969696969697e-06, "loss": 39.4, "step": 9158 }, { "epoch": 218.07164179104478, "grad_norm": 1.8765965700149536, "learning_rate": 9.195887445887446e-06, "loss": 40.1134, "step": 9159 }, { "epoch": 218.0955223880597, "grad_norm": 1.8430962562561035, "learning_rate": 9.194805194805195e-06, "loss": 40.2433, "step": 9160 }, { "epoch": 218.11940298507463, "grad_norm": 1.8104327917099, "learning_rate": 9.193722943722944e-06, "loss": 39.6009, "step": 9161 }, { "epoch": 218.14328358208957, "grad_norm": 1.872787594795227, "learning_rate": 9.192640692640693e-06, "loss": 40.579, "step": 9162 }, { "epoch": 218.16716417910447, "grad_norm": 1.8182744979858398, "learning_rate": 9.191558441558442e-06, "loss": 39.6129, "step": 9163 }, { "epoch": 218.1910447761194, "grad_norm": 1.8917019367218018, "learning_rate": 9.19047619047619e-06, "loss": 41.399, "step": 9164 }, { "epoch": 218.21492537313432, "grad_norm": 1.793714165687561, "learning_rate": 9.18939393939394e-06, "loss": 38.8495, "step": 9165 }, { "epoch": 218.23880597014926, "grad_norm": 1.8559178113937378, "learning_rate": 9.188311688311688e-06, "loss": 39.6443, "step": 9166 }, { "epoch": 218.26268656716417, "grad_norm": 1.660852074623108, "learning_rate": 9.187229437229437e-06, "loss": 39.135, "step": 9167 }, { "epoch": 218.2865671641791, "grad_norm": 1.639266014099121, "learning_rate": 9.186147186147188e-06, "loss": 38.9482, "step": 9168 }, { "epoch": 218.31044776119404, "grad_norm": NaN, "learning_rate": 9.185064935064935e-06, "loss": 56.9183, "step": 9169 }, { "epoch": 218.33432835820895, "grad_norm": 4.15780782699585, "learning_rate": 9.185064935064935e-06, "loss": 39.607, "step": 9170 }, { "epoch": 218.3582089552239, "grad_norm": 4.9406538009643555, "learning_rate": 9.183982683982686e-06, "loss": 40.779, "step": 9171 }, { "epoch": 218.3820895522388, "grad_norm": 5.652462005615234, "learning_rate": 9.182900432900433e-06, "loss": 40.8601, "step": 9172 }, { "epoch": 218.40597014925373, "grad_norm": 4.318502426147461, "learning_rate": 9.181818181818184e-06, "loss": 39.741, "step": 9173 }, { "epoch": 218.42985074626867, "grad_norm": 3.929030179977417, "learning_rate": 9.18073593073593e-06, "loss": 40.0164, "step": 9174 }, { "epoch": 218.45373134328358, "grad_norm": 3.436290740966797, "learning_rate": 9.179653679653681e-06, "loss": 39.5881, "step": 9175 }, { "epoch": 218.47761194029852, "grad_norm": 2.7700858116149902, "learning_rate": 9.178571428571429e-06, "loss": 38.6039, "step": 9176 }, { "epoch": 218.50149253731342, "grad_norm": 2.514906167984009, "learning_rate": 9.177489177489179e-06, "loss": 39.5665, "step": 9177 }, { "epoch": 218.52537313432836, "grad_norm": 2.1688272953033447, "learning_rate": 9.176406926406926e-06, "loss": 39.5439, "step": 9178 }, { "epoch": 218.54925373134327, "grad_norm": 2.2301292419433594, "learning_rate": 9.175324675324675e-06, "loss": 39.234, "step": 9179 }, { "epoch": 218.5731343283582, "grad_norm": 2.486435890197754, "learning_rate": 9.174242424242426e-06, "loss": 39.1853, "step": 9180 }, { "epoch": 218.59701492537314, "grad_norm": 2.3895621299743652, "learning_rate": 9.173160173160173e-06, "loss": 41.89, "step": 9181 }, { "epoch": 218.62089552238805, "grad_norm": 2.5333752632141113, "learning_rate": 9.172077922077924e-06, "loss": 40.1371, "step": 9182 }, { "epoch": 218.644776119403, "grad_norm": 2.8631839752197266, "learning_rate": 9.17099567099567e-06, "loss": 39.8522, "step": 9183 }, { "epoch": 218.6686567164179, "grad_norm": 2.6793742179870605, "learning_rate": 9.169913419913421e-06, "loss": 39.3778, "step": 9184 }, { "epoch": 218.69253731343284, "grad_norm": 2.610787868499756, "learning_rate": 9.168831168831169e-06, "loss": 40.1908, "step": 9185 }, { "epoch": 218.71641791044777, "grad_norm": 2.448359727859497, "learning_rate": 9.16774891774892e-06, "loss": 40.0004, "step": 9186 }, { "epoch": 218.74029850746268, "grad_norm": 2.5323498249053955, "learning_rate": 9.166666666666666e-06, "loss": 41.2811, "step": 9187 }, { "epoch": 218.76417910447762, "grad_norm": 2.3829596042633057, "learning_rate": 9.165584415584417e-06, "loss": 39.3114, "step": 9188 }, { "epoch": 218.78805970149253, "grad_norm": 2.416592836380005, "learning_rate": 9.164502164502164e-06, "loss": 40.8509, "step": 9189 }, { "epoch": 218.81194029850747, "grad_norm": 2.5917739868164062, "learning_rate": 9.163419913419915e-06, "loss": 41.1992, "step": 9190 }, { "epoch": 218.83582089552237, "grad_norm": 2.2490100860595703, "learning_rate": 9.162337662337664e-06, "loss": 40.2017, "step": 9191 }, { "epoch": 218.8597014925373, "grad_norm": 2.169957160949707, "learning_rate": 9.161255411255413e-06, "loss": 40.5796, "step": 9192 }, { "epoch": 218.88358208955225, "grad_norm": 2.208155870437622, "learning_rate": 9.160173160173162e-06, "loss": 39.8097, "step": 9193 }, { "epoch": 218.90746268656716, "grad_norm": 2.2819838523864746, "learning_rate": 9.15909090909091e-06, "loss": 41.8433, "step": 9194 }, { "epoch": 218.9313432835821, "grad_norm": 2.2946970462799072, "learning_rate": 9.15800865800866e-06, "loss": 40.7407, "step": 9195 }, { "epoch": 218.955223880597, "grad_norm": 2.2233545780181885, "learning_rate": 9.156926406926408e-06, "loss": 39.2542, "step": 9196 }, { "epoch": 218.97910447761194, "grad_norm": 1.9721580743789673, "learning_rate": 9.155844155844157e-06, "loss": 38.9175, "step": 9197 }, { "epoch": 219.0, "grad_norm": 2.0686604976654053, "learning_rate": 9.154761904761906e-06, "loss": 35.0976, "step": 9198 }, { "epoch": 219.02388059701494, "grad_norm": 2.3669822216033936, "learning_rate": 9.153679653679655e-06, "loss": 39.3837, "step": 9199 }, { "epoch": 219.04776119402985, "grad_norm": 2.108299493789673, "learning_rate": 9.152597402597402e-06, "loss": 39.2208, "step": 9200 }, { "epoch": 219.07164179104478, "grad_norm": 2.1985113620758057, "learning_rate": 9.151515151515153e-06, "loss": 39.5135, "step": 9201 }, { "epoch": 219.0955223880597, "grad_norm": 2.031193971633911, "learning_rate": 9.1504329004329e-06, "loss": 40.1037, "step": 9202 }, { "epoch": 219.11940298507463, "grad_norm": 2.326023817062378, "learning_rate": 9.14935064935065e-06, "loss": 40.9226, "step": 9203 }, { "epoch": 219.14328358208957, "grad_norm": 2.1149914264678955, "learning_rate": 9.1482683982684e-06, "loss": 40.2011, "step": 9204 }, { "epoch": 219.16716417910447, "grad_norm": 2.049164056777954, "learning_rate": 9.147186147186148e-06, "loss": 40.8925, "step": 9205 }, { "epoch": 219.1910447761194, "grad_norm": 2.0835018157958984, "learning_rate": 9.146103896103897e-06, "loss": 40.3624, "step": 9206 }, { "epoch": 219.21492537313432, "grad_norm": 2.314244270324707, "learning_rate": 9.145021645021646e-06, "loss": 39.2769, "step": 9207 }, { "epoch": 219.23880597014926, "grad_norm": 2.2117438316345215, "learning_rate": 9.143939393939395e-06, "loss": 40.6167, "step": 9208 }, { "epoch": 219.26268656716417, "grad_norm": 2.2879300117492676, "learning_rate": 9.142857142857144e-06, "loss": 41.4365, "step": 9209 }, { "epoch": 219.2865671641791, "grad_norm": NaN, "learning_rate": 9.141774891774893e-06, "loss": 44.2164, "step": 9210 }, { "epoch": 219.31044776119404, "grad_norm": NaN, "learning_rate": 9.141774891774893e-06, "loss": 69.3225, "step": 9211 }, { "epoch": 219.33432835820895, "grad_norm": 20.744626998901367, "learning_rate": 9.141774891774893e-06, "loss": 39.2742, "step": 9212 }, { "epoch": 219.3582089552239, "grad_norm": 21.917734146118164, "learning_rate": 9.140692640692642e-06, "loss": 39.6348, "step": 9213 }, { "epoch": 219.3820895522388, "grad_norm": 20.52790641784668, "learning_rate": 9.13961038961039e-06, "loss": 39.5043, "step": 9214 }, { "epoch": 219.40597014925373, "grad_norm": 20.698400497436523, "learning_rate": 9.13852813852814e-06, "loss": 40.0361, "step": 9215 }, { "epoch": 219.42985074626867, "grad_norm": 23.098644256591797, "learning_rate": 9.137445887445888e-06, "loss": 41.1001, "step": 9216 }, { "epoch": 219.45373134328358, "grad_norm": 20.13895606994629, "learning_rate": 9.136363636363637e-06, "loss": 40.4156, "step": 9217 }, { "epoch": 219.47761194029852, "grad_norm": 18.971960067749023, "learning_rate": 9.135281385281386e-06, "loss": 39.9461, "step": 9218 }, { "epoch": 219.50149253731342, "grad_norm": 19.195049285888672, "learning_rate": 9.134199134199135e-06, "loss": 41.1863, "step": 9219 }, { "epoch": 219.52537313432836, "grad_norm": 17.76563262939453, "learning_rate": 9.133116883116884e-06, "loss": 40.569, "step": 9220 }, { "epoch": 219.54925373134327, "grad_norm": 16.531476974487305, "learning_rate": 9.132034632034633e-06, "loss": 40.7633, "step": 9221 }, { "epoch": 219.5731343283582, "grad_norm": 15.487322807312012, "learning_rate": 9.130952380952382e-06, "loss": 40.8067, "step": 9222 }, { "epoch": 219.59701492537314, "grad_norm": 16.228761672973633, "learning_rate": 9.12987012987013e-06, "loss": 41.0559, "step": 9223 }, { "epoch": 219.62089552238805, "grad_norm": 14.402935981750488, "learning_rate": 9.12878787878788e-06, "loss": 41.6231, "step": 9224 }, { "epoch": 219.644776119403, "grad_norm": 13.5623779296875, "learning_rate": 9.127705627705628e-06, "loss": 41.4884, "step": 9225 }, { "epoch": 219.6686567164179, "grad_norm": 11.131492614746094, "learning_rate": 9.126623376623377e-06, "loss": 41.0945, "step": 9226 }, { "epoch": 219.69253731343284, "grad_norm": 10.318458557128906, "learning_rate": 9.125541125541126e-06, "loss": 41.1345, "step": 9227 }, { "epoch": 219.71641791044777, "grad_norm": 8.876760482788086, "learning_rate": 9.124458874458875e-06, "loss": 40.7139, "step": 9228 }, { "epoch": 219.74029850746268, "grad_norm": 8.628320693969727, "learning_rate": 9.123376623376624e-06, "loss": 40.4201, "step": 9229 }, { "epoch": 219.76417910447762, "grad_norm": 8.397369384765625, "learning_rate": 9.122294372294373e-06, "loss": 42.1933, "step": 9230 }, { "epoch": 219.78805970149253, "grad_norm": 8.5767240524292, "learning_rate": 9.121212121212122e-06, "loss": 43.1593, "step": 9231 }, { "epoch": 219.81194029850747, "grad_norm": 7.392801761627197, "learning_rate": 9.12012987012987e-06, "loss": 41.1837, "step": 9232 }, { "epoch": 219.83582089552237, "grad_norm": 8.099268913269043, "learning_rate": 9.11904761904762e-06, "loss": 42.1279, "step": 9233 }, { "epoch": 219.8597014925373, "grad_norm": 8.394611358642578, "learning_rate": 9.117965367965369e-06, "loss": 42.1667, "step": 9234 }, { "epoch": 219.88358208955225, "grad_norm": 7.783156394958496, "learning_rate": 9.116883116883117e-06, "loss": 41.8635, "step": 9235 }, { "epoch": 219.90746268656716, "grad_norm": 8.130783081054688, "learning_rate": 9.115800865800866e-06, "loss": 42.1189, "step": 9236 }, { "epoch": 219.9313432835821, "grad_norm": 8.73431396484375, "learning_rate": 9.114718614718615e-06, "loss": 42.2205, "step": 9237 }, { "epoch": 219.955223880597, "grad_norm": 7.640553951263428, "learning_rate": 9.113636363636364e-06, "loss": 41.9356, "step": 9238 }, { "epoch": 219.97910447761194, "grad_norm": 7.003914833068848, "learning_rate": 9.112554112554113e-06, "loss": 41.9095, "step": 9239 }, { "epoch": 220.0, "grad_norm": 7.6365647315979, "learning_rate": 9.111471861471862e-06, "loss": 37.0264, "step": 9240 }, { "epoch": 220.0, "step": 9240, "total_flos": 4.574119339331077e+17, "train_loss": 3.637602469208953, "train_runtime": 25292.859, "train_samples_per_second": 46.552, "train_steps_per_second": 0.365 }, { "epoch": 220.02388059701494, "grad_norm": 2.302000045776367, "learning_rate": 1e-05, "loss": 39.6722, "step": 9241 }, { "epoch": 220.04776119402985, "grad_norm": 2.017524003982544, "learning_rate": 9.99896480331263e-06, "loss": 39.9667, "step": 9242 }, { "epoch": 220.07164179104478, "grad_norm": 1.9442919492721558, "learning_rate": 9.99792960662526e-06, "loss": 39.4298, "step": 9243 }, { "epoch": 220.0955223880597, "grad_norm": 1.8519662618637085, "learning_rate": 9.99689440993789e-06, "loss": 40.0619, "step": 9244 }, { "epoch": 220.11940298507463, "grad_norm": 1.5320680141448975, "learning_rate": 9.995859213250519e-06, "loss": 40.4193, "step": 9245 }, { "epoch": 220.14328358208957, "grad_norm": 1.4897253513336182, "learning_rate": 9.994824016563148e-06, "loss": 39.762, "step": 9246 }, { "epoch": 220.16716417910447, "grad_norm": 1.3850762844085693, "learning_rate": 9.993788819875776e-06, "loss": 40.0303, "step": 9247 }, { "epoch": 220.1910447761194, "grad_norm": 1.4307444095611572, "learning_rate": 9.992753623188408e-06, "loss": 39.7919, "step": 9248 }, { "epoch": 220.21492537313432, "grad_norm": 1.386444091796875, "learning_rate": 9.991718426501035e-06, "loss": 40.8309, "step": 9249 }, { "epoch": 220.23880597014926, "grad_norm": 1.4181225299835205, "learning_rate": 9.990683229813667e-06, "loss": 40.3473, "step": 9250 }, { "epoch": 220.26268656716417, "grad_norm": 1.477706789970398, "learning_rate": 9.989648033126294e-06, "loss": 40.2811, "step": 9251 }, { "epoch": 220.2865671641791, "grad_norm": 1.4879395961761475, "learning_rate": 9.988612836438924e-06, "loss": 39.6741, "step": 9252 }, { "epoch": 220.31044776119404, "grad_norm": 1.5024008750915527, "learning_rate": 9.987577639751553e-06, "loss": 38.6801, "step": 9253 }, { "epoch": 220.33432835820895, "grad_norm": 1.4948691129684448, "learning_rate": 9.986542443064183e-06, "loss": 38.9284, "step": 9254 }, { "epoch": 220.3582089552239, "grad_norm": 1.5872490406036377, "learning_rate": 9.985507246376813e-06, "loss": 40.15, "step": 9255 }, { "epoch": 220.3820895522388, "grad_norm": 1.4201587438583374, "learning_rate": 9.984472049689442e-06, "loss": 40.3195, "step": 9256 }, { "epoch": 220.40597014925373, "grad_norm": 1.3493770360946655, "learning_rate": 9.983436853002072e-06, "loss": 39.6486, "step": 9257 }, { "epoch": 220.42985074626867, "grad_norm": 1.3375205993652344, "learning_rate": 9.982401656314701e-06, "loss": 40.5857, "step": 9258 }, { "epoch": 220.45373134328358, "grad_norm": 1.395685076713562, "learning_rate": 9.981366459627329e-06, "loss": 39.6548, "step": 9259 }, { "epoch": 220.47761194029852, "grad_norm": 1.3741755485534668, "learning_rate": 9.98033126293996e-06, "loss": 39.9829, "step": 9260 }, { "epoch": 220.50149253731342, "grad_norm": 1.3706995248794556, "learning_rate": 9.979296066252588e-06, "loss": 39.2526, "step": 9261 }, { "epoch": 220.52537313432836, "grad_norm": 1.376741647720337, "learning_rate": 9.978260869565218e-06, "loss": 39.4763, "step": 9262 }, { "epoch": 220.54925373134327, "grad_norm": 1.417517900466919, "learning_rate": 9.977225672877847e-06, "loss": 38.1743, "step": 9263 }, { "epoch": 220.5731343283582, "grad_norm": 1.38371741771698, "learning_rate": 9.976190476190477e-06, "loss": 39.3984, "step": 9264 }, { "epoch": 220.59701492537314, "grad_norm": 1.373542308807373, "learning_rate": 9.975155279503106e-06, "loss": 40.9435, "step": 9265 }, { "epoch": 220.62089552238805, "grad_norm": 1.4091132879257202, "learning_rate": 9.974120082815736e-06, "loss": 39.9887, "step": 9266 }, { "epoch": 220.644776119403, "grad_norm": 1.3743854761123657, "learning_rate": 9.973084886128365e-06, "loss": 39.4039, "step": 9267 }, { "epoch": 220.6686567164179, "grad_norm": 1.4115084409713745, "learning_rate": 9.972049689440995e-06, "loss": 40.4023, "step": 9268 }, { "epoch": 220.69253731343284, "grad_norm": 1.3553569316864014, "learning_rate": 9.971014492753624e-06, "loss": 40.0597, "step": 9269 }, { "epoch": 220.71641791044777, "grad_norm": 1.3552768230438232, "learning_rate": 9.969979296066254e-06, "loss": 40.0063, "step": 9270 }, { "epoch": 220.74029850746268, "grad_norm": 1.3495231866836548, "learning_rate": 9.968944099378883e-06, "loss": 39.718, "step": 9271 }, { "epoch": 220.76417910447762, "grad_norm": 1.3305625915527344, "learning_rate": 9.967908902691513e-06, "loss": 39.4308, "step": 9272 }, { "epoch": 220.78805970149253, "grad_norm": 1.3628684282302856, "learning_rate": 9.966873706004142e-06, "loss": 39.1449, "step": 9273 }, { "epoch": 220.81194029850747, "grad_norm": NaN, "learning_rate": 9.96583850931677e-06, "loss": 56.2704, "step": 9274 }, { "epoch": 220.83582089552237, "grad_norm": 1.3664478063583374, "learning_rate": 9.96583850931677e-06, "loss": 39.4922, "step": 9275 }, { "epoch": 220.8597014925373, "grad_norm": 1.3731492757797241, "learning_rate": 9.964803312629401e-06, "loss": 39.7957, "step": 9276 }, { "epoch": 220.88358208955225, "grad_norm": 1.3781698942184448, "learning_rate": 9.96376811594203e-06, "loss": 39.8558, "step": 9277 }, { "epoch": 220.90746268656716, "grad_norm": 1.346747636795044, "learning_rate": 9.962732919254659e-06, "loss": 40.5152, "step": 9278 }, { "epoch": 220.9313432835821, "grad_norm": 1.358534574508667, "learning_rate": 9.961697722567288e-06, "loss": 41.1693, "step": 9279 }, { "epoch": 220.955223880597, "grad_norm": 1.3258297443389893, "learning_rate": 9.960662525879918e-06, "loss": 40.024, "step": 9280 }, { "epoch": 220.97910447761194, "grad_norm": 1.3708223104476929, "learning_rate": 9.959627329192547e-06, "loss": 39.7126, "step": 9281 }, { "epoch": 221.0, "grad_norm": 1.3248589038848877, "learning_rate": 9.958592132505177e-06, "loss": 34.4994, "step": 9282 }, { "epoch": 221.02388059701494, "grad_norm": 1.3621748685836792, "learning_rate": 9.957556935817806e-06, "loss": 39.7078, "step": 9283 }, { "epoch": 221.04776119402985, "grad_norm": 1.3379899263381958, "learning_rate": 9.956521739130436e-06, "loss": 39.8794, "step": 9284 }, { "epoch": 221.07164179104478, "grad_norm": 1.329922080039978, "learning_rate": 9.955486542443065e-06, "loss": 39.953, "step": 9285 }, { "epoch": 221.0955223880597, "grad_norm": 1.3478859663009644, "learning_rate": 9.954451345755695e-06, "loss": 39.9806, "step": 9286 }, { "epoch": 221.11940298507463, "grad_norm": 1.3376243114471436, "learning_rate": 9.953416149068323e-06, "loss": 41.5858, "step": 9287 }, { "epoch": 221.14328358208957, "grad_norm": 1.342517375946045, "learning_rate": 9.952380952380954e-06, "loss": 40.1735, "step": 9288 }, { "epoch": 221.16716417910447, "grad_norm": 1.3431588411331177, "learning_rate": 9.951345755693582e-06, "loss": 40.2461, "step": 9289 }, { "epoch": 221.1910447761194, "grad_norm": 1.3400155305862427, "learning_rate": 9.950310559006211e-06, "loss": 39.6553, "step": 9290 }, { "epoch": 221.21492537313432, "grad_norm": 1.3422187566757202, "learning_rate": 9.949275362318841e-06, "loss": 40.0659, "step": 9291 }, { "epoch": 221.23880597014926, "grad_norm": 1.3204926252365112, "learning_rate": 9.94824016563147e-06, "loss": 38.7737, "step": 9292 }, { "epoch": 221.26268656716417, "grad_norm": 1.3227367401123047, "learning_rate": 9.9472049689441e-06, "loss": 40.9589, "step": 9293 }, { "epoch": 221.2865671641791, "grad_norm": 1.3856098651885986, "learning_rate": 9.94616977225673e-06, "loss": 40.4813, "step": 9294 }, { "epoch": 221.31044776119404, "grad_norm": 1.383323073387146, "learning_rate": 9.945134575569359e-06, "loss": 39.0804, "step": 9295 }, { "epoch": 221.33432835820895, "grad_norm": 1.332831859588623, "learning_rate": 9.944099378881989e-06, "loss": 40.0222, "step": 9296 }, { "epoch": 221.3582089552239, "grad_norm": 1.3961765766143799, "learning_rate": 9.943064182194618e-06, "loss": 38.5023, "step": 9297 }, { "epoch": 221.3820895522388, "grad_norm": 1.3364187479019165, "learning_rate": 9.942028985507248e-06, "loss": 39.4072, "step": 9298 }, { "epoch": 221.40597014925373, "grad_norm": 1.3630396127700806, "learning_rate": 9.940993788819877e-06, "loss": 38.9947, "step": 9299 }, { "epoch": 221.42985074626867, "grad_norm": 1.4053266048431396, "learning_rate": 9.939958592132507e-06, "loss": 40.0567, "step": 9300 }, { "epoch": 221.45373134328358, "grad_norm": 1.3649506568908691, "learning_rate": 9.938923395445136e-06, "loss": 40.7096, "step": 9301 }, { "epoch": 221.47761194029852, "grad_norm": 1.3154598474502563, "learning_rate": 9.937888198757764e-06, "loss": 40.7743, "step": 9302 }, { "epoch": 221.50149253731342, "grad_norm": 1.3447788953781128, "learning_rate": 9.936853002070395e-06, "loss": 40.1588, "step": 9303 }, { "epoch": 221.52537313432836, "grad_norm": 1.3709872961044312, "learning_rate": 9.935817805383023e-06, "loss": 39.7125, "step": 9304 }, { "epoch": 221.54925373134327, "grad_norm": 1.3538366556167603, "learning_rate": 9.934782608695653e-06, "loss": 40.0253, "step": 9305 }, { "epoch": 221.5731343283582, "grad_norm": 1.2993680238723755, "learning_rate": 9.933747412008282e-06, "loss": 38.7265, "step": 9306 }, { "epoch": 221.59701492537314, "grad_norm": 1.3672246932983398, "learning_rate": 9.932712215320912e-06, "loss": 39.8465, "step": 9307 }, { "epoch": 221.62089552238805, "grad_norm": 1.3850135803222656, "learning_rate": 9.931677018633541e-06, "loss": 39.6241, "step": 9308 }, { "epoch": 221.644776119403, "grad_norm": NaN, "learning_rate": 9.93064182194617e-06, "loss": 60.1733, "step": 9309 }, { "epoch": 221.6686567164179, "grad_norm": 1.3334814310073853, "learning_rate": 9.93064182194617e-06, "loss": 40.1222, "step": 9310 }, { "epoch": 221.69253731343284, "grad_norm": 1.379608392715454, "learning_rate": 9.9296066252588e-06, "loss": 39.1111, "step": 9311 }, { "epoch": 221.71641791044777, "grad_norm": 1.3958364725112915, "learning_rate": 9.92857142857143e-06, "loss": 38.8112, "step": 9312 }, { "epoch": 221.74029850746268, "grad_norm": 1.3423357009887695, "learning_rate": 9.927536231884058e-06, "loss": 39.9999, "step": 9313 }, { "epoch": 221.76417910447762, "grad_norm": 1.3566471338272095, "learning_rate": 9.926501035196689e-06, "loss": 40.4121, "step": 9314 }, { "epoch": 221.78805970149253, "grad_norm": 1.3513799905776978, "learning_rate": 9.925465838509317e-06, "loss": 39.5709, "step": 9315 }, { "epoch": 221.81194029850747, "grad_norm": 1.394000768661499, "learning_rate": 9.924430641821948e-06, "loss": 39.8688, "step": 9316 }, { "epoch": 221.83582089552237, "grad_norm": 1.2899487018585205, "learning_rate": 9.923395445134576e-06, "loss": 40.6667, "step": 9317 }, { "epoch": 221.8597014925373, "grad_norm": 1.327786922454834, "learning_rate": 9.922360248447205e-06, "loss": 39.617, "step": 9318 }, { "epoch": 221.88358208955225, "grad_norm": 1.3295845985412598, "learning_rate": 9.921325051759835e-06, "loss": 39.2651, "step": 9319 }, { "epoch": 221.90746268656716, "grad_norm": 1.3454692363739014, "learning_rate": 9.920289855072464e-06, "loss": 39.2003, "step": 9320 }, { "epoch": 221.9313432835821, "grad_norm": 1.3409383296966553, "learning_rate": 9.919254658385094e-06, "loss": 40.9716, "step": 9321 }, { "epoch": 221.955223880597, "grad_norm": 1.3285244703292847, "learning_rate": 9.918219461697723e-06, "loss": 39.0024, "step": 9322 }, { "epoch": 221.97910447761194, "grad_norm": 1.3286458253860474, "learning_rate": 9.917184265010353e-06, "loss": 39.7707, "step": 9323 }, { "epoch": 222.0, "grad_norm": 1.3819799423217773, "learning_rate": 9.916149068322982e-06, "loss": 34.2378, "step": 9324 }, { "epoch": 222.02388059701494, "grad_norm": 1.3509794473648071, "learning_rate": 9.915113871635612e-06, "loss": 40.2872, "step": 9325 }, { "epoch": 222.04776119402985, "grad_norm": 1.3424546718597412, "learning_rate": 9.914078674948242e-06, "loss": 40.0262, "step": 9326 }, { "epoch": 222.07164179104478, "grad_norm": 1.3807897567749023, "learning_rate": 9.913043478260871e-06, "loss": 39.0513, "step": 9327 }, { "epoch": 222.0955223880597, "grad_norm": 1.4170589447021484, "learning_rate": 9.912008281573499e-06, "loss": 39.6408, "step": 9328 }, { "epoch": 222.11940298507463, "grad_norm": 1.3338369131088257, "learning_rate": 9.91097308488613e-06, "loss": 39.694, "step": 9329 }, { "epoch": 222.14328358208957, "grad_norm": 1.3710956573486328, "learning_rate": 9.909937888198758e-06, "loss": 40.1567, "step": 9330 }, { "epoch": 222.16716417910447, "grad_norm": 1.3363817930221558, "learning_rate": 9.90890269151139e-06, "loss": 39.3773, "step": 9331 }, { "epoch": 222.1910447761194, "grad_norm": 1.3334870338439941, "learning_rate": 9.907867494824017e-06, "loss": 40.0773, "step": 9332 }, { "epoch": 222.21492537313432, "grad_norm": 1.356014609336853, "learning_rate": 9.906832298136647e-06, "loss": 40.8435, "step": 9333 }, { "epoch": 222.23880597014926, "grad_norm": 1.3485386371612549, "learning_rate": 9.905797101449276e-06, "loss": 38.7301, "step": 9334 }, { "epoch": 222.26268656716417, "grad_norm": 1.3876549005508423, "learning_rate": 9.904761904761906e-06, "loss": 40.4419, "step": 9335 }, { "epoch": 222.2865671641791, "grad_norm": 1.3414806127548218, "learning_rate": 9.903726708074535e-06, "loss": 41.2245, "step": 9336 }, { "epoch": 222.31044776119404, "grad_norm": 1.323938012123108, "learning_rate": 9.902691511387165e-06, "loss": 39.8885, "step": 9337 }, { "epoch": 222.33432835820895, "grad_norm": 1.3299669027328491, "learning_rate": 9.901656314699794e-06, "loss": 41.1118, "step": 9338 }, { "epoch": 222.3582089552239, "grad_norm": 1.3636963367462158, "learning_rate": 9.900621118012424e-06, "loss": 40.3035, "step": 9339 }, { "epoch": 222.3820895522388, "grad_norm": 1.3413939476013184, "learning_rate": 9.899585921325052e-06, "loss": 40.6182, "step": 9340 }, { "epoch": 222.40597014925373, "grad_norm": 1.3693950176239014, "learning_rate": 9.898550724637683e-06, "loss": 39.5796, "step": 9341 }, { "epoch": 222.42985074626867, "grad_norm": 1.3450459241867065, "learning_rate": 9.89751552795031e-06, "loss": 40.1988, "step": 9342 }, { "epoch": 222.45373134328358, "grad_norm": 1.3646029233932495, "learning_rate": 9.89648033126294e-06, "loss": 39.583, "step": 9343 }, { "epoch": 222.47761194029852, "grad_norm": 1.3441942930221558, "learning_rate": 9.89544513457557e-06, "loss": 40.2406, "step": 9344 }, { "epoch": 222.50149253731342, "grad_norm": 1.3629807233810425, "learning_rate": 9.8944099378882e-06, "loss": 39.5223, "step": 9345 }, { "epoch": 222.52537313432836, "grad_norm": 1.4315780401229858, "learning_rate": 9.893374741200829e-06, "loss": 39.7699, "step": 9346 }, { "epoch": 222.54925373134327, "grad_norm": 1.371588945388794, "learning_rate": 9.892339544513458e-06, "loss": 39.9492, "step": 9347 }, { "epoch": 222.5731343283582, "grad_norm": 1.3592742681503296, "learning_rate": 9.891304347826088e-06, "loss": 40.3272, "step": 9348 }, { "epoch": 222.59701492537314, "grad_norm": 1.3526972532272339, "learning_rate": 9.890269151138717e-06, "loss": 39.8537, "step": 9349 }, { "epoch": 222.62089552238805, "grad_norm": 1.3735369443893433, "learning_rate": 9.889233954451347e-06, "loss": 39.0468, "step": 9350 }, { "epoch": 222.644776119403, "grad_norm": 1.3303518295288086, "learning_rate": 9.888198757763976e-06, "loss": 40.0241, "step": 9351 }, { "epoch": 222.6686567164179, "grad_norm": 1.3692716360092163, "learning_rate": 9.887163561076606e-06, "loss": 40.3762, "step": 9352 }, { "epoch": 222.69253731343284, "grad_norm": 1.3779349327087402, "learning_rate": 9.886128364389235e-06, "loss": 37.8934, "step": 9353 }, { "epoch": 222.71641791044777, "grad_norm": 1.3655319213867188, "learning_rate": 9.885093167701865e-06, "loss": 39.2565, "step": 9354 }, { "epoch": 222.74029850746268, "grad_norm": 1.3726075887680054, "learning_rate": 9.884057971014493e-06, "loss": 39.0184, "step": 9355 }, { "epoch": 222.76417910447762, "grad_norm": 1.4033812284469604, "learning_rate": 9.883022774327124e-06, "loss": 38.8554, "step": 9356 }, { "epoch": 222.78805970149253, "grad_norm": 1.3323819637298584, "learning_rate": 9.881987577639752e-06, "loss": 38.8176, "step": 9357 }, { "epoch": 222.81194029850747, "grad_norm": 1.325208067893982, "learning_rate": 9.880952380952381e-06, "loss": 40.8779, "step": 9358 }, { "epoch": 222.83582089552237, "grad_norm": 1.3496633768081665, "learning_rate": 9.879917184265011e-06, "loss": 39.3748, "step": 9359 }, { "epoch": 222.8597014925373, "grad_norm": 1.3506627082824707, "learning_rate": 9.87888198757764e-06, "loss": 40.1319, "step": 9360 }, { "epoch": 222.88358208955225, "grad_norm": 1.327614188194275, "learning_rate": 9.87784679089027e-06, "loss": 39.5126, "step": 9361 }, { "epoch": 222.90746268656716, "grad_norm": 1.342043399810791, "learning_rate": 9.8768115942029e-06, "loss": 39.5937, "step": 9362 }, { "epoch": 222.9313432835821, "grad_norm": 1.3317949771881104, "learning_rate": 9.875776397515529e-06, "loss": 41.1932, "step": 9363 }, { "epoch": 222.955223880597, "grad_norm": 1.3268064260482788, "learning_rate": 9.874741200828159e-06, "loss": 39.4096, "step": 9364 }, { "epoch": 222.97910447761194, "grad_norm": 1.3501805067062378, "learning_rate": 9.873706004140788e-06, "loss": 38.4269, "step": 9365 }, { "epoch": 223.0, "grad_norm": NaN, "learning_rate": 9.872670807453418e-06, "loss": 30.7269, "step": 9366 }, { "epoch": 223.02388059701494, "grad_norm": 1.3817167282104492, "learning_rate": 9.872670807453418e-06, "loss": 39.4638, "step": 9367 }, { "epoch": 223.04776119402985, "grad_norm": 1.5060032606124878, "learning_rate": 9.871635610766045e-06, "loss": 38.6305, "step": 9368 }, { "epoch": 223.07164179104478, "grad_norm": 1.3341575860977173, "learning_rate": 9.870600414078677e-06, "loss": 38.3004, "step": 9369 }, { "epoch": 223.0955223880597, "grad_norm": 1.3524519205093384, "learning_rate": 9.869565217391304e-06, "loss": 41.1425, "step": 9370 }, { "epoch": 223.11940298507463, "grad_norm": 1.3362363576889038, "learning_rate": 9.868530020703934e-06, "loss": 40.5623, "step": 9371 }, { "epoch": 223.14328358208957, "grad_norm": 1.3613203763961792, "learning_rate": 9.867494824016564e-06, "loss": 40.3176, "step": 9372 }, { "epoch": 223.16716417910447, "grad_norm": 1.352175235748291, "learning_rate": 9.866459627329193e-06, "loss": 39.6056, "step": 9373 }, { "epoch": 223.1910447761194, "grad_norm": 1.369200587272644, "learning_rate": 9.865424430641823e-06, "loss": 39.2645, "step": 9374 }, { "epoch": 223.21492537313432, "grad_norm": 1.3377455472946167, "learning_rate": 9.864389233954452e-06, "loss": 40.558, "step": 9375 }, { "epoch": 223.23880597014926, "grad_norm": 1.3350834846496582, "learning_rate": 9.863354037267082e-06, "loss": 40.4568, "step": 9376 }, { "epoch": 223.26268656716417, "grad_norm": 1.3495473861694336, "learning_rate": 9.862318840579711e-06, "loss": 40.3202, "step": 9377 }, { "epoch": 223.2865671641791, "grad_norm": 1.3731878995895386, "learning_rate": 9.861283643892339e-06, "loss": 39.8454, "step": 9378 }, { "epoch": 223.31044776119404, "grad_norm": 1.3669441938400269, "learning_rate": 9.86024844720497e-06, "loss": 40.2257, "step": 9379 }, { "epoch": 223.33432835820895, "grad_norm": 1.35543954372406, "learning_rate": 9.8592132505176e-06, "loss": 39.003, "step": 9380 }, { "epoch": 223.3582089552239, "grad_norm": 1.3647159337997437, "learning_rate": 9.85817805383023e-06, "loss": 38.4334, "step": 9381 }, { "epoch": 223.3820895522388, "grad_norm": 1.345618724822998, "learning_rate": 9.857142857142859e-06, "loss": 40.7493, "step": 9382 }, { "epoch": 223.40597014925373, "grad_norm": 1.3294236660003662, "learning_rate": 9.856107660455487e-06, "loss": 40.0867, "step": 9383 }, { "epoch": 223.42985074626867, "grad_norm": 1.3343178033828735, "learning_rate": 9.855072463768118e-06, "loss": 41.3932, "step": 9384 }, { "epoch": 223.45373134328358, "grad_norm": 1.3220396041870117, "learning_rate": 9.854037267080746e-06, "loss": 39.9507, "step": 9385 }, { "epoch": 223.47761194029852, "grad_norm": 1.3620572090148926, "learning_rate": 9.853002070393375e-06, "loss": 39.897, "step": 9386 }, { "epoch": 223.50149253731342, "grad_norm": 1.3153690099716187, "learning_rate": 9.851966873706005e-06, "loss": 40.3923, "step": 9387 }, { "epoch": 223.52537313432836, "grad_norm": 1.5035969018936157, "learning_rate": 9.850931677018634e-06, "loss": 38.649, "step": 9388 }, { "epoch": 223.54925373134327, "grad_norm": 1.458274245262146, "learning_rate": 9.849896480331264e-06, "loss": 39.2399, "step": 9389 }, { "epoch": 223.5731343283582, "grad_norm": 1.3490139245986938, "learning_rate": 9.848861283643893e-06, "loss": 41.1793, "step": 9390 }, { "epoch": 223.59701492537314, "grad_norm": 1.3357422351837158, "learning_rate": 9.847826086956523e-06, "loss": 40.3342, "step": 9391 }, { "epoch": 223.62089552238805, "grad_norm": 1.391489863395691, "learning_rate": 9.846790890269152e-06, "loss": 39.8385, "step": 9392 }, { "epoch": 223.644776119403, "grad_norm": 1.3451342582702637, "learning_rate": 9.84575569358178e-06, "loss": 39.3728, "step": 9393 }, { "epoch": 223.6686567164179, "grad_norm": 1.383774757385254, "learning_rate": 9.844720496894411e-06, "loss": 39.5263, "step": 9394 }, { "epoch": 223.69253731343284, "grad_norm": 1.3914568424224854, "learning_rate": 9.84368530020704e-06, "loss": 39.6458, "step": 9395 }, { "epoch": 223.71641791044777, "grad_norm": 1.3256614208221436, "learning_rate": 9.84265010351967e-06, "loss": 40.1178, "step": 9396 }, { "epoch": 223.74029850746268, "grad_norm": 1.3346333503723145, "learning_rate": 9.841614906832298e-06, "loss": 38.9045, "step": 9397 }, { "epoch": 223.76417910447762, "grad_norm": 1.3780453205108643, "learning_rate": 9.840579710144928e-06, "loss": 39.2458, "step": 9398 }, { "epoch": 223.78805970149253, "grad_norm": 1.3671326637268066, "learning_rate": 9.839544513457557e-06, "loss": 40.5997, "step": 9399 }, { "epoch": 223.81194029850747, "grad_norm": 1.3747466802597046, "learning_rate": 9.838509316770187e-06, "loss": 40.219, "step": 9400 }, { "epoch": 223.83582089552237, "grad_norm": 1.349100112915039, "learning_rate": 9.837474120082817e-06, "loss": 40.0458, "step": 9401 }, { "epoch": 223.8597014925373, "grad_norm": 1.3522534370422363, "learning_rate": 9.836438923395446e-06, "loss": 40.3942, "step": 9402 }, { "epoch": 223.88358208955225, "grad_norm": 1.3322561979293823, "learning_rate": 9.835403726708076e-06, "loss": 40.5969, "step": 9403 }, { "epoch": 223.90746268656716, "grad_norm": 1.3569977283477783, "learning_rate": 9.834368530020705e-06, "loss": 38.06, "step": 9404 }, { "epoch": 223.9313432835821, "grad_norm": 1.360408902168274, "learning_rate": 9.833333333333333e-06, "loss": 39.5886, "step": 9405 }, { "epoch": 223.955223880597, "grad_norm": 1.4009556770324707, "learning_rate": 9.832298136645964e-06, "loss": 39.3023, "step": 9406 }, { "epoch": 223.97910447761194, "grad_norm": 1.3627221584320068, "learning_rate": 9.831262939958594e-06, "loss": 40.1107, "step": 9407 }, { "epoch": 224.0, "grad_norm": 1.330525279045105, "learning_rate": 9.830227743271222e-06, "loss": 34.0934, "step": 9408 }, { "epoch": 224.02388059701494, "grad_norm": 1.4609096050262451, "learning_rate": 9.829192546583853e-06, "loss": 40.418, "step": 9409 }, { "epoch": 224.04776119402985, "grad_norm": 1.3481663465499878, "learning_rate": 9.82815734989648e-06, "loss": 38.9197, "step": 9410 }, { "epoch": 224.07164179104478, "grad_norm": 1.3755154609680176, "learning_rate": 9.827122153209112e-06, "loss": 40.3158, "step": 9411 }, { "epoch": 224.0955223880597, "grad_norm": 1.3303650617599487, "learning_rate": 9.82608695652174e-06, "loss": 39.6685, "step": 9412 }, { "epoch": 224.11940298507463, "grad_norm": 1.3427590131759644, "learning_rate": 9.82505175983437e-06, "loss": 41.5359, "step": 9413 }, { "epoch": 224.14328358208957, "grad_norm": 1.3307251930236816, "learning_rate": 9.824016563146999e-06, "loss": 39.1274, "step": 9414 }, { "epoch": 224.16716417910447, "grad_norm": 1.3369216918945312, "learning_rate": 9.822981366459628e-06, "loss": 40.2033, "step": 9415 }, { "epoch": 224.1910447761194, "grad_norm": 1.3735458850860596, "learning_rate": 9.821946169772258e-06, "loss": 39.8309, "step": 9416 }, { "epoch": 224.21492537313432, "grad_norm": 1.3338803052902222, "learning_rate": 9.820910973084887e-06, "loss": 39.7462, "step": 9417 }, { "epoch": 224.23880597014926, "grad_norm": 1.3553518056869507, "learning_rate": 9.819875776397517e-06, "loss": 40.0705, "step": 9418 }, { "epoch": 224.26268656716417, "grad_norm": 1.3575600385665894, "learning_rate": 9.818840579710146e-06, "loss": 40.3958, "step": 9419 }, { "epoch": 224.2865671641791, "grad_norm": 1.3784122467041016, "learning_rate": 9.817805383022774e-06, "loss": 39.4826, "step": 9420 }, { "epoch": 224.31044776119404, "grad_norm": 1.3317824602127075, "learning_rate": 9.816770186335405e-06, "loss": 40.3363, "step": 9421 }, { "epoch": 224.33432835820895, "grad_norm": 1.3549480438232422, "learning_rate": 9.815734989648033e-06, "loss": 38.9411, "step": 9422 }, { "epoch": 224.3582089552239, "grad_norm": 1.3351539373397827, "learning_rate": 9.814699792960663e-06, "loss": 39.4817, "step": 9423 }, { "epoch": 224.3820895522388, "grad_norm": 1.3238543272018433, "learning_rate": 9.813664596273292e-06, "loss": 39.779, "step": 9424 }, { "epoch": 224.40597014925373, "grad_norm": 1.3776406049728394, "learning_rate": 9.812629399585922e-06, "loss": 39.7846, "step": 9425 }, { "epoch": 224.42985074626867, "grad_norm": 1.3349812030792236, "learning_rate": 9.811594202898551e-06, "loss": 38.9155, "step": 9426 }, { "epoch": 224.45373134328358, "grad_norm": 1.3931225538253784, "learning_rate": 9.810559006211181e-06, "loss": 39.1672, "step": 9427 }, { "epoch": 224.47761194029852, "grad_norm": 1.3243671655654907, "learning_rate": 9.80952380952381e-06, "loss": 38.8406, "step": 9428 }, { "epoch": 224.50149253731342, "grad_norm": 1.3782374858856201, "learning_rate": 9.80848861283644e-06, "loss": 39.6976, "step": 9429 }, { "epoch": 224.52537313432836, "grad_norm": 1.3303310871124268, "learning_rate": 9.80745341614907e-06, "loss": 39.9401, "step": 9430 }, { "epoch": 224.54925373134327, "grad_norm": 1.3791791200637817, "learning_rate": 9.806418219461699e-06, "loss": 40.1488, "step": 9431 }, { "epoch": 224.5731343283582, "grad_norm": 1.356484293937683, "learning_rate": 9.805383022774327e-06, "loss": 39.4404, "step": 9432 }, { "epoch": 224.59701492537314, "grad_norm": 1.3540223836898804, "learning_rate": 9.804347826086958e-06, "loss": 38.5442, "step": 9433 }, { "epoch": 224.62089552238805, "grad_norm": 1.4389640092849731, "learning_rate": 9.803312629399588e-06, "loss": 39.6924, "step": 9434 }, { "epoch": 224.644776119403, "grad_norm": 1.5208790302276611, "learning_rate": 9.802277432712215e-06, "loss": 39.1476, "step": 9435 }, { "epoch": 224.6686567164179, "grad_norm": 1.3979154825210571, "learning_rate": 9.801242236024847e-06, "loss": 39.754, "step": 9436 }, { "epoch": 224.69253731343284, "grad_norm": 1.4714206457138062, "learning_rate": 9.800207039337474e-06, "loss": 41.1398, "step": 9437 }, { "epoch": 224.71641791044777, "grad_norm": 1.3452849388122559, "learning_rate": 9.799171842650104e-06, "loss": 39.8695, "step": 9438 }, { "epoch": 224.74029850746268, "grad_norm": 1.3917577266693115, "learning_rate": 9.798136645962734e-06, "loss": 39.3407, "step": 9439 }, { "epoch": 224.76417910447762, "grad_norm": 1.4335042238235474, "learning_rate": 9.797101449275363e-06, "loss": 40.5122, "step": 9440 }, { "epoch": 224.78805970149253, "grad_norm": 1.4221899509429932, "learning_rate": 9.796066252587993e-06, "loss": 38.8758, "step": 9441 }, { "epoch": 224.81194029850747, "grad_norm": 1.3371795415878296, "learning_rate": 9.795031055900622e-06, "loss": 41.6119, "step": 9442 }, { "epoch": 224.83582089552237, "grad_norm": 1.3698498010635376, "learning_rate": 9.793995859213252e-06, "loss": 40.8594, "step": 9443 }, { "epoch": 224.8597014925373, "grad_norm": 1.3486757278442383, "learning_rate": 9.792960662525881e-06, "loss": 40.4485, "step": 9444 }, { "epoch": 224.88358208955225, "grad_norm": 1.3386114835739136, "learning_rate": 9.79192546583851e-06, "loss": 40.4258, "step": 9445 }, { "epoch": 224.90746268656716, "grad_norm": 1.3274073600769043, "learning_rate": 9.79089026915114e-06, "loss": 39.3588, "step": 9446 }, { "epoch": 224.9313432835821, "grad_norm": 1.3853342533111572, "learning_rate": 9.789855072463768e-06, "loss": 39.5551, "step": 9447 }, { "epoch": 224.955223880597, "grad_norm": 1.3303059339523315, "learning_rate": 9.7888198757764e-06, "loss": 39.5977, "step": 9448 }, { "epoch": 224.97910447761194, "grad_norm": 1.4199415445327759, "learning_rate": 9.787784679089027e-06, "loss": 39.7217, "step": 9449 }, { "epoch": 225.0, "grad_norm": 1.3712670803070068, "learning_rate": 9.786749482401657e-06, "loss": 34.68, "step": 9450 }, { "epoch": 225.02388059701494, "grad_norm": 1.3518726825714111, "learning_rate": 9.785714285714286e-06, "loss": 38.6858, "step": 9451 }, { "epoch": 225.04776119402985, "grad_norm": 1.3356751203536987, "learning_rate": 9.784679089026916e-06, "loss": 39.7481, "step": 9452 }, { "epoch": 225.07164179104478, "grad_norm": 1.3628078699111938, "learning_rate": 9.783643892339545e-06, "loss": 40.811, "step": 9453 }, { "epoch": 225.0955223880597, "grad_norm": 1.3637073040008545, "learning_rate": 9.782608695652175e-06, "loss": 38.6023, "step": 9454 }, { "epoch": 225.11940298507463, "grad_norm": 1.3605315685272217, "learning_rate": 9.781573498964804e-06, "loss": 39.6238, "step": 9455 }, { "epoch": 225.14328358208957, "grad_norm": 1.363352656364441, "learning_rate": 9.780538302277434e-06, "loss": 40.8142, "step": 9456 }, { "epoch": 225.16716417910447, "grad_norm": 1.383967399597168, "learning_rate": 9.779503105590062e-06, "loss": 38.5884, "step": 9457 }, { "epoch": 225.1910447761194, "grad_norm": 1.3668333292007446, "learning_rate": 9.778467908902693e-06, "loss": 40.7995, "step": 9458 }, { "epoch": 225.21492537313432, "grad_norm": 1.3279043436050415, "learning_rate": 9.77743271221532e-06, "loss": 40.133, "step": 9459 }, { "epoch": 225.23880597014926, "grad_norm": 1.5569247007369995, "learning_rate": 9.776397515527952e-06, "loss": 39.4398, "step": 9460 }, { "epoch": 225.26268656716417, "grad_norm": 1.3701328039169312, "learning_rate": 9.77536231884058e-06, "loss": 39.1609, "step": 9461 }, { "epoch": 225.2865671641791, "grad_norm": 1.36750328540802, "learning_rate": 9.77432712215321e-06, "loss": 39.5907, "step": 9462 }, { "epoch": 225.31044776119404, "grad_norm": 1.3398995399475098, "learning_rate": 9.77329192546584e-06, "loss": 38.9433, "step": 9463 }, { "epoch": 225.33432835820895, "grad_norm": 1.3384202718734741, "learning_rate": 9.772256728778468e-06, "loss": 39.6315, "step": 9464 }, { "epoch": 225.3582089552239, "grad_norm": 1.3646833896636963, "learning_rate": 9.771221532091098e-06, "loss": 39.9842, "step": 9465 }, { "epoch": 225.3820895522388, "grad_norm": 1.3638331890106201, "learning_rate": 9.770186335403727e-06, "loss": 40.0879, "step": 9466 }, { "epoch": 225.40597014925373, "grad_norm": 1.3068257570266724, "learning_rate": 9.769151138716357e-06, "loss": 39.3211, "step": 9467 }, { "epoch": 225.42985074626867, "grad_norm": 1.377228856086731, "learning_rate": 9.768115942028986e-06, "loss": 40.4536, "step": 9468 }, { "epoch": 225.45373134328358, "grad_norm": NaN, "learning_rate": 9.767080745341616e-06, "loss": 33.7706, "step": 9469 }, { "epoch": 225.47761194029852, "grad_norm": 1.3635485172271729, "learning_rate": 9.767080745341616e-06, "loss": 39.846, "step": 9470 }, { "epoch": 225.50149253731342, "grad_norm": 1.3453335762023926, "learning_rate": 9.766045548654246e-06, "loss": 40.1681, "step": 9471 }, { "epoch": 225.52537313432836, "grad_norm": 1.3729277849197388, "learning_rate": 9.765010351966875e-06, "loss": 40.1945, "step": 9472 }, { "epoch": 225.54925373134327, "grad_norm": 1.3858537673950195, "learning_rate": 9.763975155279503e-06, "loss": 40.4992, "step": 9473 }, { "epoch": 225.5731343283582, "grad_norm": 1.4328856468200684, "learning_rate": 9.762939958592134e-06, "loss": 39.2575, "step": 9474 }, { "epoch": 225.59701492537314, "grad_norm": 1.3540445566177368, "learning_rate": 9.761904761904762e-06, "loss": 39.6057, "step": 9475 }, { "epoch": 225.62089552238805, "grad_norm": 1.3199269771575928, "learning_rate": 9.760869565217393e-06, "loss": 39.9042, "step": 9476 }, { "epoch": 225.644776119403, "grad_norm": 1.3895601034164429, "learning_rate": 9.759834368530021e-06, "loss": 40.6058, "step": 9477 }, { "epoch": 225.6686567164179, "grad_norm": 1.3236387968063354, "learning_rate": 9.75879917184265e-06, "loss": 40.6913, "step": 9478 }, { "epoch": 225.69253731343284, "grad_norm": 1.3146543502807617, "learning_rate": 9.75776397515528e-06, "loss": 39.6412, "step": 9479 }, { "epoch": 225.71641791044777, "grad_norm": 1.3559216260910034, "learning_rate": 9.75672877846791e-06, "loss": 39.6608, "step": 9480 }, { "epoch": 225.74029850746268, "grad_norm": 1.4100208282470703, "learning_rate": 9.755693581780539e-06, "loss": 38.2881, "step": 9481 }, { "epoch": 225.76417910447762, "grad_norm": 1.3736448287963867, "learning_rate": 9.754658385093169e-06, "loss": 39.5105, "step": 9482 }, { "epoch": 225.78805970149253, "grad_norm": 1.3664913177490234, "learning_rate": 9.753623188405798e-06, "loss": 38.1069, "step": 9483 }, { "epoch": 225.81194029850747, "grad_norm": 1.3691805601119995, "learning_rate": 9.752587991718428e-06, "loss": 40.9059, "step": 9484 }, { "epoch": 225.83582089552237, "grad_norm": 1.3579801321029663, "learning_rate": 9.751552795031056e-06, "loss": 40.2295, "step": 9485 }, { "epoch": 225.8597014925373, "grad_norm": 1.3696695566177368, "learning_rate": 9.750517598343687e-06, "loss": 40.0829, "step": 9486 }, { "epoch": 225.88358208955225, "grad_norm": 1.3515516519546509, "learning_rate": 9.749482401656315e-06, "loss": 40.7676, "step": 9487 }, { "epoch": 225.90746268656716, "grad_norm": 1.372778058052063, "learning_rate": 9.748447204968944e-06, "loss": 40.6155, "step": 9488 }, { "epoch": 225.9313432835821, "grad_norm": 1.3612407445907593, "learning_rate": 9.747412008281574e-06, "loss": 39.7877, "step": 9489 }, { "epoch": 225.955223880597, "grad_norm": 1.3391904830932617, "learning_rate": 9.746376811594203e-06, "loss": 40.2454, "step": 9490 }, { "epoch": 225.97910447761194, "grad_norm": 1.3563731908798218, "learning_rate": 9.745341614906834e-06, "loss": 39.3416, "step": 9491 }, { "epoch": 226.0, "grad_norm": 1.3340318202972412, "learning_rate": 9.744306418219462e-06, "loss": 35.6402, "step": 9492 }, { "epoch": 226.02388059701494, "grad_norm": 1.3596111536026, "learning_rate": 9.743271221532092e-06, "loss": 40.2947, "step": 9493 }, { "epoch": 226.04776119402985, "grad_norm": 1.3447941541671753, "learning_rate": 9.742236024844721e-06, "loss": 40.346, "step": 9494 }, { "epoch": 226.07164179104478, "grad_norm": 1.3512399196624756, "learning_rate": 9.74120082815735e-06, "loss": 39.9739, "step": 9495 }, { "epoch": 226.0955223880597, "grad_norm": 1.3226103782653809, "learning_rate": 9.74016563146998e-06, "loss": 40.7569, "step": 9496 }, { "epoch": 226.11940298507463, "grad_norm": 1.3632121086120605, "learning_rate": 9.73913043478261e-06, "loss": 39.787, "step": 9497 }, { "epoch": 226.14328358208957, "grad_norm": 1.3281737565994263, "learning_rate": 9.73809523809524e-06, "loss": 39.0761, "step": 9498 }, { "epoch": 226.16716417910447, "grad_norm": 1.3221168518066406, "learning_rate": 9.737060041407869e-06, "loss": 38.7001, "step": 9499 }, { "epoch": 226.1910447761194, "grad_norm": 1.35329008102417, "learning_rate": 9.736024844720497e-06, "loss": 40.2323, "step": 9500 }, { "epoch": 226.21492537313432, "grad_norm": 1.3375135660171509, "learning_rate": 9.734989648033128e-06, "loss": 40.8118, "step": 9501 }, { "epoch": 226.23880597014926, "grad_norm": 1.3540797233581543, "learning_rate": 9.733954451345756e-06, "loss": 39.5453, "step": 9502 }, { "epoch": 226.26268656716417, "grad_norm": 1.3196207284927368, "learning_rate": 9.732919254658385e-06, "loss": 41.1473, "step": 9503 }, { "epoch": 226.2865671641791, "grad_norm": 1.3396778106689453, "learning_rate": 9.731884057971015e-06, "loss": 40.4079, "step": 9504 }, { "epoch": 226.31044776119404, "grad_norm": 1.3516314029693604, "learning_rate": 9.730848861283644e-06, "loss": 38.8865, "step": 9505 }, { "epoch": 226.33432835820895, "grad_norm": 1.3708477020263672, "learning_rate": 9.729813664596274e-06, "loss": 39.5912, "step": 9506 }, { "epoch": 226.3582089552239, "grad_norm": 1.3488454818725586, "learning_rate": 9.728778467908903e-06, "loss": 40.0335, "step": 9507 }, { "epoch": 226.3820895522388, "grad_norm": 1.349735140800476, "learning_rate": 9.727743271221533e-06, "loss": 39.0833, "step": 9508 }, { "epoch": 226.40597014925373, "grad_norm": 1.3379067182540894, "learning_rate": 9.726708074534163e-06, "loss": 40.9578, "step": 9509 }, { "epoch": 226.42985074626867, "grad_norm": 1.3956648111343384, "learning_rate": 9.725672877846792e-06, "loss": 39.1091, "step": 9510 }, { "epoch": 226.45373134328358, "grad_norm": 1.3431787490844727, "learning_rate": 9.724637681159422e-06, "loss": 40.4197, "step": 9511 }, { "epoch": 226.47761194029852, "grad_norm": 1.309966802597046, "learning_rate": 9.72360248447205e-06, "loss": 39.6472, "step": 9512 }, { "epoch": 226.50149253731342, "grad_norm": 1.3312275409698486, "learning_rate": 9.72256728778468e-06, "loss": 39.4615, "step": 9513 }, { "epoch": 226.52537313432836, "grad_norm": 1.4223321676254272, "learning_rate": 9.721532091097308e-06, "loss": 40.649, "step": 9514 }, { "epoch": 226.54925373134327, "grad_norm": 1.3536654710769653, "learning_rate": 9.720496894409938e-06, "loss": 39.8109, "step": 9515 }, { "epoch": 226.5731343283582, "grad_norm": 1.3447026014328003, "learning_rate": 9.719461697722568e-06, "loss": 40.9862, "step": 9516 }, { "epoch": 226.59701492537314, "grad_norm": 1.3681148290634155, "learning_rate": 9.718426501035197e-06, "loss": 38.0596, "step": 9517 }, { "epoch": 226.62089552238805, "grad_norm": 1.3972299098968506, "learning_rate": 9.717391304347827e-06, "loss": 38.6584, "step": 9518 }, { "epoch": 226.644776119403, "grad_norm": 1.6811648607254028, "learning_rate": 9.716356107660456e-06, "loss": 40.8198, "step": 9519 }, { "epoch": 226.6686567164179, "grad_norm": 1.3388177156448364, "learning_rate": 9.715320910973086e-06, "loss": 40.5085, "step": 9520 }, { "epoch": 226.69253731343284, "grad_norm": NaN, "learning_rate": 9.714285714285715e-06, "loss": 42.7339, "step": 9521 }, { "epoch": 226.71641791044777, "grad_norm": 1.3415876626968384, "learning_rate": 9.714285714285715e-06, "loss": 40.3695, "step": 9522 }, { "epoch": 226.74029850746268, "grad_norm": 1.3391313552856445, "learning_rate": 9.713250517598345e-06, "loss": 40.2331, "step": 9523 }, { "epoch": 226.76417910447762, "grad_norm": 1.3154429197311401, "learning_rate": 9.712215320910974e-06, "loss": 40.5593, "step": 9524 }, { "epoch": 226.78805970149253, "grad_norm": 1.3965178728103638, "learning_rate": 9.711180124223604e-06, "loss": 39.0623, "step": 9525 }, { "epoch": 226.81194029850747, "grad_norm": 1.3359781503677368, "learning_rate": 9.710144927536233e-06, "loss": 40.4625, "step": 9526 }, { "epoch": 226.83582089552237, "grad_norm": 1.377581238746643, "learning_rate": 9.709109730848863e-06, "loss": 38.603, "step": 9527 }, { "epoch": 226.8597014925373, "grad_norm": 1.3465344905853271, "learning_rate": 9.70807453416149e-06, "loss": 39.934, "step": 9528 }, { "epoch": 226.88358208955225, "grad_norm": 1.3660857677459717, "learning_rate": 9.707039337474122e-06, "loss": 39.6095, "step": 9529 }, { "epoch": 226.90746268656716, "grad_norm": 1.3820102214813232, "learning_rate": 9.70600414078675e-06, "loss": 39.1692, "step": 9530 }, { "epoch": 226.9313432835821, "grad_norm": 1.3818198442459106, "learning_rate": 9.70496894409938e-06, "loss": 38.9501, "step": 9531 }, { "epoch": 226.955223880597, "grad_norm": 1.3291231393814087, "learning_rate": 9.703933747412009e-06, "loss": 38.4439, "step": 9532 }, { "epoch": 226.97910447761194, "grad_norm": 1.3103328943252563, "learning_rate": 9.702898550724638e-06, "loss": 40.0354, "step": 9533 }, { "epoch": 227.0, "grad_norm": 1.3655407428741455, "learning_rate": 9.701863354037268e-06, "loss": 34.9032, "step": 9534 }, { "epoch": 227.02388059701494, "grad_norm": 1.3328245878219604, "learning_rate": 9.700828157349897e-06, "loss": 40.2347, "step": 9535 }, { "epoch": 227.04776119402985, "grad_norm": 1.3490464687347412, "learning_rate": 9.699792960662527e-06, "loss": 40.3265, "step": 9536 }, { "epoch": 227.07164179104478, "grad_norm": 1.3231041431427002, "learning_rate": 9.698757763975156e-06, "loss": 41.0236, "step": 9537 }, { "epoch": 227.0955223880597, "grad_norm": 1.3664665222167969, "learning_rate": 9.697722567287784e-06, "loss": 39.764, "step": 9538 }, { "epoch": 227.11940298507463, "grad_norm": 1.3314025402069092, "learning_rate": 9.696687370600415e-06, "loss": 40.1923, "step": 9539 }, { "epoch": 227.14328358208957, "grad_norm": 1.3573304414749146, "learning_rate": 9.695652173913043e-06, "loss": 40.9041, "step": 9540 }, { "epoch": 227.16716417910447, "grad_norm": 1.3255659341812134, "learning_rate": 9.694616977225675e-06, "loss": 39.7282, "step": 9541 }, { "epoch": 227.1910447761194, "grad_norm": 1.3973102569580078, "learning_rate": 9.693581780538302e-06, "loss": 40.219, "step": 9542 }, { "epoch": 227.21492537313432, "grad_norm": 1.3499119281768799, "learning_rate": 9.692546583850932e-06, "loss": 39.5622, "step": 9543 }, { "epoch": 227.23880597014926, "grad_norm": 1.3315156698226929, "learning_rate": 9.691511387163561e-06, "loss": 38.6911, "step": 9544 }, { "epoch": 227.26268656716417, "grad_norm": 1.4629582166671753, "learning_rate": 9.690476190476191e-06, "loss": 39.6541, "step": 9545 }, { "epoch": 227.2865671641791, "grad_norm": 1.3431901931762695, "learning_rate": 9.68944099378882e-06, "loss": 40.8565, "step": 9546 }, { "epoch": 227.31044776119404, "grad_norm": 1.3500856161117554, "learning_rate": 9.68840579710145e-06, "loss": 40.2982, "step": 9547 }, { "epoch": 227.33432835820895, "grad_norm": 1.3292080163955688, "learning_rate": 9.68737060041408e-06, "loss": 40.8223, "step": 9548 }, { "epoch": 227.3582089552239, "grad_norm": 1.315557837486267, "learning_rate": 9.686335403726709e-06, "loss": 40.1859, "step": 9549 }, { "epoch": 227.3820895522388, "grad_norm": 1.3508491516113281, "learning_rate": 9.685300207039339e-06, "loss": 39.6468, "step": 9550 }, { "epoch": 227.40597014925373, "grad_norm": 1.362717628479004, "learning_rate": 9.684265010351968e-06, "loss": 39.377, "step": 9551 }, { "epoch": 227.42985074626867, "grad_norm": 1.3474128246307373, "learning_rate": 9.683229813664598e-06, "loss": 39.7749, "step": 9552 }, { "epoch": 227.45373134328358, "grad_norm": 1.3699764013290405, "learning_rate": 9.682194616977225e-06, "loss": 39.6095, "step": 9553 }, { "epoch": 227.47761194029852, "grad_norm": 1.351963758468628, "learning_rate": 9.681159420289857e-06, "loss": 39.6524, "step": 9554 }, { "epoch": 227.50149253731342, "grad_norm": 1.3547236919403076, "learning_rate": 9.680124223602485e-06, "loss": 40.1133, "step": 9555 }, { "epoch": 227.52537313432836, "grad_norm": 1.3331493139266968, "learning_rate": 9.679089026915116e-06, "loss": 38.7164, "step": 9556 }, { "epoch": 227.54925373134327, "grad_norm": 1.3639264106750488, "learning_rate": 9.678053830227744e-06, "loss": 40.8413, "step": 9557 }, { "epoch": 227.5731343283582, "grad_norm": 1.3239200115203857, "learning_rate": 9.677018633540373e-06, "loss": 40.1076, "step": 9558 }, { "epoch": 227.59701492537314, "grad_norm": 1.3183846473693848, "learning_rate": 9.675983436853003e-06, "loss": 40.3845, "step": 9559 }, { "epoch": 227.62089552238805, "grad_norm": 1.384840726852417, "learning_rate": 9.674948240165632e-06, "loss": 39.0537, "step": 9560 }, { "epoch": 227.644776119403, "grad_norm": 1.358710527420044, "learning_rate": 9.673913043478262e-06, "loss": 38.4107, "step": 9561 }, { "epoch": 227.6686567164179, "grad_norm": 1.333219289779663, "learning_rate": 9.672877846790891e-06, "loss": 39.7813, "step": 9562 }, { "epoch": 227.69253731343284, "grad_norm": 1.3333373069763184, "learning_rate": 9.67184265010352e-06, "loss": 39.5559, "step": 9563 }, { "epoch": 227.71641791044777, "grad_norm": 1.3581284284591675, "learning_rate": 9.67080745341615e-06, "loss": 39.0284, "step": 9564 }, { "epoch": 227.74029850746268, "grad_norm": 1.348687767982483, "learning_rate": 9.669772256728778e-06, "loss": 39.7929, "step": 9565 }, { "epoch": 227.76417910447762, "grad_norm": 1.3237048387527466, "learning_rate": 9.66873706004141e-06, "loss": 40.1222, "step": 9566 }, { "epoch": 227.78805970149253, "grad_norm": 1.3636786937713623, "learning_rate": 9.667701863354037e-06, "loss": 39.7174, "step": 9567 }, { "epoch": 227.81194029850747, "grad_norm": 1.3614082336425781, "learning_rate": 9.666666666666667e-06, "loss": 40.0849, "step": 9568 }, { "epoch": 227.83582089552237, "grad_norm": 1.3428670167922974, "learning_rate": 9.665631469979296e-06, "loss": 39.7408, "step": 9569 }, { "epoch": 227.8597014925373, "grad_norm": 1.3102476596832275, "learning_rate": 9.664596273291926e-06, "loss": 39.2901, "step": 9570 }, { "epoch": 227.88358208955225, "grad_norm": 1.35796320438385, "learning_rate": 9.663561076604555e-06, "loss": 39.8195, "step": 9571 }, { "epoch": 227.90746268656716, "grad_norm": 1.3382140398025513, "learning_rate": 9.662525879917185e-06, "loss": 39.5337, "step": 9572 }, { "epoch": 227.9313432835821, "grad_norm": 1.3213368654251099, "learning_rate": 9.661490683229814e-06, "loss": 38.6362, "step": 9573 }, { "epoch": 227.955223880597, "grad_norm": 1.3681060075759888, "learning_rate": 9.660455486542444e-06, "loss": 39.4632, "step": 9574 }, { "epoch": 227.97910447761194, "grad_norm": 1.3684992790222168, "learning_rate": 9.659420289855073e-06, "loss": 39.3785, "step": 9575 }, { "epoch": 228.0, "grad_norm": 1.327043056488037, "learning_rate": 9.658385093167703e-06, "loss": 34.7685, "step": 9576 }, { "epoch": 228.02388059701494, "grad_norm": 1.3496659994125366, "learning_rate": 9.657349896480332e-06, "loss": 39.8264, "step": 9577 }, { "epoch": 228.04776119402985, "grad_norm": 1.4417728185653687, "learning_rate": 9.656314699792962e-06, "loss": 39.711, "step": 9578 }, { "epoch": 228.07164179104478, "grad_norm": 1.338230848312378, "learning_rate": 9.655279503105592e-06, "loss": 40.0961, "step": 9579 }, { "epoch": 228.0955223880597, "grad_norm": 1.3586984872817993, "learning_rate": 9.65424430641822e-06, "loss": 39.2301, "step": 9580 }, { "epoch": 228.11940298507463, "grad_norm": 1.3672696352005005, "learning_rate": 9.65320910973085e-06, "loss": 38.9523, "step": 9581 }, { "epoch": 228.14328358208957, "grad_norm": 1.322885274887085, "learning_rate": 9.652173913043478e-06, "loss": 39.5072, "step": 9582 }, { "epoch": 228.16716417910447, "grad_norm": 1.3692727088928223, "learning_rate": 9.651138716356108e-06, "loss": 39.8186, "step": 9583 }, { "epoch": 228.1910447761194, "grad_norm": 1.3611680269241333, "learning_rate": 9.650103519668737e-06, "loss": 39.4437, "step": 9584 }, { "epoch": 228.21492537313432, "grad_norm": 1.3301541805267334, "learning_rate": 9.649068322981367e-06, "loss": 38.5667, "step": 9585 }, { "epoch": 228.23880597014926, "grad_norm": 1.3049309253692627, "learning_rate": 9.648033126293997e-06, "loss": 40.4449, "step": 9586 }, { "epoch": 228.26268656716417, "grad_norm": 1.3288193941116333, "learning_rate": 9.646997929606626e-06, "loss": 40.3638, "step": 9587 }, { "epoch": 228.2865671641791, "grad_norm": 1.3339934349060059, "learning_rate": 9.645962732919256e-06, "loss": 40.4632, "step": 9588 }, { "epoch": 228.31044776119404, "grad_norm": 1.3482518196105957, "learning_rate": 9.644927536231885e-06, "loss": 39.488, "step": 9589 }, { "epoch": 228.33432835820895, "grad_norm": 1.3517035245895386, "learning_rate": 9.643892339544515e-06, "loss": 40.4888, "step": 9590 }, { "epoch": 228.3582089552239, "grad_norm": 1.3346763849258423, "learning_rate": 9.642857142857144e-06, "loss": 39.0247, "step": 9591 }, { "epoch": 228.3820895522388, "grad_norm": 1.3066086769104004, "learning_rate": 9.641821946169772e-06, "loss": 40.5208, "step": 9592 }, { "epoch": 228.40597014925373, "grad_norm": 1.3271929025650024, "learning_rate": 9.640786749482403e-06, "loss": 40.1073, "step": 9593 }, { "epoch": 228.42985074626867, "grad_norm": 1.3825958967208862, "learning_rate": 9.639751552795031e-06, "loss": 39.1985, "step": 9594 }, { "epoch": 228.45373134328358, "grad_norm": 1.3625894784927368, "learning_rate": 9.63871635610766e-06, "loss": 40.3474, "step": 9595 }, { "epoch": 228.47761194029852, "grad_norm": 1.3357809782028198, "learning_rate": 9.63768115942029e-06, "loss": 40.0981, "step": 9596 }, { "epoch": 228.50149253731342, "grad_norm": 1.37427818775177, "learning_rate": 9.63664596273292e-06, "loss": 39.8691, "step": 9597 }, { "epoch": 228.52537313432836, "grad_norm": NaN, "learning_rate": 9.63561076604555e-06, "loss": 39.646, "step": 9598 }, { "epoch": 228.54925373134327, "grad_norm": 1.3784396648406982, "learning_rate": 9.63561076604555e-06, "loss": 39.312, "step": 9599 }, { "epoch": 228.5731343283582, "grad_norm": 1.38670015335083, "learning_rate": 9.634575569358179e-06, "loss": 39.5067, "step": 9600 }, { "epoch": 228.59701492537314, "grad_norm": 1.345752477645874, "learning_rate": 9.633540372670808e-06, "loss": 40.2929, "step": 9601 }, { "epoch": 228.62089552238805, "grad_norm": 1.3767436742782593, "learning_rate": 9.632505175983438e-06, "loss": 39.7058, "step": 9602 }, { "epoch": 228.644776119403, "grad_norm": 1.362173080444336, "learning_rate": 9.631469979296067e-06, "loss": 40.1101, "step": 9603 }, { "epoch": 228.6686567164179, "grad_norm": 1.356995701789856, "learning_rate": 9.630434782608697e-06, "loss": 39.8145, "step": 9604 }, { "epoch": 228.69253731343284, "grad_norm": 1.3331892490386963, "learning_rate": 9.629399585921326e-06, "loss": 37.9391, "step": 9605 }, { "epoch": 228.71641791044777, "grad_norm": 1.3248459100723267, "learning_rate": 9.628364389233956e-06, "loss": 39.406, "step": 9606 }, { "epoch": 228.74029850746268, "grad_norm": 1.3589906692504883, "learning_rate": 9.627329192546585e-06, "loss": 41.0822, "step": 9607 }, { "epoch": 228.76417910447762, "grad_norm": 1.331457495689392, "learning_rate": 9.626293995859213e-06, "loss": 40.6817, "step": 9608 }, { "epoch": 228.78805970149253, "grad_norm": 1.3475526571273804, "learning_rate": 9.625258799171844e-06, "loss": 41.0865, "step": 9609 }, { "epoch": 228.81194029850747, "grad_norm": 1.365019679069519, "learning_rate": 9.624223602484472e-06, "loss": 39.888, "step": 9610 }, { "epoch": 228.83582089552237, "grad_norm": 1.3595030307769775, "learning_rate": 9.623188405797102e-06, "loss": 38.7912, "step": 9611 }, { "epoch": 228.8597014925373, "grad_norm": 1.3471099138259888, "learning_rate": 9.622153209109731e-06, "loss": 38.5168, "step": 9612 }, { "epoch": 228.88358208955225, "grad_norm": 1.33576238155365, "learning_rate": 9.621118012422361e-06, "loss": 39.6171, "step": 9613 }, { "epoch": 228.90746268656716, "grad_norm": NaN, "learning_rate": 9.62008281573499e-06, "loss": 45.9485, "step": 9614 }, { "epoch": 228.9313432835821, "grad_norm": 1.362033486366272, "learning_rate": 9.62008281573499e-06, "loss": 40.2318, "step": 9615 }, { "epoch": 228.955223880597, "grad_norm": 1.385781168937683, "learning_rate": 9.61904761904762e-06, "loss": 39.5371, "step": 9616 }, { "epoch": 228.97910447761194, "grad_norm": 1.3362168073654175, "learning_rate": 9.61801242236025e-06, "loss": 39.9066, "step": 9617 }, { "epoch": 229.0, "grad_norm": 1.341375708580017, "learning_rate": 9.616977225672879e-06, "loss": 34.8441, "step": 9618 }, { "epoch": 229.02388059701494, "grad_norm": 1.3617026805877686, "learning_rate": 9.615942028985507e-06, "loss": 38.4933, "step": 9619 }, { "epoch": 229.04776119402985, "grad_norm": 1.3677945137023926, "learning_rate": 9.614906832298138e-06, "loss": 39.5071, "step": 9620 }, { "epoch": 229.07164179104478, "grad_norm": 1.3386653661727905, "learning_rate": 9.613871635610766e-06, "loss": 39.656, "step": 9621 }, { "epoch": 229.0955223880597, "grad_norm": 1.3384205102920532, "learning_rate": 9.612836438923397e-06, "loss": 39.6503, "step": 9622 }, { "epoch": 229.11940298507463, "grad_norm": 1.3204118013381958, "learning_rate": 9.611801242236025e-06, "loss": 40.2757, "step": 9623 }, { "epoch": 229.14328358208957, "grad_norm": 1.3410823345184326, "learning_rate": 9.610766045548654e-06, "loss": 39.8907, "step": 9624 }, { "epoch": 229.16716417910447, "grad_norm": 1.3400824069976807, "learning_rate": 9.609730848861284e-06, "loss": 39.1518, "step": 9625 }, { "epoch": 229.1910447761194, "grad_norm": 1.3465919494628906, "learning_rate": 9.608695652173914e-06, "loss": 40.5757, "step": 9626 }, { "epoch": 229.21492537313432, "grad_norm": 1.3479182720184326, "learning_rate": 9.607660455486543e-06, "loss": 40.214, "step": 9627 }, { "epoch": 229.23880597014926, "grad_norm": 1.3469041585922241, "learning_rate": 9.606625258799173e-06, "loss": 40.8142, "step": 9628 }, { "epoch": 229.26268656716417, "grad_norm": 1.3487439155578613, "learning_rate": 9.605590062111802e-06, "loss": 40.448, "step": 9629 }, { "epoch": 229.2865671641791, "grad_norm": 1.3353657722473145, "learning_rate": 9.604554865424432e-06, "loss": 39.1099, "step": 9630 }, { "epoch": 229.31044776119404, "grad_norm": NaN, "learning_rate": 9.603519668737061e-06, "loss": 54.7709, "step": 9631 }, { "epoch": 229.33432835820895, "grad_norm": 1.370018482208252, "learning_rate": 9.603519668737061e-06, "loss": 38.8651, "step": 9632 }, { "epoch": 229.3582089552239, "grad_norm": 1.3512285947799683, "learning_rate": 9.60248447204969e-06, "loss": 40.2204, "step": 9633 }, { "epoch": 229.3820895522388, "grad_norm": 1.356023907661438, "learning_rate": 9.60144927536232e-06, "loss": 39.9136, "step": 9634 }, { "epoch": 229.40597014925373, "grad_norm": 1.3658312559127808, "learning_rate": 9.600414078674948e-06, "loss": 38.9349, "step": 9635 }, { "epoch": 229.42985074626867, "grad_norm": 1.3770402669906616, "learning_rate": 9.59937888198758e-06, "loss": 39.7168, "step": 9636 }, { "epoch": 229.45373134328358, "grad_norm": 1.3463616371154785, "learning_rate": 9.598343685300207e-06, "loss": 39.9859, "step": 9637 }, { "epoch": 229.47761194029852, "grad_norm": 1.349575161933899, "learning_rate": 9.597308488612838e-06, "loss": 39.9547, "step": 9638 }, { "epoch": 229.50149253731342, "grad_norm": 1.3597240447998047, "learning_rate": 9.596273291925466e-06, "loss": 40.4211, "step": 9639 }, { "epoch": 229.52537313432836, "grad_norm": 1.3554342985153198, "learning_rate": 9.595238095238096e-06, "loss": 39.9895, "step": 9640 }, { "epoch": 229.54925373134327, "grad_norm": 1.3045212030410767, "learning_rate": 9.594202898550725e-06, "loss": 39.9614, "step": 9641 }, { "epoch": 229.5731343283582, "grad_norm": 1.3330425024032593, "learning_rate": 9.593167701863355e-06, "loss": 39.3869, "step": 9642 }, { "epoch": 229.59701492537314, "grad_norm": 1.3166472911834717, "learning_rate": 9.592132505175984e-06, "loss": 40.0115, "step": 9643 }, { "epoch": 229.62089552238805, "grad_norm": 1.301077961921692, "learning_rate": 9.591097308488614e-06, "loss": 39.1683, "step": 9644 }, { "epoch": 229.644776119403, "grad_norm": 1.3683960437774658, "learning_rate": 9.590062111801243e-06, "loss": 40.3337, "step": 9645 }, { "epoch": 229.6686567164179, "grad_norm": 1.312597393989563, "learning_rate": 9.589026915113873e-06, "loss": 39.0743, "step": 9646 }, { "epoch": 229.69253731343284, "grad_norm": 1.3745251893997192, "learning_rate": 9.5879917184265e-06, "loss": 40.3529, "step": 9647 }, { "epoch": 229.71641791044777, "grad_norm": 1.3252466917037964, "learning_rate": 9.586956521739132e-06, "loss": 39.9985, "step": 9648 }, { "epoch": 229.74029850746268, "grad_norm": 1.3190704584121704, "learning_rate": 9.58592132505176e-06, "loss": 40.2372, "step": 9649 }, { "epoch": 229.76417910447762, "grad_norm": 1.3425483703613281, "learning_rate": 9.58488612836439e-06, "loss": 39.3226, "step": 9650 }, { "epoch": 229.78805970149253, "grad_norm": 1.3766599893569946, "learning_rate": 9.583850931677019e-06, "loss": 39.246, "step": 9651 }, { "epoch": 229.81194029850747, "grad_norm": 1.3367522954940796, "learning_rate": 9.582815734989648e-06, "loss": 39.9684, "step": 9652 }, { "epoch": 229.83582089552237, "grad_norm": 1.3724370002746582, "learning_rate": 9.581780538302278e-06, "loss": 39.7352, "step": 9653 }, { "epoch": 229.8597014925373, "grad_norm": 1.343762755393982, "learning_rate": 9.580745341614907e-06, "loss": 40.2327, "step": 9654 }, { "epoch": 229.88358208955225, "grad_norm": 1.3285309076309204, "learning_rate": 9.579710144927537e-06, "loss": 39.2824, "step": 9655 }, { "epoch": 229.90746268656716, "grad_norm": 1.339512825012207, "learning_rate": 9.578674948240167e-06, "loss": 39.4302, "step": 9656 }, { "epoch": 229.9313432835821, "grad_norm": 1.3920446634292603, "learning_rate": 9.577639751552796e-06, "loss": 38.2276, "step": 9657 }, { "epoch": 229.955223880597, "grad_norm": 1.3495386838912964, "learning_rate": 9.576604554865426e-06, "loss": 39.768, "step": 9658 }, { "epoch": 229.97910447761194, "grad_norm": 1.342163324356079, "learning_rate": 9.575569358178055e-06, "loss": 40.5629, "step": 9659 }, { "epoch": 230.0, "grad_norm": 1.324639081954956, "learning_rate": 9.574534161490685e-06, "loss": 36.2474, "step": 9660 }, { "epoch": 230.0, "step": 9660, "total_flos": 4.784695863242847e+17, "train_loss": 1.7311483258786409, "train_runtime": 12643.4577, "train_samples_per_second": 97.359, "train_steps_per_second": 0.764 }, { "epoch": 230.02388059701494, "grad_norm": 2.2165043354034424, "learning_rate": 1e-05, "loss": 39.91, "step": 9661 }, { "epoch": 230.04776119402985, "grad_norm": 2.077378988265991, "learning_rate": 9.99904761904762e-06, "loss": 39.9395, "step": 9662 }, { "epoch": 230.07164179104478, "grad_norm": 1.9431217908859253, "learning_rate": 9.998095238095239e-06, "loss": 40.1437, "step": 9663 }, { "epoch": 230.0955223880597, "grad_norm": 1.7516640424728394, "learning_rate": 9.997142857142858e-06, "loss": 39.54, "step": 9664 }, { "epoch": 230.11940298507463, "grad_norm": 1.5745165348052979, "learning_rate": 9.996190476190476e-06, "loss": 40.6148, "step": 9665 }, { "epoch": 230.14328358208957, "grad_norm": 1.4591950178146362, "learning_rate": 9.995238095238095e-06, "loss": 40.5781, "step": 9666 }, { "epoch": 230.16716417910447, "grad_norm": 1.4156869649887085, "learning_rate": 9.994285714285716e-06, "loss": 40.3919, "step": 9667 }, { "epoch": 230.1910447761194, "grad_norm": 1.4844175577163696, "learning_rate": 9.993333333333333e-06, "loss": 39.1642, "step": 9668 }, { "epoch": 230.21492537313432, "grad_norm": 1.401119589805603, "learning_rate": 9.992380952380954e-06, "loss": 39.3483, "step": 9669 }, { "epoch": 230.23880597014926, "grad_norm": 1.4175094366073608, "learning_rate": 9.991428571428573e-06, "loss": 39.1473, "step": 9670 }, { "epoch": 230.26268656716417, "grad_norm": 1.466611385345459, "learning_rate": 9.990476190476191e-06, "loss": 39.7576, "step": 9671 }, { "epoch": 230.2865671641791, "grad_norm": 1.500809669494629, "learning_rate": 9.98952380952381e-06, "loss": 39.614, "step": 9672 }, { "epoch": 230.31044776119404, "grad_norm": 1.5656704902648926, "learning_rate": 9.98857142857143e-06, "loss": 39.7976, "step": 9673 }, { "epoch": 230.33432835820895, "grad_norm": 1.508496880531311, "learning_rate": 9.987619047619048e-06, "loss": 39.5882, "step": 9674 }, { "epoch": 230.3582089552239, "grad_norm": 1.449815273284912, "learning_rate": 9.986666666666667e-06, "loss": 39.7779, "step": 9675 }, { "epoch": 230.3820895522388, "grad_norm": 1.3801860809326172, "learning_rate": 9.985714285714286e-06, "loss": 39.7777, "step": 9676 }, { "epoch": 230.40597014925373, "grad_norm": 1.5229625701904297, "learning_rate": 9.984761904761907e-06, "loss": 39.4329, "step": 9677 }, { "epoch": 230.42985074626867, "grad_norm": 1.3847858905792236, "learning_rate": 9.983809523809524e-06, "loss": 41.0728, "step": 9678 }, { "epoch": 230.45373134328358, "grad_norm": 1.3858407735824585, "learning_rate": 9.982857142857144e-06, "loss": 39.8538, "step": 9679 }, { "epoch": 230.47761194029852, "grad_norm": 1.3731153011322021, "learning_rate": 9.981904761904763e-06, "loss": 39.6677, "step": 9680 }, { "epoch": 230.50149253731342, "grad_norm": 1.3447394371032715, "learning_rate": 9.980952380952382e-06, "loss": 40.8373, "step": 9681 }, { "epoch": 230.52537313432836, "grad_norm": 1.381157398223877, "learning_rate": 9.980000000000001e-06, "loss": 39.1289, "step": 9682 }, { "epoch": 230.54925373134327, "grad_norm": 1.3875540494918823, "learning_rate": 9.97904761904762e-06, "loss": 39.8555, "step": 9683 }, { "epoch": 230.5731343283582, "grad_norm": 1.4050652980804443, "learning_rate": 9.978095238095239e-06, "loss": 39.974, "step": 9684 }, { "epoch": 230.59701492537314, "grad_norm": 1.359092116355896, "learning_rate": 9.977142857142858e-06, "loss": 39.1765, "step": 9685 }, { "epoch": 230.62089552238805, "grad_norm": 1.3525128364562988, "learning_rate": 9.976190476190477e-06, "loss": 39.7759, "step": 9686 }, { "epoch": 230.644776119403, "grad_norm": 1.3912075757980347, "learning_rate": 9.975238095238095e-06, "loss": 40.6655, "step": 9687 }, { "epoch": 230.6686567164179, "grad_norm": 1.4028277397155762, "learning_rate": 9.974285714285716e-06, "loss": 39.508, "step": 9688 }, { "epoch": 230.69253731343284, "grad_norm": 1.3725173473358154, "learning_rate": 9.973333333333333e-06, "loss": 39.0736, "step": 9689 }, { "epoch": 230.71641791044777, "grad_norm": 1.3806560039520264, "learning_rate": 9.972380952380954e-06, "loss": 40.4848, "step": 9690 }, { "epoch": 230.74029850746268, "grad_norm": 1.3471945524215698, "learning_rate": 9.971428571428571e-06, "loss": 39.8429, "step": 9691 }, { "epoch": 230.76417910447762, "grad_norm": 1.3350354433059692, "learning_rate": 9.970476190476192e-06, "loss": 40.3292, "step": 9692 }, { "epoch": 230.78805970149253, "grad_norm": 1.3325014114379883, "learning_rate": 9.96952380952381e-06, "loss": 40.9846, "step": 9693 }, { "epoch": 230.81194029850747, "grad_norm": 1.3361607789993286, "learning_rate": 9.96857142857143e-06, "loss": 38.373, "step": 9694 }, { "epoch": 230.83582089552237, "grad_norm": 1.3524616956710815, "learning_rate": 9.967619047619048e-06, "loss": 40.1905, "step": 9695 }, { "epoch": 230.8597014925373, "grad_norm": 1.4148274660110474, "learning_rate": 9.966666666666667e-06, "loss": 39.3139, "step": 9696 }, { "epoch": 230.88358208955225, "grad_norm": 1.3747048377990723, "learning_rate": 9.965714285714286e-06, "loss": 39.0471, "step": 9697 }, { "epoch": 230.90746268656716, "grad_norm": 1.4271262884140015, "learning_rate": 9.964761904761907e-06, "loss": 40.1761, "step": 9698 }, { "epoch": 230.9313432835821, "grad_norm": 1.5008985996246338, "learning_rate": 9.963809523809524e-06, "loss": 40.5605, "step": 9699 }, { "epoch": 230.955223880597, "grad_norm": 1.3419324159622192, "learning_rate": 9.962857142857145e-06, "loss": 40.9123, "step": 9700 }, { "epoch": 230.97910447761194, "grad_norm": 1.3598048686981201, "learning_rate": 9.961904761904763e-06, "loss": 39.3603, "step": 9701 }, { "epoch": 231.0, "grad_norm": 1.3711756467819214, "learning_rate": 9.960952380952382e-06, "loss": 34.3071, "step": 9702 }, { "epoch": 231.02388059701494, "grad_norm": 1.3833802938461304, "learning_rate": 9.960000000000001e-06, "loss": 40.4697, "step": 9703 }, { "epoch": 231.04776119402985, "grad_norm": 1.3698652982711792, "learning_rate": 9.95904761904762e-06, "loss": 41.0127, "step": 9704 }, { "epoch": 231.07164179104478, "grad_norm": 1.3227285146713257, "learning_rate": 9.958095238095239e-06, "loss": 40.9239, "step": 9705 }, { "epoch": 231.0955223880597, "grad_norm": 1.3637263774871826, "learning_rate": 9.957142857142858e-06, "loss": 39.2752, "step": 9706 }, { "epoch": 231.11940298507463, "grad_norm": 1.3667349815368652, "learning_rate": 9.956190476190477e-06, "loss": 40.2914, "step": 9707 }, { "epoch": 231.14328358208957, "grad_norm": 1.3662141561508179, "learning_rate": 9.955238095238096e-06, "loss": 39.6599, "step": 9708 }, { "epoch": 231.16716417910447, "grad_norm": 1.3711718320846558, "learning_rate": 9.954285714285715e-06, "loss": 39.1501, "step": 9709 }, { "epoch": 231.1910447761194, "grad_norm": 1.387554407119751, "learning_rate": 9.953333333333333e-06, "loss": 40.0788, "step": 9710 }, { "epoch": 231.21492537313432, "grad_norm": 1.4017770290374756, "learning_rate": 9.952380952380954e-06, "loss": 40.028, "step": 9711 }, { "epoch": 231.23880597014926, "grad_norm": 1.3812487125396729, "learning_rate": 9.951428571428571e-06, "loss": 39.8307, "step": 9712 }, { "epoch": 231.26268656716417, "grad_norm": 1.3609212636947632, "learning_rate": 9.950476190476192e-06, "loss": 38.7208, "step": 9713 }, { "epoch": 231.2865671641791, "grad_norm": 1.35236394405365, "learning_rate": 9.94952380952381e-06, "loss": 38.5736, "step": 9714 }, { "epoch": 231.31044776119404, "grad_norm": 1.3419808149337769, "learning_rate": 9.94857142857143e-06, "loss": 40.1127, "step": 9715 }, { "epoch": 231.33432835820895, "grad_norm": 1.3415052890777588, "learning_rate": 9.947619047619049e-06, "loss": 40.5739, "step": 9716 }, { "epoch": 231.3582089552239, "grad_norm": 1.3404567241668701, "learning_rate": 9.946666666666667e-06, "loss": 38.4315, "step": 9717 }, { "epoch": 231.3820895522388, "grad_norm": 1.3025932312011719, "learning_rate": 9.945714285714286e-06, "loss": 40.4073, "step": 9718 }, { "epoch": 231.40597014925373, "grad_norm": 1.341050148010254, "learning_rate": 9.944761904761905e-06, "loss": 39.1742, "step": 9719 }, { "epoch": 231.42985074626867, "grad_norm": 1.307681918144226, "learning_rate": 9.943809523809524e-06, "loss": 38.7751, "step": 9720 }, { "epoch": 231.45373134328358, "grad_norm": 1.468979001045227, "learning_rate": 9.942857142857145e-06, "loss": 38.7507, "step": 9721 }, { "epoch": 231.47761194029852, "grad_norm": 1.3528684377670288, "learning_rate": 9.941904761904762e-06, "loss": 38.8433, "step": 9722 }, { "epoch": 231.50149253731342, "grad_norm": 1.3368147611618042, "learning_rate": 9.940952380952382e-06, "loss": 39.8478, "step": 9723 }, { "epoch": 231.52537313432836, "grad_norm": 1.3562647104263306, "learning_rate": 9.940000000000001e-06, "loss": 41.0683, "step": 9724 }, { "epoch": 231.54925373134327, "grad_norm": 1.3462185859680176, "learning_rate": 9.93904761904762e-06, "loss": 39.9929, "step": 9725 }, { "epoch": 231.5731343283582, "grad_norm": 1.4004838466644287, "learning_rate": 9.93809523809524e-06, "loss": 40.3039, "step": 9726 }, { "epoch": 231.59701492537314, "grad_norm": 1.3602378368377686, "learning_rate": 9.937142857142858e-06, "loss": 40.0972, "step": 9727 }, { "epoch": 231.62089552238805, "grad_norm": 1.3423093557357788, "learning_rate": 9.936190476190477e-06, "loss": 39.6629, "step": 9728 }, { "epoch": 231.644776119403, "grad_norm": 1.3681665658950806, "learning_rate": 9.935238095238096e-06, "loss": 38.8883, "step": 9729 }, { "epoch": 231.6686567164179, "grad_norm": 1.363445520401001, "learning_rate": 9.934285714285715e-06, "loss": 40.3684, "step": 9730 }, { "epoch": 231.69253731343284, "grad_norm": 1.3717706203460693, "learning_rate": 9.933333333333334e-06, "loss": 40.0941, "step": 9731 }, { "epoch": 231.71641791044777, "grad_norm": 1.3465790748596191, "learning_rate": 9.932380952380953e-06, "loss": 40.1694, "step": 9732 }, { "epoch": 231.74029850746268, "grad_norm": 1.4129822254180908, "learning_rate": 9.931428571428571e-06, "loss": 39.7319, "step": 9733 }, { "epoch": 231.76417910447762, "grad_norm": 1.3353614807128906, "learning_rate": 9.930476190476192e-06, "loss": 40.1441, "step": 9734 }, { "epoch": 231.78805970149253, "grad_norm": 1.3412938117980957, "learning_rate": 9.92952380952381e-06, "loss": 39.3, "step": 9735 }, { "epoch": 231.81194029850747, "grad_norm": 1.3273136615753174, "learning_rate": 9.92857142857143e-06, "loss": 40.6211, "step": 9736 }, { "epoch": 231.83582089552237, "grad_norm": 1.3248298168182373, "learning_rate": 9.927619047619049e-06, "loss": 40.1911, "step": 9737 }, { "epoch": 231.8597014925373, "grad_norm": 1.359377145767212, "learning_rate": 9.926666666666668e-06, "loss": 41.3342, "step": 9738 }, { "epoch": 231.88358208955225, "grad_norm": 1.3656814098358154, "learning_rate": 9.925714285714287e-06, "loss": 38.3967, "step": 9739 }, { "epoch": 231.90746268656716, "grad_norm": 1.3167600631713867, "learning_rate": 9.924761904761905e-06, "loss": 41.2593, "step": 9740 }, { "epoch": 231.9313432835821, "grad_norm": 1.3543676137924194, "learning_rate": 9.923809523809524e-06, "loss": 40.16, "step": 9741 }, { "epoch": 231.955223880597, "grad_norm": 1.3769943714141846, "learning_rate": 9.922857142857145e-06, "loss": 40.1777, "step": 9742 }, { "epoch": 231.97910447761194, "grad_norm": 1.3209176063537598, "learning_rate": 9.921904761904762e-06, "loss": 39.4868, "step": 9743 }, { "epoch": 232.0, "grad_norm": 1.3689621686935425, "learning_rate": 9.920952380952383e-06, "loss": 33.5905, "step": 9744 }, { "epoch": 232.02388059701494, "grad_norm": 1.3862109184265137, "learning_rate": 9.920000000000002e-06, "loss": 40.3901, "step": 9745 }, { "epoch": 232.04776119402985, "grad_norm": 1.3593745231628418, "learning_rate": 9.91904761904762e-06, "loss": 40.1192, "step": 9746 }, { "epoch": 232.07164179104478, "grad_norm": 1.347745656967163, "learning_rate": 9.91809523809524e-06, "loss": 39.5358, "step": 9747 }, { "epoch": 232.0955223880597, "grad_norm": 1.3528501987457275, "learning_rate": 9.917142857142857e-06, "loss": 40.2121, "step": 9748 }, { "epoch": 232.11940298507463, "grad_norm": 1.3461812734603882, "learning_rate": 9.916190476190477e-06, "loss": 39.9294, "step": 9749 }, { "epoch": 232.14328358208957, "grad_norm": 1.4147793054580688, "learning_rate": 9.915238095238096e-06, "loss": 38.2577, "step": 9750 }, { "epoch": 232.16716417910447, "grad_norm": 1.3528212308883667, "learning_rate": 9.914285714285715e-06, "loss": 39.197, "step": 9751 }, { "epoch": 232.1910447761194, "grad_norm": 1.3547859191894531, "learning_rate": 9.913333333333334e-06, "loss": 39.222, "step": 9752 }, { "epoch": 232.21492537313432, "grad_norm": 1.3635015487670898, "learning_rate": 9.912380952380953e-06, "loss": 40.2342, "step": 9753 }, { "epoch": 232.23880597014926, "grad_norm": 1.3333666324615479, "learning_rate": 9.911428571428572e-06, "loss": 41.0543, "step": 9754 }, { "epoch": 232.26268656716417, "grad_norm": 1.3469040393829346, "learning_rate": 9.910476190476192e-06, "loss": 39.8992, "step": 9755 }, { "epoch": 232.2865671641791, "grad_norm": 1.3848040103912354, "learning_rate": 9.90952380952381e-06, "loss": 40.1289, "step": 9756 }, { "epoch": 232.31044776119404, "grad_norm": 1.3418474197387695, "learning_rate": 9.90857142857143e-06, "loss": 39.2568, "step": 9757 }, { "epoch": 232.33432835820895, "grad_norm": 1.4243640899658203, "learning_rate": 9.907619047619049e-06, "loss": 40.0749, "step": 9758 }, { "epoch": 232.3582089552239, "grad_norm": 1.3422454595565796, "learning_rate": 9.906666666666668e-06, "loss": 39.8517, "step": 9759 }, { "epoch": 232.3820895522388, "grad_norm": 1.3334932327270508, "learning_rate": 9.905714285714287e-06, "loss": 39.0975, "step": 9760 }, { "epoch": 232.40597014925373, "grad_norm": 1.3798540830612183, "learning_rate": 9.904761904761906e-06, "loss": 39.4456, "step": 9761 }, { "epoch": 232.42985074626867, "grad_norm": 1.3527202606201172, "learning_rate": 9.903809523809524e-06, "loss": 38.6602, "step": 9762 }, { "epoch": 232.45373134328358, "grad_norm": 1.3233518600463867, "learning_rate": 9.902857142857143e-06, "loss": 39.3642, "step": 9763 }, { "epoch": 232.47761194029852, "grad_norm": 1.3564311265945435, "learning_rate": 9.901904761904762e-06, "loss": 39.8121, "step": 9764 }, { "epoch": 232.50149253731342, "grad_norm": 1.358676791191101, "learning_rate": 9.900952380952383e-06, "loss": 39.4628, "step": 9765 }, { "epoch": 232.52537313432836, "grad_norm": 1.3360592126846313, "learning_rate": 9.9e-06, "loss": 40.4735, "step": 9766 }, { "epoch": 232.54925373134327, "grad_norm": 1.3083208799362183, "learning_rate": 9.89904761904762e-06, "loss": 39.9826, "step": 9767 }, { "epoch": 232.5731343283582, "grad_norm": 1.3417452573776245, "learning_rate": 9.89809523809524e-06, "loss": 40.394, "step": 9768 }, { "epoch": 232.59701492537314, "grad_norm": 1.3331201076507568, "learning_rate": 9.897142857142858e-06, "loss": 40.5092, "step": 9769 }, { "epoch": 232.62089552238805, "grad_norm": 1.362632155418396, "learning_rate": 9.896190476190477e-06, "loss": 40.0627, "step": 9770 }, { "epoch": 232.644776119403, "grad_norm": 1.3329161405563354, "learning_rate": 9.895238095238096e-06, "loss": 39.4997, "step": 9771 }, { "epoch": 232.6686567164179, "grad_norm": 1.408402442932129, "learning_rate": 9.894285714285715e-06, "loss": 39.6208, "step": 9772 }, { "epoch": 232.69253731343284, "grad_norm": 1.3490171432495117, "learning_rate": 9.893333333333334e-06, "loss": 40.5537, "step": 9773 }, { "epoch": 232.71641791044777, "grad_norm": 1.356587290763855, "learning_rate": 9.892380952380953e-06, "loss": 39.8758, "step": 9774 }, { "epoch": 232.74029850746268, "grad_norm": 1.3745895624160767, "learning_rate": 9.891428571428572e-06, "loss": 40.506, "step": 9775 }, { "epoch": 232.76417910447762, "grad_norm": 1.3150873184204102, "learning_rate": 9.89047619047619e-06, "loss": 39.7114, "step": 9776 }, { "epoch": 232.78805970149253, "grad_norm": 1.353867769241333, "learning_rate": 9.88952380952381e-06, "loss": 40.4218, "step": 9777 }, { "epoch": 232.81194029850747, "grad_norm": 1.348471999168396, "learning_rate": 9.88857142857143e-06, "loss": 40.8583, "step": 9778 }, { "epoch": 232.83582089552237, "grad_norm": 1.3528724908828735, "learning_rate": 9.887619047619047e-06, "loss": 40.34, "step": 9779 }, { "epoch": 232.8597014925373, "grad_norm": 1.3667373657226562, "learning_rate": 9.886666666666668e-06, "loss": 39.735, "step": 9780 }, { "epoch": 232.88358208955225, "grad_norm": 1.3437446355819702, "learning_rate": 9.885714285714287e-06, "loss": 38.9492, "step": 9781 }, { "epoch": 232.90746268656716, "grad_norm": 1.3860501050949097, "learning_rate": 9.884761904761906e-06, "loss": 39.8145, "step": 9782 }, { "epoch": 232.9313432835821, "grad_norm": 1.4699469804763794, "learning_rate": 9.883809523809525e-06, "loss": 39.604, "step": 9783 }, { "epoch": 232.955223880597, "grad_norm": 1.405747413635254, "learning_rate": 9.882857142857144e-06, "loss": 40.3699, "step": 9784 }, { "epoch": 232.97910447761194, "grad_norm": 1.3640702962875366, "learning_rate": 9.881904761904762e-06, "loss": 38.8816, "step": 9785 }, { "epoch": 233.0, "grad_norm": 1.3873275518417358, "learning_rate": 9.880952380952381e-06, "loss": 34.8109, "step": 9786 }, { "epoch": 233.02388059701494, "grad_norm": 1.3540536165237427, "learning_rate": 9.88e-06, "loss": 39.3951, "step": 9787 }, { "epoch": 233.04776119402985, "grad_norm": 1.3459043502807617, "learning_rate": 9.879047619047621e-06, "loss": 40.7139, "step": 9788 }, { "epoch": 233.07164179104478, "grad_norm": 1.3521199226379395, "learning_rate": 9.878095238095238e-06, "loss": 40.0704, "step": 9789 }, { "epoch": 233.0955223880597, "grad_norm": 1.35564124584198, "learning_rate": 9.877142857142859e-06, "loss": 38.5559, "step": 9790 }, { "epoch": 233.11940298507463, "grad_norm": 1.363218069076538, "learning_rate": 9.876190476190478e-06, "loss": 39.8976, "step": 9791 }, { "epoch": 233.14328358208957, "grad_norm": 1.3904558420181274, "learning_rate": 9.875238095238095e-06, "loss": 38.8942, "step": 9792 }, { "epoch": 233.16716417910447, "grad_norm": 1.3521716594696045, "learning_rate": 9.874285714285715e-06, "loss": 40.2433, "step": 9793 }, { "epoch": 233.1910447761194, "grad_norm": NaN, "learning_rate": 9.873333333333334e-06, "loss": 43.7022, "step": 9794 }, { "epoch": 233.21492537313432, "grad_norm": 1.3875044584274292, "learning_rate": 9.873333333333334e-06, "loss": 40.4332, "step": 9795 }, { "epoch": 233.23880597014926, "grad_norm": 1.3495151996612549, "learning_rate": 9.872380952380953e-06, "loss": 40.9431, "step": 9796 }, { "epoch": 233.26268656716417, "grad_norm": 1.338716745376587, "learning_rate": 9.871428571428572e-06, "loss": 40.0249, "step": 9797 }, { "epoch": 233.2865671641791, "grad_norm": 1.3555275201797485, "learning_rate": 9.870476190476191e-06, "loss": 39.2163, "step": 9798 }, { "epoch": 233.31044776119404, "grad_norm": 1.3589537143707275, "learning_rate": 9.86952380952381e-06, "loss": 40.0138, "step": 9799 }, { "epoch": 233.33432835820895, "grad_norm": 1.3312993049621582, "learning_rate": 9.86857142857143e-06, "loss": 39.9532, "step": 9800 }, { "epoch": 233.3582089552239, "grad_norm": 1.3477972745895386, "learning_rate": 9.867619047619048e-06, "loss": 40.3484, "step": 9801 }, { "epoch": 233.3820895522388, "grad_norm": 1.3427386283874512, "learning_rate": 9.866666666666668e-06, "loss": 40.1496, "step": 9802 }, { "epoch": 233.40597014925373, "grad_norm": 1.3489850759506226, "learning_rate": 9.865714285714285e-06, "loss": 40.946, "step": 9803 }, { "epoch": 233.42985074626867, "grad_norm": 1.365297555923462, "learning_rate": 9.864761904761906e-06, "loss": 40.1906, "step": 9804 }, { "epoch": 233.45373134328358, "grad_norm": 1.4054220914840698, "learning_rate": 9.863809523809525e-06, "loss": 40.7376, "step": 9805 }, { "epoch": 233.47761194029852, "grad_norm": 1.3274292945861816, "learning_rate": 9.862857142857144e-06, "loss": 39.8259, "step": 9806 }, { "epoch": 233.50149253731342, "grad_norm": 1.3642646074295044, "learning_rate": 9.861904761904763e-06, "loss": 38.445, "step": 9807 }, { "epoch": 233.52537313432836, "grad_norm": 1.4033560752868652, "learning_rate": 9.860952380952382e-06, "loss": 39.7673, "step": 9808 }, { "epoch": 233.54925373134327, "grad_norm": 1.3153945207595825, "learning_rate": 9.86e-06, "loss": 41.0768, "step": 9809 }, { "epoch": 233.5731343283582, "grad_norm": 1.3591210842132568, "learning_rate": 9.859047619047621e-06, "loss": 39.3086, "step": 9810 }, { "epoch": 233.59701492537314, "grad_norm": 1.3604938983917236, "learning_rate": 9.858095238095238e-06, "loss": 38.6969, "step": 9811 }, { "epoch": 233.62089552238805, "grad_norm": 1.5229910612106323, "learning_rate": 9.857142857142859e-06, "loss": 40.1906, "step": 9812 }, { "epoch": 233.644776119403, "grad_norm": 1.3777074813842773, "learning_rate": 9.856190476190478e-06, "loss": 39.7301, "step": 9813 }, { "epoch": 233.6686567164179, "grad_norm": 1.3796005249023438, "learning_rate": 9.855238095238095e-06, "loss": 39.8421, "step": 9814 }, { "epoch": 233.69253731343284, "grad_norm": 1.3818491697311401, "learning_rate": 9.854285714285716e-06, "loss": 40.2521, "step": 9815 }, { "epoch": 233.71641791044777, "grad_norm": 1.340197205543518, "learning_rate": 9.853333333333334e-06, "loss": 39.7718, "step": 9816 }, { "epoch": 233.74029850746268, "grad_norm": 1.3663222789764404, "learning_rate": 9.852380952380953e-06, "loss": 38.9754, "step": 9817 }, { "epoch": 233.76417910447762, "grad_norm": 1.3341926336288452, "learning_rate": 9.851428571428572e-06, "loss": 38.8085, "step": 9818 }, { "epoch": 233.78805970149253, "grad_norm": 1.340659737586975, "learning_rate": 9.850476190476191e-06, "loss": 40.9268, "step": 9819 }, { "epoch": 233.81194029850747, "grad_norm": 1.3384400606155396, "learning_rate": 9.84952380952381e-06, "loss": 39.9596, "step": 9820 }, { "epoch": 233.83582089552237, "grad_norm": 1.3401648998260498, "learning_rate": 9.848571428571429e-06, "loss": 39.2684, "step": 9821 }, { "epoch": 233.8597014925373, "grad_norm": 1.3471065759658813, "learning_rate": 9.847619047619048e-06, "loss": 40.189, "step": 9822 }, { "epoch": 233.88358208955225, "grad_norm": 1.4028085470199585, "learning_rate": 9.846666666666668e-06, "loss": 39.1658, "step": 9823 }, { "epoch": 233.90746268656716, "grad_norm": 1.386995553970337, "learning_rate": 9.845714285714286e-06, "loss": 39.4623, "step": 9824 }, { "epoch": 233.9313432835821, "grad_norm": 1.3675050735473633, "learning_rate": 9.844761904761906e-06, "loss": 39.6466, "step": 9825 }, { "epoch": 233.955223880597, "grad_norm": 1.6145753860473633, "learning_rate": 9.843809523809525e-06, "loss": 40.5197, "step": 9826 }, { "epoch": 233.97910447761194, "grad_norm": 1.3707082271575928, "learning_rate": 9.842857142857144e-06, "loss": 39.2651, "step": 9827 }, { "epoch": 234.0, "grad_norm": 1.3543726205825806, "learning_rate": 9.841904761904763e-06, "loss": 34.634, "step": 9828 }, { "epoch": 234.02388059701494, "grad_norm": 1.3321771621704102, "learning_rate": 9.840952380952382e-06, "loss": 39.0447, "step": 9829 }, { "epoch": 234.04776119402985, "grad_norm": 1.390879511833191, "learning_rate": 9.84e-06, "loss": 39.2026, "step": 9830 }, { "epoch": 234.07164179104478, "grad_norm": 1.315916895866394, "learning_rate": 9.83904761904762e-06, "loss": 40.1689, "step": 9831 }, { "epoch": 234.0955223880597, "grad_norm": 1.4182188510894775, "learning_rate": 9.838095238095238e-06, "loss": 40.3967, "step": 9832 }, { "epoch": 234.11940298507463, "grad_norm": 1.3122001886367798, "learning_rate": 9.837142857142859e-06, "loss": 39.606, "step": 9833 }, { "epoch": 234.14328358208957, "grad_norm": 1.3323293924331665, "learning_rate": 9.836190476190476e-06, "loss": 39.2767, "step": 9834 }, { "epoch": 234.16716417910447, "grad_norm": 1.3350051641464233, "learning_rate": 9.835238095238097e-06, "loss": 38.5236, "step": 9835 }, { "epoch": 234.1910447761194, "grad_norm": 1.3960555791854858, "learning_rate": 9.834285714285716e-06, "loss": 40.8632, "step": 9836 }, { "epoch": 234.21492537313432, "grad_norm": 1.358507513999939, "learning_rate": 9.833333333333333e-06, "loss": 40.7468, "step": 9837 }, { "epoch": 234.23880597014926, "grad_norm": 1.3622143268585205, "learning_rate": 9.832380952380954e-06, "loss": 39.1087, "step": 9838 }, { "epoch": 234.26268656716417, "grad_norm": 1.343444585800171, "learning_rate": 9.831428571428572e-06, "loss": 39.9373, "step": 9839 }, { "epoch": 234.2865671641791, "grad_norm": 1.3697052001953125, "learning_rate": 9.830476190476191e-06, "loss": 40.4497, "step": 9840 }, { "epoch": 234.31044776119404, "grad_norm": 1.3444923162460327, "learning_rate": 9.82952380952381e-06, "loss": 38.8108, "step": 9841 }, { "epoch": 234.33432835820895, "grad_norm": 1.32020103931427, "learning_rate": 9.828571428571429e-06, "loss": 39.6421, "step": 9842 }, { "epoch": 234.3582089552239, "grad_norm": 1.3501728773117065, "learning_rate": 9.827619047619048e-06, "loss": 40.2795, "step": 9843 }, { "epoch": 234.3820895522388, "grad_norm": 1.358120083808899, "learning_rate": 9.826666666666667e-06, "loss": 39.9834, "step": 9844 }, { "epoch": 234.40597014925373, "grad_norm": 1.3721427917480469, "learning_rate": 9.825714285714286e-06, "loss": 41.326, "step": 9845 }, { "epoch": 234.42985074626867, "grad_norm": 1.4115592241287231, "learning_rate": 9.824761904761906e-06, "loss": 39.7365, "step": 9846 }, { "epoch": 234.45373134328358, "grad_norm": 1.4171772003173828, "learning_rate": 9.823809523809524e-06, "loss": 39.3115, "step": 9847 }, { "epoch": 234.47761194029852, "grad_norm": 1.3417232036590576, "learning_rate": 9.822857142857144e-06, "loss": 40.4695, "step": 9848 }, { "epoch": 234.50149253731342, "grad_norm": 1.3986181020736694, "learning_rate": 9.821904761904763e-06, "loss": 38.9654, "step": 9849 }, { "epoch": 234.52537313432836, "grad_norm": 1.3051246404647827, "learning_rate": 9.820952380952382e-06, "loss": 40.2906, "step": 9850 }, { "epoch": 234.54925373134327, "grad_norm": 1.3423545360565186, "learning_rate": 9.820000000000001e-06, "loss": 40.4228, "step": 9851 }, { "epoch": 234.5731343283582, "grad_norm": 2.008615493774414, "learning_rate": 9.81904761904762e-06, "loss": 40.0134, "step": 9852 }, { "epoch": 234.59701492537314, "grad_norm": 1.4757206439971924, "learning_rate": 9.818095238095239e-06, "loss": 38.9586, "step": 9853 }, { "epoch": 234.62089552238805, "grad_norm": 1.3561463356018066, "learning_rate": 9.81714285714286e-06, "loss": 39.2919, "step": 9854 }, { "epoch": 234.644776119403, "grad_norm": 1.3582581281661987, "learning_rate": 9.816190476190476e-06, "loss": 40.0873, "step": 9855 }, { "epoch": 234.6686567164179, "grad_norm": 1.315716028213501, "learning_rate": 9.815238095238097e-06, "loss": 39.2044, "step": 9856 }, { "epoch": 234.69253731343284, "grad_norm": 1.383757472038269, "learning_rate": 9.814285714285716e-06, "loss": 41.1679, "step": 9857 }, { "epoch": 234.71641791044777, "grad_norm": 1.36099374294281, "learning_rate": 9.813333333333333e-06, "loss": 40.2492, "step": 9858 }, { "epoch": 234.74029850746268, "grad_norm": 1.355684757232666, "learning_rate": 9.812380952380954e-06, "loss": 40.2276, "step": 9859 }, { "epoch": 234.76417910447762, "grad_norm": 1.314220666885376, "learning_rate": 9.811428571428571e-06, "loss": 40.8976, "step": 9860 }, { "epoch": 234.78805970149253, "grad_norm": 1.3388429880142212, "learning_rate": 9.810476190476191e-06, "loss": 38.9969, "step": 9861 }, { "epoch": 234.81194029850747, "grad_norm": 1.3340685367584229, "learning_rate": 9.80952380952381e-06, "loss": 39.1033, "step": 9862 }, { "epoch": 234.83582089552237, "grad_norm": 1.3498265743255615, "learning_rate": 9.80857142857143e-06, "loss": 40.0184, "step": 9863 }, { "epoch": 234.8597014925373, "grad_norm": 1.3796048164367676, "learning_rate": 9.807619047619048e-06, "loss": 39.8503, "step": 9864 }, { "epoch": 234.88358208955225, "grad_norm": 1.3413825035095215, "learning_rate": 9.806666666666667e-06, "loss": 40.3946, "step": 9865 }, { "epoch": 234.90746268656716, "grad_norm": 1.3847122192382812, "learning_rate": 9.805714285714286e-06, "loss": 39.4012, "step": 9866 }, { "epoch": 234.9313432835821, "grad_norm": 1.3608920574188232, "learning_rate": 9.804761904761907e-06, "loss": 39.647, "step": 9867 }, { "epoch": 234.955223880597, "grad_norm": 1.3602604866027832, "learning_rate": 9.803809523809524e-06, "loss": 38.6167, "step": 9868 }, { "epoch": 234.97910447761194, "grad_norm": 1.3030024766921997, "learning_rate": 9.802857142857144e-06, "loss": 38.5093, "step": 9869 }, { "epoch": 235.0, "grad_norm": 1.3493075370788574, "learning_rate": 9.801904761904763e-06, "loss": 36.1032, "step": 9870 }, { "epoch": 235.02388059701494, "grad_norm": 1.3588885068893433, "learning_rate": 9.800952380952382e-06, "loss": 39.5363, "step": 9871 }, { "epoch": 235.04776119402985, "grad_norm": 1.392738938331604, "learning_rate": 9.800000000000001e-06, "loss": 39.2977, "step": 9872 }, { "epoch": 235.07164179104478, "grad_norm": 1.356437087059021, "learning_rate": 9.79904761904762e-06, "loss": 40.2743, "step": 9873 }, { "epoch": 235.0955223880597, "grad_norm": 1.3646345138549805, "learning_rate": 9.798095238095239e-06, "loss": 38.8699, "step": 9874 }, { "epoch": 235.11940298507463, "grad_norm": 1.3869013786315918, "learning_rate": 9.797142857142858e-06, "loss": 39.986, "step": 9875 }, { "epoch": 235.14328358208957, "grad_norm": 1.3980484008789062, "learning_rate": 9.796190476190477e-06, "loss": 39.8375, "step": 9876 }, { "epoch": 235.16716417910447, "grad_norm": 1.338494896888733, "learning_rate": 9.795238095238097e-06, "loss": 40.2183, "step": 9877 }, { "epoch": 235.1910447761194, "grad_norm": 1.3661428689956665, "learning_rate": 9.794285714285714e-06, "loss": 40.0569, "step": 9878 }, { "epoch": 235.21492537313432, "grad_norm": 1.314531683921814, "learning_rate": 9.793333333333333e-06, "loss": 40.178, "step": 9879 }, { "epoch": 235.23880597014926, "grad_norm": 1.4930675029754639, "learning_rate": 9.792380952380954e-06, "loss": 40.0809, "step": 9880 }, { "epoch": 235.26268656716417, "grad_norm": 1.3491512537002563, "learning_rate": 9.791428571428571e-06, "loss": 39.7559, "step": 9881 }, { "epoch": 235.2865671641791, "grad_norm": 1.3694266080856323, "learning_rate": 9.790476190476192e-06, "loss": 40.3367, "step": 9882 }, { "epoch": 235.31044776119404, "grad_norm": 1.3635869026184082, "learning_rate": 9.78952380952381e-06, "loss": 39.7484, "step": 9883 }, { "epoch": 235.33432835820895, "grad_norm": 1.3861565589904785, "learning_rate": 9.78857142857143e-06, "loss": 39.4471, "step": 9884 }, { "epoch": 235.3582089552239, "grad_norm": 1.3376884460449219, "learning_rate": 9.787619047619048e-06, "loss": 39.722, "step": 9885 }, { "epoch": 235.3820895522388, "grad_norm": 1.3632980585098267, "learning_rate": 9.786666666666667e-06, "loss": 39.6363, "step": 9886 }, { "epoch": 235.40597014925373, "grad_norm": 1.387425422668457, "learning_rate": 9.785714285714286e-06, "loss": 40.2709, "step": 9887 }, { "epoch": 235.42985074626867, "grad_norm": 1.360609531402588, "learning_rate": 9.784761904761905e-06, "loss": 38.8288, "step": 9888 }, { "epoch": 235.45373134328358, "grad_norm": 1.3445138931274414, "learning_rate": 9.783809523809524e-06, "loss": 39.3517, "step": 9889 }, { "epoch": 235.47761194029852, "grad_norm": 1.3215570449829102, "learning_rate": 9.782857142857145e-06, "loss": 40.1725, "step": 9890 }, { "epoch": 235.50149253731342, "grad_norm": 1.3390685319900513, "learning_rate": 9.781904761904762e-06, "loss": 40.3981, "step": 9891 }, { "epoch": 235.52537313432836, "grad_norm": 1.3828147649765015, "learning_rate": 9.780952380952382e-06, "loss": 39.0123, "step": 9892 }, { "epoch": 235.54925373134327, "grad_norm": 1.3383002281188965, "learning_rate": 9.780000000000001e-06, "loss": 39.557, "step": 9893 }, { "epoch": 235.5731343283582, "grad_norm": 1.3444499969482422, "learning_rate": 9.77904761904762e-06, "loss": 40.5504, "step": 9894 }, { "epoch": 235.59701492537314, "grad_norm": 1.377517819404602, "learning_rate": 9.778095238095239e-06, "loss": 39.2727, "step": 9895 }, { "epoch": 235.62089552238805, "grad_norm": 1.3342403173446655, "learning_rate": 9.777142857142858e-06, "loss": 38.871, "step": 9896 }, { "epoch": 235.644776119403, "grad_norm": 1.3555727005004883, "learning_rate": 9.776190476190477e-06, "loss": 39.4367, "step": 9897 }, { "epoch": 235.6686567164179, "grad_norm": 1.4033169746398926, "learning_rate": 9.775238095238096e-06, "loss": 41.5585, "step": 9898 }, { "epoch": 235.69253731343284, "grad_norm": 1.3238251209259033, "learning_rate": 9.774285714285715e-06, "loss": 40.6642, "step": 9899 }, { "epoch": 235.71641791044777, "grad_norm": 1.3458904027938843, "learning_rate": 9.773333333333335e-06, "loss": 39.4602, "step": 9900 }, { "epoch": 235.74029850746268, "grad_norm": 1.3255900144577026, "learning_rate": 9.772380952380952e-06, "loss": 40.327, "step": 9901 }, { "epoch": 235.76417910447762, "grad_norm": 1.319736361503601, "learning_rate": 9.771428571428571e-06, "loss": 40.2837, "step": 9902 }, { "epoch": 235.78805970149253, "grad_norm": 1.360119104385376, "learning_rate": 9.770476190476192e-06, "loss": 40.1549, "step": 9903 }, { "epoch": 235.81194029850747, "grad_norm": 1.3451131582260132, "learning_rate": 9.769523809523809e-06, "loss": 39.9301, "step": 9904 }, { "epoch": 235.83582089552237, "grad_norm": 1.3332713842391968, "learning_rate": 9.76857142857143e-06, "loss": 39.6046, "step": 9905 }, { "epoch": 235.8597014925373, "grad_norm": 1.327057957649231, "learning_rate": 9.767619047619049e-06, "loss": 40.4055, "step": 9906 }, { "epoch": 235.88358208955225, "grad_norm": 1.3694969415664673, "learning_rate": 9.766666666666667e-06, "loss": 39.682, "step": 9907 }, { "epoch": 235.90746268656716, "grad_norm": NaN, "learning_rate": 9.765714285714286e-06, "loss": 57.8539, "step": 9908 }, { "epoch": 235.9313432835821, "grad_norm": 1.337732195854187, "learning_rate": 9.765714285714286e-06, "loss": 39.7394, "step": 9909 }, { "epoch": 235.955223880597, "grad_norm": 1.3237639665603638, "learning_rate": 9.764761904761905e-06, "loss": 38.9771, "step": 9910 }, { "epoch": 235.97910447761194, "grad_norm": 1.3744781017303467, "learning_rate": 9.763809523809524e-06, "loss": 39.8885, "step": 9911 }, { "epoch": 236.0, "grad_norm": 1.3877828121185303, "learning_rate": 9.762857142857145e-06, "loss": 34.201, "step": 9912 }, { "epoch": 236.02388059701494, "grad_norm": 1.324893832206726, "learning_rate": 9.761904761904762e-06, "loss": 40.7343, "step": 9913 }, { "epoch": 236.04776119402985, "grad_norm": 1.3420532941818237, "learning_rate": 9.760952380952383e-06, "loss": 39.9835, "step": 9914 }, { "epoch": 236.07164179104478, "grad_norm": 1.3197457790374756, "learning_rate": 9.760000000000001e-06, "loss": 39.3547, "step": 9915 }, { "epoch": 236.0955223880597, "grad_norm": 1.3196852207183838, "learning_rate": 9.75904761904762e-06, "loss": 40.5731, "step": 9916 }, { "epoch": 236.11940298507463, "grad_norm": 1.325675368309021, "learning_rate": 9.75809523809524e-06, "loss": 37.8116, "step": 9917 }, { "epoch": 236.14328358208957, "grad_norm": 1.3306727409362793, "learning_rate": 9.757142857142858e-06, "loss": 38.3022, "step": 9918 }, { "epoch": 236.16716417910447, "grad_norm": 1.326015591621399, "learning_rate": 9.756190476190477e-06, "loss": 39.7513, "step": 9919 }, { "epoch": 236.1910447761194, "grad_norm": 1.3396570682525635, "learning_rate": 9.755238095238096e-06, "loss": 40.4054, "step": 9920 }, { "epoch": 236.21492537313432, "grad_norm": 1.3323308229446411, "learning_rate": 9.754285714285715e-06, "loss": 39.4843, "step": 9921 }, { "epoch": 236.23880597014926, "grad_norm": 1.4030797481536865, "learning_rate": 9.753333333333335e-06, "loss": 40.6539, "step": 9922 }, { "epoch": 236.26268656716417, "grad_norm": 1.3771066665649414, "learning_rate": 9.752380952380953e-06, "loss": 41.0785, "step": 9923 }, { "epoch": 236.2865671641791, "grad_norm": 1.4003037214279175, "learning_rate": 9.751428571428571e-06, "loss": 39.9688, "step": 9924 }, { "epoch": 236.31044776119404, "grad_norm": 1.343637228012085, "learning_rate": 9.750476190476192e-06, "loss": 40.3446, "step": 9925 }, { "epoch": 236.33432835820895, "grad_norm": 1.3652777671813965, "learning_rate": 9.74952380952381e-06, "loss": 39.6218, "step": 9926 }, { "epoch": 236.3582089552239, "grad_norm": 1.348204255104065, "learning_rate": 9.74857142857143e-06, "loss": 38.3841, "step": 9927 }, { "epoch": 236.3820895522388, "grad_norm": 1.3659793138504028, "learning_rate": 9.747619047619049e-06, "loss": 40.3035, "step": 9928 }, { "epoch": 236.40597014925373, "grad_norm": 1.415696144104004, "learning_rate": 9.746666666666668e-06, "loss": 38.9408, "step": 9929 }, { "epoch": 236.42985074626867, "grad_norm": 1.3383935689926147, "learning_rate": 9.745714285714287e-06, "loss": 39.8326, "step": 9930 }, { "epoch": 236.45373134328358, "grad_norm": 1.3128776550292969, "learning_rate": 9.744761904761905e-06, "loss": 40.7269, "step": 9931 }, { "epoch": 236.47761194029852, "grad_norm": 1.3265994787216187, "learning_rate": 9.743809523809524e-06, "loss": 38.3291, "step": 9932 }, { "epoch": 236.50149253731342, "grad_norm": 1.413731575012207, "learning_rate": 9.742857142857143e-06, "loss": 40.4271, "step": 9933 }, { "epoch": 236.52537313432836, "grad_norm": 1.326066493988037, "learning_rate": 9.741904761904762e-06, "loss": 39.6158, "step": 9934 }, { "epoch": 236.54925373134327, "grad_norm": 1.3128072023391724, "learning_rate": 9.740952380952383e-06, "loss": 39.5038, "step": 9935 }, { "epoch": 236.5731343283582, "grad_norm": 1.37183678150177, "learning_rate": 9.74e-06, "loss": 40.4495, "step": 9936 }, { "epoch": 236.59701492537314, "grad_norm": 1.3488762378692627, "learning_rate": 9.73904761904762e-06, "loss": 40.9106, "step": 9937 }, { "epoch": 236.62089552238805, "grad_norm": 1.3205487728118896, "learning_rate": 9.73809523809524e-06, "loss": 40.5728, "step": 9938 }, { "epoch": 236.644776119403, "grad_norm": 1.337217092514038, "learning_rate": 9.737142857142858e-06, "loss": 38.768, "step": 9939 }, { "epoch": 236.6686567164179, "grad_norm": 1.3891152143478394, "learning_rate": 9.736190476190477e-06, "loss": 40.7221, "step": 9940 }, { "epoch": 236.69253731343284, "grad_norm": 1.3645548820495605, "learning_rate": 9.735238095238096e-06, "loss": 38.8475, "step": 9941 }, { "epoch": 236.71641791044777, "grad_norm": 1.3696390390396118, "learning_rate": 9.734285714285715e-06, "loss": 40.8396, "step": 9942 }, { "epoch": 236.74029850746268, "grad_norm": 1.3489562273025513, "learning_rate": 9.733333333333334e-06, "loss": 40.0687, "step": 9943 }, { "epoch": 236.76417910447762, "grad_norm": 1.333430528640747, "learning_rate": 9.732380952380953e-06, "loss": 40.2207, "step": 9944 }, { "epoch": 236.78805970149253, "grad_norm": 1.328062653541565, "learning_rate": 9.731428571428573e-06, "loss": 40.3726, "step": 9945 }, { "epoch": 236.81194029850747, "grad_norm": 1.3694713115692139, "learning_rate": 9.73047619047619e-06, "loss": 39.1221, "step": 9946 }, { "epoch": 236.83582089552237, "grad_norm": 1.334063172340393, "learning_rate": 9.72952380952381e-06, "loss": 39.199, "step": 9947 }, { "epoch": 236.8597014925373, "grad_norm": 1.454919457435608, "learning_rate": 9.72857142857143e-06, "loss": 39.4424, "step": 9948 }, { "epoch": 236.88358208955225, "grad_norm": 1.3772473335266113, "learning_rate": 9.727619047619047e-06, "loss": 39.4237, "step": 9949 }, { "epoch": 236.90746268656716, "grad_norm": 1.3148467540740967, "learning_rate": 9.726666666666668e-06, "loss": 39.6302, "step": 9950 }, { "epoch": 236.9313432835821, "grad_norm": 1.3349251747131348, "learning_rate": 9.725714285714287e-06, "loss": 38.6137, "step": 9951 }, { "epoch": 236.955223880597, "grad_norm": 1.3385717868804932, "learning_rate": 9.724761904761906e-06, "loss": 39.1001, "step": 9952 }, { "epoch": 236.97910447761194, "grad_norm": 1.3570889234542847, "learning_rate": 9.723809523809525e-06, "loss": 40.3011, "step": 9953 }, { "epoch": 237.0, "grad_norm": 1.3135221004486084, "learning_rate": 9.722857142857143e-06, "loss": 36.0209, "step": 9954 }, { "epoch": 237.02388059701494, "grad_norm": 1.3691216707229614, "learning_rate": 9.721904761904762e-06, "loss": 39.4968, "step": 9955 }, { "epoch": 237.04776119402985, "grad_norm": 1.3549951314926147, "learning_rate": 9.720952380952381e-06, "loss": 40.0115, "step": 9956 }, { "epoch": 237.07164179104478, "grad_norm": 1.3318511247634888, "learning_rate": 9.72e-06, "loss": 40.5549, "step": 9957 }, { "epoch": 237.0955223880597, "grad_norm": 1.4431164264678955, "learning_rate": 9.71904761904762e-06, "loss": 38.9153, "step": 9958 }, { "epoch": 237.11940298507463, "grad_norm": 1.3468984365463257, "learning_rate": 9.718095238095238e-06, "loss": 39.7531, "step": 9959 }, { "epoch": 237.14328358208957, "grad_norm": 1.3177355527877808, "learning_rate": 9.717142857142858e-06, "loss": 41.2301, "step": 9960 }, { "epoch": 237.16716417910447, "grad_norm": 1.4061927795410156, "learning_rate": 9.716190476190477e-06, "loss": 38.2518, "step": 9961 }, { "epoch": 237.1910447761194, "grad_norm": 1.313602328300476, "learning_rate": 9.715238095238096e-06, "loss": 39.7076, "step": 9962 }, { "epoch": 237.21492537313432, "grad_norm": 1.3236408233642578, "learning_rate": 9.714285714285715e-06, "loss": 40.1127, "step": 9963 }, { "epoch": 237.23880597014926, "grad_norm": 1.3389065265655518, "learning_rate": 9.713333333333334e-06, "loss": 39.0702, "step": 9964 }, { "epoch": 237.26268656716417, "grad_norm": 1.3380409479141235, "learning_rate": 9.712380952380953e-06, "loss": 39.0419, "step": 9965 }, { "epoch": 237.2865671641791, "grad_norm": 1.326429843902588, "learning_rate": 9.711428571428574e-06, "loss": 40.2047, "step": 9966 }, { "epoch": 237.31044776119404, "grad_norm": 1.3287544250488281, "learning_rate": 9.71047619047619e-06, "loss": 39.0321, "step": 9967 }, { "epoch": 237.33432835820895, "grad_norm": 1.3264899253845215, "learning_rate": 9.70952380952381e-06, "loss": 38.9382, "step": 9968 }, { "epoch": 237.3582089552239, "grad_norm": 1.3763290643692017, "learning_rate": 9.70857142857143e-06, "loss": 39.0357, "step": 9969 }, { "epoch": 237.3820895522388, "grad_norm": 1.3457845449447632, "learning_rate": 9.707619047619047e-06, "loss": 40.5872, "step": 9970 }, { "epoch": 237.40597014925373, "grad_norm": 1.341729760169983, "learning_rate": 9.706666666666668e-06, "loss": 39.6943, "step": 9971 }, { "epoch": 237.42985074626867, "grad_norm": 1.3637651205062866, "learning_rate": 9.705714285714287e-06, "loss": 39.2316, "step": 9972 }, { "epoch": 237.45373134328358, "grad_norm": 1.3732658624649048, "learning_rate": 9.704761904761906e-06, "loss": 39.7452, "step": 9973 }, { "epoch": 237.47761194029852, "grad_norm": 1.343896508216858, "learning_rate": 9.703809523809525e-06, "loss": 39.4845, "step": 9974 }, { "epoch": 237.50149253731342, "grad_norm": 1.3222217559814453, "learning_rate": 9.702857142857144e-06, "loss": 40.7974, "step": 9975 }, { "epoch": 237.52537313432836, "grad_norm": 1.361674427986145, "learning_rate": 9.701904761904763e-06, "loss": 39.8861, "step": 9976 }, { "epoch": 237.54925373134327, "grad_norm": 1.3517204523086548, "learning_rate": 9.700952380952381e-06, "loss": 38.7146, "step": 9977 }, { "epoch": 237.5731343283582, "grad_norm": 1.3491915464401245, "learning_rate": 9.7e-06, "loss": 39.9707, "step": 9978 }, { "epoch": 237.59701492537314, "grad_norm": 1.3408501148223877, "learning_rate": 9.699047619047621e-06, "loss": 41.2863, "step": 9979 }, { "epoch": 237.62089552238805, "grad_norm": 1.3572744131088257, "learning_rate": 9.698095238095238e-06, "loss": 40.807, "step": 9980 }, { "epoch": 237.644776119403, "grad_norm": 1.318699598312378, "learning_rate": 9.697142857142859e-06, "loss": 40.294, "step": 9981 }, { "epoch": 237.6686567164179, "grad_norm": 1.3654084205627441, "learning_rate": 9.696190476190478e-06, "loss": 39.2713, "step": 9982 }, { "epoch": 237.69253731343284, "grad_norm": 1.3765933513641357, "learning_rate": 9.695238095238096e-06, "loss": 40.4953, "step": 9983 }, { "epoch": 237.71641791044777, "grad_norm": 1.334284782409668, "learning_rate": 9.694285714285715e-06, "loss": 39.2019, "step": 9984 }, { "epoch": 237.74029850746268, "grad_norm": 1.3720570802688599, "learning_rate": 9.693333333333334e-06, "loss": 39.3527, "step": 9985 }, { "epoch": 237.76417910447762, "grad_norm": 1.3228733539581299, "learning_rate": 9.692380952380953e-06, "loss": 40.1475, "step": 9986 }, { "epoch": 237.78805970149253, "grad_norm": 1.3554023504257202, "learning_rate": 9.691428571428572e-06, "loss": 39.9287, "step": 9987 }, { "epoch": 237.81194029850747, "grad_norm": 1.348935842514038, "learning_rate": 9.690476190476191e-06, "loss": 40.2747, "step": 9988 }, { "epoch": 237.83582089552237, "grad_norm": 1.331253170967102, "learning_rate": 9.68952380952381e-06, "loss": 40.0914, "step": 9989 }, { "epoch": 237.8597014925373, "grad_norm": 1.399587631225586, "learning_rate": 9.688571428571429e-06, "loss": 40.6559, "step": 9990 }, { "epoch": 237.88358208955225, "grad_norm": 1.3307267427444458, "learning_rate": 9.687619047619048e-06, "loss": 40.6665, "step": 9991 }, { "epoch": 237.90746268656716, "grad_norm": 1.338732361793518, "learning_rate": 9.686666666666668e-06, "loss": 39.774, "step": 9992 }, { "epoch": 237.9313432835821, "grad_norm": 1.3694732189178467, "learning_rate": 9.685714285714285e-06, "loss": 39.6519, "step": 9993 }, { "epoch": 237.955223880597, "grad_norm": 1.4314966201782227, "learning_rate": 9.684761904761906e-06, "loss": 38.9049, "step": 9994 }, { "epoch": 237.97910447761194, "grad_norm": 1.3626419305801392, "learning_rate": 9.683809523809525e-06, "loss": 39.5957, "step": 9995 }, { "epoch": 238.0, "grad_norm": 1.3325542211532593, "learning_rate": 9.682857142857144e-06, "loss": 34.7667, "step": 9996 }, { "epoch": 238.02388059701494, "grad_norm": 1.350001573562622, "learning_rate": 9.681904761904763e-06, "loss": 40.3089, "step": 9997 }, { "epoch": 238.04776119402985, "grad_norm": 1.6460633277893066, "learning_rate": 9.680952380952382e-06, "loss": 39.1481, "step": 9998 }, { "epoch": 238.07164179104478, "grad_norm": 1.3845711946487427, "learning_rate": 9.68e-06, "loss": 38.8686, "step": 9999 }, { "epoch": 238.0955223880597, "grad_norm": 1.3522241115570068, "learning_rate": 9.67904761904762e-06, "loss": 40.0247, "step": 10000 }, { "epoch": 238.11940298507463, "grad_norm": 1.3413945436477661, "learning_rate": 9.678095238095238e-06, "loss": 40.7513, "step": 10001 }, { "epoch": 238.14328358208957, "grad_norm": 1.3722649812698364, "learning_rate": 9.677142857142859e-06, "loss": 38.7854, "step": 10002 }, { "epoch": 238.16716417910447, "grad_norm": 1.3973984718322754, "learning_rate": 9.676190476190476e-06, "loss": 41.0833, "step": 10003 }, { "epoch": 238.1910447761194, "grad_norm": 1.3636044263839722, "learning_rate": 9.675238095238097e-06, "loss": 40.2664, "step": 10004 }, { "epoch": 238.21492537313432, "grad_norm": 1.3602664470672607, "learning_rate": 9.674285714285716e-06, "loss": 39.5514, "step": 10005 }, { "epoch": 238.23880597014926, "grad_norm": 1.354398488998413, "learning_rate": 9.673333333333334e-06, "loss": 39.9766, "step": 10006 }, { "epoch": 238.26268656716417, "grad_norm": 1.369531273841858, "learning_rate": 9.672380952380953e-06, "loss": 40.7818, "step": 10007 }, { "epoch": 238.2865671641791, "grad_norm": 1.3389415740966797, "learning_rate": 9.671428571428572e-06, "loss": 40.5379, "step": 10008 }, { "epoch": 238.31044776119404, "grad_norm": 1.3280118703842163, "learning_rate": 9.670476190476191e-06, "loss": 39.5979, "step": 10009 }, { "epoch": 238.33432835820895, "grad_norm": 1.3562217950820923, "learning_rate": 9.66952380952381e-06, "loss": 40.05, "step": 10010 }, { "epoch": 238.3582089552239, "grad_norm": 1.5608984231948853, "learning_rate": 9.668571428571429e-06, "loss": 40.4936, "step": 10011 }, { "epoch": 238.3820895522388, "grad_norm": 1.3201907873153687, "learning_rate": 9.667619047619048e-06, "loss": 41.2498, "step": 10012 }, { "epoch": 238.40597014925373, "grad_norm": 1.338615894317627, "learning_rate": 9.666666666666667e-06, "loss": 39.5352, "step": 10013 }, { "epoch": 238.42985074626867, "grad_norm": 1.3746790885925293, "learning_rate": 9.665714285714286e-06, "loss": 39.4197, "step": 10014 }, { "epoch": 238.45373134328358, "grad_norm": 1.3997071981430054, "learning_rate": 9.664761904761906e-06, "loss": 41.1347, "step": 10015 }, { "epoch": 238.47761194029852, "grad_norm": 1.3331509828567505, "learning_rate": 9.663809523809523e-06, "loss": 38.9469, "step": 10016 }, { "epoch": 238.50149253731342, "grad_norm": 1.364647388458252, "learning_rate": 9.662857142857144e-06, "loss": 39.4769, "step": 10017 }, { "epoch": 238.52537313432836, "grad_norm": 1.3389769792556763, "learning_rate": 9.661904761904763e-06, "loss": 40.4294, "step": 10018 }, { "epoch": 238.54925373134327, "grad_norm": 1.422979712486267, "learning_rate": 9.660952380952382e-06, "loss": 39.4762, "step": 10019 }, { "epoch": 238.5731343283582, "grad_norm": 1.4212490320205688, "learning_rate": 9.66e-06, "loss": 39.7862, "step": 10020 }, { "epoch": 238.59701492537314, "grad_norm": 1.3415521383285522, "learning_rate": 9.65904761904762e-06, "loss": 40.7127, "step": 10021 }, { "epoch": 238.62089552238805, "grad_norm": 1.3148249387741089, "learning_rate": 9.658095238095238e-06, "loss": 40.5569, "step": 10022 }, { "epoch": 238.644776119403, "grad_norm": 1.4174193143844604, "learning_rate": 9.657142857142859e-06, "loss": 38.2799, "step": 10023 }, { "epoch": 238.6686567164179, "grad_norm": 1.3714053630828857, "learning_rate": 9.656190476190476e-06, "loss": 39.7593, "step": 10024 }, { "epoch": 238.69253731343284, "grad_norm": 1.911094069480896, "learning_rate": 9.655238095238097e-06, "loss": 40.8059, "step": 10025 }, { "epoch": 238.71641791044777, "grad_norm": 1.3364540338516235, "learning_rate": 9.654285714285716e-06, "loss": 40.4561, "step": 10026 }, { "epoch": 238.74029850746268, "grad_norm": 1.308134913444519, "learning_rate": 9.653333333333335e-06, "loss": 38.7757, "step": 10027 }, { "epoch": 238.76417910447762, "grad_norm": 1.361018180847168, "learning_rate": 9.652380952380954e-06, "loss": 38.7338, "step": 10028 }, { "epoch": 238.78805970149253, "grad_norm": 1.3720725774765015, "learning_rate": 9.651428571428572e-06, "loss": 38.8243, "step": 10029 }, { "epoch": 238.81194029850747, "grad_norm": 1.3732472658157349, "learning_rate": 9.650476190476191e-06, "loss": 37.9337, "step": 10030 }, { "epoch": 238.83582089552237, "grad_norm": NaN, "learning_rate": 9.64952380952381e-06, "loss": 66.613, "step": 10031 }, { "epoch": 238.8597014925373, "grad_norm": 1.3494549989700317, "learning_rate": 9.64952380952381e-06, "loss": 40.4674, "step": 10032 }, { "epoch": 238.88358208955225, "grad_norm": 1.3752506971359253, "learning_rate": 9.648571428571429e-06, "loss": 39.2314, "step": 10033 }, { "epoch": 238.90746268656716, "grad_norm": 1.3376414775848389, "learning_rate": 9.647619047619048e-06, "loss": 39.4232, "step": 10034 }, { "epoch": 238.9313432835821, "grad_norm": 1.3883917331695557, "learning_rate": 9.646666666666667e-06, "loss": 40.0781, "step": 10035 }, { "epoch": 238.955223880597, "grad_norm": 1.3449182510375977, "learning_rate": 9.645714285714286e-06, "loss": 39.6172, "step": 10036 }, { "epoch": 238.97910447761194, "grad_norm": 1.3528748750686646, "learning_rate": 9.644761904761906e-06, "loss": 39.1575, "step": 10037 }, { "epoch": 239.0, "grad_norm": 1.3071421384811401, "learning_rate": 9.643809523809524e-06, "loss": 35.0875, "step": 10038 }, { "epoch": 239.02388059701494, "grad_norm": 1.3398312330245972, "learning_rate": 9.642857142857144e-06, "loss": 38.7677, "step": 10039 }, { "epoch": 239.04776119402985, "grad_norm": 1.3422006368637085, "learning_rate": 9.641904761904763e-06, "loss": 40.3211, "step": 10040 }, { "epoch": 239.07164179104478, "grad_norm": 1.3400614261627197, "learning_rate": 9.640952380952382e-06, "loss": 40.7171, "step": 10041 }, { "epoch": 239.0955223880597, "grad_norm": 1.3559387922286987, "learning_rate": 9.640000000000001e-06, "loss": 41.419, "step": 10042 }, { "epoch": 239.11940298507463, "grad_norm": 1.355833888053894, "learning_rate": 9.63904761904762e-06, "loss": 39.0918, "step": 10043 }, { "epoch": 239.14328358208957, "grad_norm": 1.3266537189483643, "learning_rate": 9.638095238095239e-06, "loss": 38.7369, "step": 10044 }, { "epoch": 239.16716417910447, "grad_norm": 1.346779227256775, "learning_rate": 9.637142857142858e-06, "loss": 40.2505, "step": 10045 }, { "epoch": 239.1910447761194, "grad_norm": 1.3708893060684204, "learning_rate": 9.636190476190476e-06, "loss": 39.8867, "step": 10046 }, { "epoch": 239.21492537313432, "grad_norm": 1.3284021615982056, "learning_rate": 9.635238095238097e-06, "loss": 39.5707, "step": 10047 }, { "epoch": 239.23880597014926, "grad_norm": 1.3599109649658203, "learning_rate": 9.634285714285714e-06, "loss": 39.5911, "step": 10048 }, { "epoch": 239.26268656716417, "grad_norm": 1.3675540685653687, "learning_rate": 9.633333333333335e-06, "loss": 40.9816, "step": 10049 }, { "epoch": 239.2865671641791, "grad_norm": 1.3523657321929932, "learning_rate": 9.632380952380954e-06, "loss": 41.2698, "step": 10050 }, { "epoch": 239.31044776119404, "grad_norm": 1.3675880432128906, "learning_rate": 9.631428571428573e-06, "loss": 40.1079, "step": 10051 }, { "epoch": 239.33432835820895, "grad_norm": 1.368534803390503, "learning_rate": 9.630476190476192e-06, "loss": 39.5291, "step": 10052 }, { "epoch": 239.3582089552239, "grad_norm": 1.3716222047805786, "learning_rate": 9.62952380952381e-06, "loss": 39.7405, "step": 10053 }, { "epoch": 239.3820895522388, "grad_norm": 1.3316738605499268, "learning_rate": 9.62857142857143e-06, "loss": 38.7209, "step": 10054 }, { "epoch": 239.40597014925373, "grad_norm": 1.3373897075653076, "learning_rate": 9.627619047619048e-06, "loss": 40.9067, "step": 10055 }, { "epoch": 239.42985074626867, "grad_norm": 1.3613090515136719, "learning_rate": 9.626666666666667e-06, "loss": 39.6514, "step": 10056 }, { "epoch": 239.45373134328358, "grad_norm": 1.3271781206130981, "learning_rate": 9.625714285714286e-06, "loss": 39.4029, "step": 10057 }, { "epoch": 239.47761194029852, "grad_norm": 1.3501670360565186, "learning_rate": 9.624761904761905e-06, "loss": 40.5855, "step": 10058 }, { "epoch": 239.50149253731342, "grad_norm": 1.3491774797439575, "learning_rate": 9.623809523809524e-06, "loss": 40.2551, "step": 10059 }, { "epoch": 239.52537313432836, "grad_norm": 1.3649423122406006, "learning_rate": 9.622857142857144e-06, "loss": 39.2714, "step": 10060 }, { "epoch": 239.54925373134327, "grad_norm": 1.3268781900405884, "learning_rate": 9.621904761904762e-06, "loss": 38.6678, "step": 10061 }, { "epoch": 239.5731343283582, "grad_norm": 1.3371727466583252, "learning_rate": 9.620952380952382e-06, "loss": 40.2317, "step": 10062 }, { "epoch": 239.59701492537314, "grad_norm": 1.3777354955673218, "learning_rate": 9.620000000000001e-06, "loss": 41.4189, "step": 10063 }, { "epoch": 239.62089552238805, "grad_norm": 1.3598731756210327, "learning_rate": 9.61904761904762e-06, "loss": 38.3652, "step": 10064 }, { "epoch": 239.644776119403, "grad_norm": 1.3130141496658325, "learning_rate": 9.618095238095239e-06, "loss": 41.5396, "step": 10065 }, { "epoch": 239.6686567164179, "grad_norm": 1.3385881185531616, "learning_rate": 9.617142857142858e-06, "loss": 37.7411, "step": 10066 }, { "epoch": 239.69253731343284, "grad_norm": 1.4238348007202148, "learning_rate": 9.616190476190477e-06, "loss": 40.1608, "step": 10067 }, { "epoch": 239.71641791044777, "grad_norm": 1.3493056297302246, "learning_rate": 9.615238095238096e-06, "loss": 38.8459, "step": 10068 }, { "epoch": 239.74029850746268, "grad_norm": 1.33035409450531, "learning_rate": 9.614285714285714e-06, "loss": 39.2516, "step": 10069 }, { "epoch": 239.76417910447762, "grad_norm": 1.346795916557312, "learning_rate": 9.613333333333335e-06, "loss": 39.5483, "step": 10070 }, { "epoch": 239.78805970149253, "grad_norm": 1.3366193771362305, "learning_rate": 9.612380952380952e-06, "loss": 39.8718, "step": 10071 }, { "epoch": 239.81194029850747, "grad_norm": 1.3509900569915771, "learning_rate": 9.611428571428573e-06, "loss": 40.4434, "step": 10072 }, { "epoch": 239.83582089552237, "grad_norm": 1.3413617610931396, "learning_rate": 9.610476190476192e-06, "loss": 40.5654, "step": 10073 }, { "epoch": 239.8597014925373, "grad_norm": 1.3164697885513306, "learning_rate": 9.60952380952381e-06, "loss": 38.9603, "step": 10074 }, { "epoch": 239.88358208955225, "grad_norm": 1.3667457103729248, "learning_rate": 9.60857142857143e-06, "loss": 39.6333, "step": 10075 }, { "epoch": 239.90746268656716, "grad_norm": 1.3594368696212769, "learning_rate": 9.607619047619048e-06, "loss": 39.2914, "step": 10076 }, { "epoch": 239.9313432835821, "grad_norm": 1.3231528997421265, "learning_rate": 9.606666666666667e-06, "loss": 39.4939, "step": 10077 }, { "epoch": 239.955223880597, "grad_norm": 1.3591920137405396, "learning_rate": 9.605714285714286e-06, "loss": 38.7727, "step": 10078 }, { "epoch": 239.97910447761194, "grad_norm": 1.3377795219421387, "learning_rate": 9.604761904761905e-06, "loss": 40.1872, "step": 10079 }, { "epoch": 240.0, "grad_norm": 1.3046207427978516, "learning_rate": 9.603809523809524e-06, "loss": 34.0286, "step": 10080 }, { "epoch": 240.02388059701494, "grad_norm": 1.3753384351730347, "learning_rate": 9.602857142857145e-06, "loss": 39.4529, "step": 10081 }, { "epoch": 240.04776119402985, "grad_norm": 1.384800672531128, "learning_rate": 9.601904761904762e-06, "loss": 40.0345, "step": 10082 }, { "epoch": 240.07164179104478, "grad_norm": 1.3311532735824585, "learning_rate": 9.600952380952382e-06, "loss": 39.272, "step": 10083 }, { "epoch": 240.0955223880597, "grad_norm": 1.3380964994430542, "learning_rate": 9.600000000000001e-06, "loss": 39.8096, "step": 10084 }, { "epoch": 240.11940298507463, "grad_norm": 1.3464573621749878, "learning_rate": 9.59904761904762e-06, "loss": 40.1465, "step": 10085 }, { "epoch": 240.14328358208957, "grad_norm": 1.3739365339279175, "learning_rate": 9.598095238095239e-06, "loss": 38.9087, "step": 10086 }, { "epoch": 240.16716417910447, "grad_norm": 1.3386002779006958, "learning_rate": 9.597142857142858e-06, "loss": 40.0037, "step": 10087 }, { "epoch": 240.1910447761194, "grad_norm": 1.3719122409820557, "learning_rate": 9.596190476190477e-06, "loss": 40.6273, "step": 10088 }, { "epoch": 240.21492537313432, "grad_norm": 1.3550982475280762, "learning_rate": 9.595238095238096e-06, "loss": 39.9312, "step": 10089 }, { "epoch": 240.23880597014926, "grad_norm": 1.3379223346710205, "learning_rate": 9.594285714285715e-06, "loss": 38.4153, "step": 10090 }, { "epoch": 240.26268656716417, "grad_norm": 1.3957806825637817, "learning_rate": 9.593333333333335e-06, "loss": 40.0422, "step": 10091 }, { "epoch": 240.2865671641791, "grad_norm": 1.3606268167495728, "learning_rate": 9.592380952380952e-06, "loss": 40.3299, "step": 10092 }, { "epoch": 240.31044776119404, "grad_norm": 1.3040376901626587, "learning_rate": 9.591428571428573e-06, "loss": 39.9256, "step": 10093 }, { "epoch": 240.33432835820895, "grad_norm": 1.305344820022583, "learning_rate": 9.590476190476192e-06, "loss": 40.0776, "step": 10094 }, { "epoch": 240.3582089552239, "grad_norm": 1.3415625095367432, "learning_rate": 9.58952380952381e-06, "loss": 39.7741, "step": 10095 }, { "epoch": 240.3820895522388, "grad_norm": 1.3220099210739136, "learning_rate": 9.58857142857143e-06, "loss": 40.4555, "step": 10096 }, { "epoch": 240.40597014925373, "grad_norm": 1.3289704322814941, "learning_rate": 9.587619047619049e-06, "loss": 38.4487, "step": 10097 }, { "epoch": 240.42985074626867, "grad_norm": 1.3301759958267212, "learning_rate": 9.586666666666667e-06, "loss": 39.5931, "step": 10098 }, { "epoch": 240.45373134328358, "grad_norm": 1.337203860282898, "learning_rate": 9.585714285714286e-06, "loss": 38.9133, "step": 10099 }, { "epoch": 240.47761194029852, "grad_norm": 1.3302515745162964, "learning_rate": 9.584761904761905e-06, "loss": 39.1571, "step": 10100 }, { "epoch": 240.50149253731342, "grad_norm": 1.3361458778381348, "learning_rate": 9.583809523809524e-06, "loss": 39.7306, "step": 10101 }, { "epoch": 240.52537313432836, "grad_norm": 1.351824402809143, "learning_rate": 9.582857142857143e-06, "loss": 40.8392, "step": 10102 }, { "epoch": 240.54925373134327, "grad_norm": 1.3576737642288208, "learning_rate": 9.581904761904762e-06, "loss": 39.8841, "step": 10103 }, { "epoch": 240.5731343283582, "grad_norm": 1.5632132291793823, "learning_rate": 9.580952380952383e-06, "loss": 40.4551, "step": 10104 }, { "epoch": 240.59701492537314, "grad_norm": 1.3122270107269287, "learning_rate": 9.58e-06, "loss": 39.7935, "step": 10105 }, { "epoch": 240.62089552238805, "grad_norm": 1.35639488697052, "learning_rate": 9.57904761904762e-06, "loss": 41.1999, "step": 10106 }, { "epoch": 240.644776119403, "grad_norm": 1.311182975769043, "learning_rate": 9.57809523809524e-06, "loss": 39.9676, "step": 10107 }, { "epoch": 240.6686567164179, "grad_norm": 1.3349801301956177, "learning_rate": 9.577142857142858e-06, "loss": 40.5045, "step": 10108 }, { "epoch": 240.69253731343284, "grad_norm": 1.3920153379440308, "learning_rate": 9.576190476190477e-06, "loss": 39.41, "step": 10109 }, { "epoch": 240.71641791044777, "grad_norm": 1.400046706199646, "learning_rate": 9.575238095238096e-06, "loss": 41.1641, "step": 10110 }, { "epoch": 240.74029850746268, "grad_norm": 1.3463674783706665, "learning_rate": 9.574285714285715e-06, "loss": 38.8759, "step": 10111 }, { "epoch": 240.76417910447762, "grad_norm": 1.4032855033874512, "learning_rate": 9.573333333333334e-06, "loss": 39.9551, "step": 10112 }, { "epoch": 240.78805970149253, "grad_norm": 1.4310637712478638, "learning_rate": 9.572380952380953e-06, "loss": 39.1059, "step": 10113 }, { "epoch": 240.81194029850747, "grad_norm": 1.3730343580245972, "learning_rate": 9.571428571428573e-06, "loss": 39.4799, "step": 10114 }, { "epoch": 240.83582089552237, "grad_norm": 1.3235242366790771, "learning_rate": 9.57047619047619e-06, "loss": 38.2937, "step": 10115 }, { "epoch": 240.8597014925373, "grad_norm": 1.366331934928894, "learning_rate": 9.569523809523811e-06, "loss": 39.6515, "step": 10116 }, { "epoch": 240.88358208955225, "grad_norm": 1.3398170471191406, "learning_rate": 9.56857142857143e-06, "loss": 39.8471, "step": 10117 }, { "epoch": 240.90746268656716, "grad_norm": 1.3589614629745483, "learning_rate": 9.567619047619049e-06, "loss": 39.231, "step": 10118 }, { "epoch": 240.9313432835821, "grad_norm": 1.3521586656570435, "learning_rate": 9.566666666666668e-06, "loss": 39.5145, "step": 10119 }, { "epoch": 240.955223880597, "grad_norm": 1.3171756267547607, "learning_rate": 9.565714285714287e-06, "loss": 39.7858, "step": 10120 }, { "epoch": 240.97910447761194, "grad_norm": NaN, "learning_rate": 9.564761904761905e-06, "loss": 46.0052, "step": 10121 }, { "epoch": 241.0, "grad_norm": 1.394946813583374, "learning_rate": 9.564761904761905e-06, "loss": 35.014, "step": 10122 }, { "epoch": 241.02388059701494, "grad_norm": 1.3385505676269531, "learning_rate": 9.563809523809524e-06, "loss": 40.1038, "step": 10123 }, { "epoch": 241.04776119402985, "grad_norm": 1.354965090751648, "learning_rate": 9.562857142857143e-06, "loss": 39.788, "step": 10124 }, { "epoch": 241.07164179104478, "grad_norm": 1.4172148704528809, "learning_rate": 9.561904761904762e-06, "loss": 39.4323, "step": 10125 }, { "epoch": 241.0955223880597, "grad_norm": 1.3758008480072021, "learning_rate": 9.560952380952381e-06, "loss": 38.9985, "step": 10126 }, { "epoch": 241.11940298507463, "grad_norm": 1.3569586277008057, "learning_rate": 9.56e-06, "loss": 41.3691, "step": 10127 }, { "epoch": 241.14328358208957, "grad_norm": 1.2954137325286865, "learning_rate": 9.55904761904762e-06, "loss": 40.3496, "step": 10128 }, { "epoch": 241.16716417910447, "grad_norm": 1.3598424196243286, "learning_rate": 9.558095238095238e-06, "loss": 39.2826, "step": 10129 }, { "epoch": 241.1910447761194, "grad_norm": 1.3665971755981445, "learning_rate": 9.557142857142858e-06, "loss": 40.298, "step": 10130 }, { "epoch": 241.21492537313432, "grad_norm": 1.3831102848052979, "learning_rate": 9.556190476190477e-06, "loss": 38.2073, "step": 10131 }, { "epoch": 241.23880597014926, "grad_norm": 1.367124319076538, "learning_rate": 9.555238095238096e-06, "loss": 40.1861, "step": 10132 }, { "epoch": 241.26268656716417, "grad_norm": 1.3463385105133057, "learning_rate": 9.554285714285715e-06, "loss": 40.119, "step": 10133 }, { "epoch": 241.2865671641791, "grad_norm": 1.3403927087783813, "learning_rate": 9.553333333333334e-06, "loss": 39.4965, "step": 10134 }, { "epoch": 241.31044776119404, "grad_norm": 1.3577483892440796, "learning_rate": 9.552380952380953e-06, "loss": 38.7226, "step": 10135 }, { "epoch": 241.33432835820895, "grad_norm": 1.3568179607391357, "learning_rate": 9.551428571428573e-06, "loss": 40.3887, "step": 10136 }, { "epoch": 241.3582089552239, "grad_norm": 1.335292100906372, "learning_rate": 9.55047619047619e-06, "loss": 40.7288, "step": 10137 }, { "epoch": 241.3820895522388, "grad_norm": 1.335775375366211, "learning_rate": 9.549523809523811e-06, "loss": 40.0887, "step": 10138 }, { "epoch": 241.40597014925373, "grad_norm": 1.3527525663375854, "learning_rate": 9.54857142857143e-06, "loss": 40.5913, "step": 10139 }, { "epoch": 241.42985074626867, "grad_norm": 1.3148573637008667, "learning_rate": 9.547619047619049e-06, "loss": 39.9062, "step": 10140 }, { "epoch": 241.45373134328358, "grad_norm": 1.3413747549057007, "learning_rate": 9.546666666666668e-06, "loss": 38.3378, "step": 10141 }, { "epoch": 241.47761194029852, "grad_norm": 1.379338264465332, "learning_rate": 9.545714285714287e-06, "loss": 38.7231, "step": 10142 }, { "epoch": 241.50149253731342, "grad_norm": NaN, "learning_rate": 9.544761904761906e-06, "loss": 54.8682, "step": 10143 }, { "epoch": 241.52537313432836, "grad_norm": 1.3514318466186523, "learning_rate": 9.544761904761906e-06, "loss": 39.4054, "step": 10144 }, { "epoch": 241.54925373134327, "grad_norm": 1.3373392820358276, "learning_rate": 9.543809523809525e-06, "loss": 39.3932, "step": 10145 }, { "epoch": 241.5731343283582, "grad_norm": 1.377579927444458, "learning_rate": 9.542857142857143e-06, "loss": 38.9225, "step": 10146 }, { "epoch": 241.59701492537314, "grad_norm": 1.377698302268982, "learning_rate": 9.541904761904762e-06, "loss": 38.668, "step": 10147 }, { "epoch": 241.62089552238805, "grad_norm": 1.3710377216339111, "learning_rate": 9.540952380952381e-06, "loss": 40.5499, "step": 10148 }, { "epoch": 241.644776119403, "grad_norm": 1.4293427467346191, "learning_rate": 9.54e-06, "loss": 38.3944, "step": 10149 }, { "epoch": 241.6686567164179, "grad_norm": 1.3529118299484253, "learning_rate": 9.53904761904762e-06, "loss": 40.6977, "step": 10150 }, { "epoch": 241.69253731343284, "grad_norm": 1.3474223613739014, "learning_rate": 9.538095238095238e-06, "loss": 41.0103, "step": 10151 }, { "epoch": 241.71641791044777, "grad_norm": 1.3695334196090698, "learning_rate": 9.537142857142859e-06, "loss": 39.6032, "step": 10152 }, { "epoch": 241.74029850746268, "grad_norm": 1.3576816320419312, "learning_rate": 9.536190476190477e-06, "loss": 39.9718, "step": 10153 }, { "epoch": 241.76417910447762, "grad_norm": 1.325994849205017, "learning_rate": 9.535238095238096e-06, "loss": 39.7379, "step": 10154 }, { "epoch": 241.78805970149253, "grad_norm": 1.3426984548568726, "learning_rate": 9.534285714285715e-06, "loss": 40.296, "step": 10155 }, { "epoch": 241.81194029850747, "grad_norm": 1.3457149267196655, "learning_rate": 9.533333333333334e-06, "loss": 40.6087, "step": 10156 }, { "epoch": 241.83582089552237, "grad_norm": 1.3379700183868408, "learning_rate": 9.532380952380953e-06, "loss": 39.1812, "step": 10157 }, { "epoch": 241.8597014925373, "grad_norm": 1.3858633041381836, "learning_rate": 9.531428571428572e-06, "loss": 39.0843, "step": 10158 }, { "epoch": 241.88358208955225, "grad_norm": 1.343712568283081, "learning_rate": 9.53047619047619e-06, "loss": 39.4924, "step": 10159 }, { "epoch": 241.90746268656716, "grad_norm": 1.4119653701782227, "learning_rate": 9.529523809523811e-06, "loss": 40.4585, "step": 10160 }, { "epoch": 241.9313432835821, "grad_norm": 1.354597568511963, "learning_rate": 9.528571428571429e-06, "loss": 41.1899, "step": 10161 }, { "epoch": 241.955223880597, "grad_norm": 1.3248164653778076, "learning_rate": 9.52761904761905e-06, "loss": 41.1478, "step": 10162 }, { "epoch": 241.97910447761194, "grad_norm": 1.3953194618225098, "learning_rate": 9.526666666666668e-06, "loss": 38.3818, "step": 10163 }, { "epoch": 242.0, "grad_norm": 1.3757054805755615, "learning_rate": 9.525714285714287e-06, "loss": 34.7528, "step": 10164 }, { "epoch": 242.02388059701494, "grad_norm": 1.3356916904449463, "learning_rate": 9.524761904761906e-06, "loss": 40.921, "step": 10165 }, { "epoch": 242.04776119402985, "grad_norm": 1.348915696144104, "learning_rate": 9.523809523809525e-06, "loss": 40.5735, "step": 10166 }, { "epoch": 242.07164179104478, "grad_norm": 1.3434497117996216, "learning_rate": 9.522857142857144e-06, "loss": 38.7756, "step": 10167 }, { "epoch": 242.0955223880597, "grad_norm": 1.337519645690918, "learning_rate": 9.521904761904763e-06, "loss": 38.9566, "step": 10168 }, { "epoch": 242.11940298507463, "grad_norm": 1.2932915687561035, "learning_rate": 9.520952380952381e-06, "loss": 40.1132, "step": 10169 }, { "epoch": 242.14328358208957, "grad_norm": 1.3795896768569946, "learning_rate": 9.52e-06, "loss": 39.5488, "step": 10170 }, { "epoch": 242.16716417910447, "grad_norm": 1.3320775032043457, "learning_rate": 9.51904761904762e-06, "loss": 39.6367, "step": 10171 }, { "epoch": 242.1910447761194, "grad_norm": 1.3580714464187622, "learning_rate": 9.518095238095238e-06, "loss": 39.79, "step": 10172 }, { "epoch": 242.21492537313432, "grad_norm": 1.3177366256713867, "learning_rate": 9.517142857142859e-06, "loss": 40.2077, "step": 10173 }, { "epoch": 242.23880597014926, "grad_norm": 1.3467273712158203, "learning_rate": 9.516190476190476e-06, "loss": 39.1344, "step": 10174 }, { "epoch": 242.26268656716417, "grad_norm": 1.3820717334747314, "learning_rate": 9.515238095238097e-06, "loss": 39.5341, "step": 10175 }, { "epoch": 242.2865671641791, "grad_norm": 1.3518978357315063, "learning_rate": 9.514285714285715e-06, "loss": 38.3472, "step": 10176 }, { "epoch": 242.31044776119404, "grad_norm": 1.315094232559204, "learning_rate": 9.513333333333334e-06, "loss": 38.6729, "step": 10177 }, { "epoch": 242.33432835820895, "grad_norm": 1.3382830619812012, "learning_rate": 9.512380952380953e-06, "loss": 39.7524, "step": 10178 }, { "epoch": 242.3582089552239, "grad_norm": 1.4210933446884155, "learning_rate": 9.511428571428572e-06, "loss": 41.0884, "step": 10179 }, { "epoch": 242.3820895522388, "grad_norm": 1.327660083770752, "learning_rate": 9.510476190476191e-06, "loss": 40.3709, "step": 10180 }, { "epoch": 242.40597014925373, "grad_norm": 1.3846632242202759, "learning_rate": 9.50952380952381e-06, "loss": 39.9969, "step": 10181 }, { "epoch": 242.42985074626867, "grad_norm": 1.3436410427093506, "learning_rate": 9.508571428571429e-06, "loss": 39.7458, "step": 10182 }, { "epoch": 242.45373134328358, "grad_norm": 1.3935269117355347, "learning_rate": 9.50761904761905e-06, "loss": 39.9704, "step": 10183 }, { "epoch": 242.47761194029852, "grad_norm": 1.3554819822311401, "learning_rate": 9.506666666666667e-06, "loss": 40.2842, "step": 10184 }, { "epoch": 242.50149253731342, "grad_norm": 1.3457695245742798, "learning_rate": 9.505714285714287e-06, "loss": 38.3113, "step": 10185 }, { "epoch": 242.52537313432836, "grad_norm": 1.3350502252578735, "learning_rate": 9.504761904761906e-06, "loss": 39.9397, "step": 10186 }, { "epoch": 242.54925373134327, "grad_norm": 1.3252825736999512, "learning_rate": 9.503809523809523e-06, "loss": 40.4813, "step": 10187 }, { "epoch": 242.5731343283582, "grad_norm": 1.3411036729812622, "learning_rate": 9.502857142857144e-06, "loss": 40.5714, "step": 10188 }, { "epoch": 242.59701492537314, "grad_norm": 1.3021043539047241, "learning_rate": 9.501904761904763e-06, "loss": 38.1151, "step": 10189 }, { "epoch": 242.62089552238805, "grad_norm": 1.3361033201217651, "learning_rate": 9.500952380952382e-06, "loss": 39.6945, "step": 10190 }, { "epoch": 242.644776119403, "grad_norm": 1.3336172103881836, "learning_rate": 9.5e-06, "loss": 40.2183, "step": 10191 }, { "epoch": 242.6686567164179, "grad_norm": 1.3438589572906494, "learning_rate": 9.49904761904762e-06, "loss": 40.1104, "step": 10192 }, { "epoch": 242.69253731343284, "grad_norm": 1.3332549333572388, "learning_rate": 9.498095238095238e-06, "loss": 39.6026, "step": 10193 }, { "epoch": 242.71641791044777, "grad_norm": 1.3208324909210205, "learning_rate": 9.497142857142859e-06, "loss": 39.455, "step": 10194 }, { "epoch": 242.74029850746268, "grad_norm": 1.3234196901321411, "learning_rate": 9.496190476190476e-06, "loss": 39.3528, "step": 10195 }, { "epoch": 242.76417910447762, "grad_norm": 1.3628093004226685, "learning_rate": 9.495238095238097e-06, "loss": 39.1155, "step": 10196 }, { "epoch": 242.78805970149253, "grad_norm": 1.3254151344299316, "learning_rate": 9.494285714285716e-06, "loss": 41.1081, "step": 10197 }, { "epoch": 242.81194029850747, "grad_norm": 1.3576419353485107, "learning_rate": 9.493333333333334e-06, "loss": 39.7255, "step": 10198 }, { "epoch": 242.83582089552237, "grad_norm": 1.3148831129074097, "learning_rate": 9.492380952380953e-06, "loss": 39.3346, "step": 10199 }, { "epoch": 242.8597014925373, "grad_norm": 1.3787294626235962, "learning_rate": 9.491428571428572e-06, "loss": 40.9116, "step": 10200 }, { "epoch": 242.88358208955225, "grad_norm": 1.3658524751663208, "learning_rate": 9.490476190476191e-06, "loss": 39.2913, "step": 10201 }, { "epoch": 242.90746268656716, "grad_norm": 1.502685546875, "learning_rate": 9.48952380952381e-06, "loss": 39.8806, "step": 10202 }, { "epoch": 242.9313432835821, "grad_norm": 1.3787322044372559, "learning_rate": 9.488571428571429e-06, "loss": 40.0333, "step": 10203 }, { "epoch": 242.955223880597, "grad_norm": 1.3681365251541138, "learning_rate": 9.48761904761905e-06, "loss": 39.5917, "step": 10204 }, { "epoch": 242.97910447761194, "grad_norm": 1.3847987651824951, "learning_rate": 9.486666666666667e-06, "loss": 39.6156, "step": 10205 }, { "epoch": 243.0, "grad_norm": 1.2977824211120605, "learning_rate": 9.485714285714287e-06, "loss": 35.2914, "step": 10206 }, { "epoch": 243.02388059701494, "grad_norm": 1.328012228012085, "learning_rate": 9.484761904761906e-06, "loss": 39.1106, "step": 10207 }, { "epoch": 243.04776119402985, "grad_norm": 1.3340357542037964, "learning_rate": 9.483809523809525e-06, "loss": 39.4827, "step": 10208 }, { "epoch": 243.07164179104478, "grad_norm": 1.3896691799163818, "learning_rate": 9.482857142857144e-06, "loss": 39.7024, "step": 10209 }, { "epoch": 243.0955223880597, "grad_norm": 1.363438367843628, "learning_rate": 9.481904761904763e-06, "loss": 38.9778, "step": 10210 }, { "epoch": 243.11940298507463, "grad_norm": 1.3811618089675903, "learning_rate": 9.480952380952382e-06, "loss": 40.0185, "step": 10211 }, { "epoch": 243.14328358208957, "grad_norm": 1.3657798767089844, "learning_rate": 9.48e-06, "loss": 40.0734, "step": 10212 }, { "epoch": 243.16716417910447, "grad_norm": 1.3661551475524902, "learning_rate": 9.47904761904762e-06, "loss": 40.6951, "step": 10213 }, { "epoch": 243.1910447761194, "grad_norm": 1.3673831224441528, "learning_rate": 9.478095238095239e-06, "loss": 40.1577, "step": 10214 }, { "epoch": 243.21492537313432, "grad_norm": 1.3369941711425781, "learning_rate": 9.477142857142857e-06, "loss": 39.9079, "step": 10215 }, { "epoch": 243.23880597014926, "grad_norm": 1.3638155460357666, "learning_rate": 9.476190476190476e-06, "loss": 40.4406, "step": 10216 }, { "epoch": 243.26268656716417, "grad_norm": 1.3622713088989258, "learning_rate": 9.475238095238097e-06, "loss": 39.6756, "step": 10217 }, { "epoch": 243.2865671641791, "grad_norm": 1.3063126802444458, "learning_rate": 9.474285714285714e-06, "loss": 41.2002, "step": 10218 }, { "epoch": 243.31044776119404, "grad_norm": 1.3972375392913818, "learning_rate": 9.473333333333335e-06, "loss": 38.2947, "step": 10219 }, { "epoch": 243.33432835820895, "grad_norm": 1.3521735668182373, "learning_rate": 9.472380952380954e-06, "loss": 40.0237, "step": 10220 }, { "epoch": 243.3582089552239, "grad_norm": 1.349782943725586, "learning_rate": 9.471428571428572e-06, "loss": 40.074, "step": 10221 }, { "epoch": 243.3820895522388, "grad_norm": 1.35495924949646, "learning_rate": 9.470476190476191e-06, "loss": 39.019, "step": 10222 }, { "epoch": 243.40597014925373, "grad_norm": 1.3509241342544556, "learning_rate": 9.46952380952381e-06, "loss": 39.4437, "step": 10223 }, { "epoch": 243.42985074626867, "grad_norm": 1.364445447921753, "learning_rate": 9.46857142857143e-06, "loss": 39.3548, "step": 10224 }, { "epoch": 243.45373134328358, "grad_norm": 1.4013049602508545, "learning_rate": 9.467619047619048e-06, "loss": 38.032, "step": 10225 }, { "epoch": 243.47761194029852, "grad_norm": 1.3863365650177002, "learning_rate": 9.466666666666667e-06, "loss": 40.471, "step": 10226 }, { "epoch": 243.50149253731342, "grad_norm": 1.3517918586730957, "learning_rate": 9.465714285714288e-06, "loss": 40.3482, "step": 10227 }, { "epoch": 243.52537313432836, "grad_norm": 1.3591686487197876, "learning_rate": 9.464761904761905e-06, "loss": 40.376, "step": 10228 }, { "epoch": 243.54925373134327, "grad_norm": 1.329001545906067, "learning_rate": 9.463809523809525e-06, "loss": 39.0343, "step": 10229 }, { "epoch": 243.5731343283582, "grad_norm": 1.3354699611663818, "learning_rate": 9.462857142857144e-06, "loss": 40.4506, "step": 10230 }, { "epoch": 243.59701492537314, "grad_norm": 1.3438509702682495, "learning_rate": 9.461904761904761e-06, "loss": 39.8422, "step": 10231 }, { "epoch": 243.62089552238805, "grad_norm": 1.377761960029602, "learning_rate": 9.460952380952382e-06, "loss": 39.9337, "step": 10232 }, { "epoch": 243.644776119403, "grad_norm": 1.3720049858093262, "learning_rate": 9.460000000000001e-06, "loss": 39.7278, "step": 10233 }, { "epoch": 243.6686567164179, "grad_norm": 1.3201242685317993, "learning_rate": 9.45904761904762e-06, "loss": 39.5372, "step": 10234 }, { "epoch": 243.69253731343284, "grad_norm": 1.3201614618301392, "learning_rate": 9.458095238095239e-06, "loss": 39.4748, "step": 10235 }, { "epoch": 243.71641791044777, "grad_norm": 1.347362756729126, "learning_rate": 9.457142857142858e-06, "loss": 40.6591, "step": 10236 }, { "epoch": 243.74029850746268, "grad_norm": 1.342514991760254, "learning_rate": 9.456190476190476e-06, "loss": 39.3275, "step": 10237 }, { "epoch": 243.76417910447762, "grad_norm": 1.344639539718628, "learning_rate": 9.455238095238095e-06, "loss": 38.9312, "step": 10238 }, { "epoch": 243.78805970149253, "grad_norm": 1.3336161375045776, "learning_rate": 9.454285714285714e-06, "loss": 39.3336, "step": 10239 }, { "epoch": 243.81194029850747, "grad_norm": NaN, "learning_rate": 9.453333333333335e-06, "loss": 65.3465, "step": 10240 }, { "epoch": 243.83582089552237, "grad_norm": 1.3922655582427979, "learning_rate": 9.453333333333335e-06, "loss": 39.3098, "step": 10241 }, { "epoch": 243.8597014925373, "grad_norm": 1.3754903078079224, "learning_rate": 9.452380952380952e-06, "loss": 40.2301, "step": 10242 }, { "epoch": 243.88358208955225, "grad_norm": 1.361156702041626, "learning_rate": 9.451428571428573e-06, "loss": 39.9251, "step": 10243 }, { "epoch": 243.90746268656716, "grad_norm": 1.3672866821289062, "learning_rate": 9.450476190476192e-06, "loss": 39.2433, "step": 10244 }, { "epoch": 243.9313432835821, "grad_norm": 1.3543000221252441, "learning_rate": 9.44952380952381e-06, "loss": 41.1848, "step": 10245 }, { "epoch": 243.955223880597, "grad_norm": 1.32802414894104, "learning_rate": 9.44857142857143e-06, "loss": 39.0559, "step": 10246 }, { "epoch": 243.97910447761194, "grad_norm": 1.3989489078521729, "learning_rate": 9.447619047619048e-06, "loss": 39.5765, "step": 10247 }, { "epoch": 244.0, "grad_norm": 1.3782975673675537, "learning_rate": 9.446666666666667e-06, "loss": 35.0751, "step": 10248 }, { "epoch": 244.02388059701494, "grad_norm": 1.3099610805511475, "learning_rate": 9.445714285714288e-06, "loss": 40.2765, "step": 10249 }, { "epoch": 244.04776119402985, "grad_norm": 1.318139910697937, "learning_rate": 9.444761904761905e-06, "loss": 39.7183, "step": 10250 }, { "epoch": 244.07164179104478, "grad_norm": 1.3524892330169678, "learning_rate": 9.443809523809526e-06, "loss": 40.044, "step": 10251 }, { "epoch": 244.0955223880597, "grad_norm": 1.3500128984451294, "learning_rate": 9.442857142857144e-06, "loss": 38.8832, "step": 10252 }, { "epoch": 244.11940298507463, "grad_norm": 1.3701260089874268, "learning_rate": 9.441904761904762e-06, "loss": 39.8107, "step": 10253 }, { "epoch": 244.14328358208957, "grad_norm": 1.3541357517242432, "learning_rate": 9.440952380952382e-06, "loss": 40.2343, "step": 10254 }, { "epoch": 244.16716417910447, "grad_norm": 1.372961401939392, "learning_rate": 9.440000000000001e-06, "loss": 39.4735, "step": 10255 }, { "epoch": 244.1910447761194, "grad_norm": NaN, "learning_rate": 9.43904761904762e-06, "loss": 69.8166, "step": 10256 }, { "epoch": 244.21492537313432, "grad_norm": 1.3248546123504639, "learning_rate": 9.43904761904762e-06, "loss": 38.4931, "step": 10257 }, { "epoch": 244.23880597014926, "grad_norm": 1.3421120643615723, "learning_rate": 9.438095238095239e-06, "loss": 39.4754, "step": 10258 }, { "epoch": 244.26268656716417, "grad_norm": 1.3634469509124756, "learning_rate": 9.437142857142858e-06, "loss": 39.287, "step": 10259 }, { "epoch": 244.2865671641791, "grad_norm": 1.3733865022659302, "learning_rate": 9.436190476190477e-06, "loss": 39.5141, "step": 10260 }, { "epoch": 244.31044776119404, "grad_norm": 1.347477674484253, "learning_rate": 9.435238095238096e-06, "loss": 40.2354, "step": 10261 }, { "epoch": 244.33432835820895, "grad_norm": 1.3411870002746582, "learning_rate": 9.434285714285714e-06, "loss": 39.1989, "step": 10262 }, { "epoch": 244.3582089552239, "grad_norm": 1.4244188070297241, "learning_rate": 9.433333333333335e-06, "loss": 40.8027, "step": 10263 }, { "epoch": 244.3820895522388, "grad_norm": 1.3227014541625977, "learning_rate": 9.432380952380952e-06, "loss": 39.846, "step": 10264 }, { "epoch": 244.40597014925373, "grad_norm": 1.37709379196167, "learning_rate": 9.431428571428573e-06, "loss": 39.1325, "step": 10265 }, { "epoch": 244.42985074626867, "grad_norm": 1.4148951768875122, "learning_rate": 9.430476190476192e-06, "loss": 40.1607, "step": 10266 }, { "epoch": 244.45373134328358, "grad_norm": 1.3261719942092896, "learning_rate": 9.42952380952381e-06, "loss": 39.8652, "step": 10267 }, { "epoch": 244.47761194029852, "grad_norm": 1.3309087753295898, "learning_rate": 9.42857142857143e-06, "loss": 40.3522, "step": 10268 }, { "epoch": 244.50149253731342, "grad_norm": 1.425939917564392, "learning_rate": 9.427619047619048e-06, "loss": 39.9956, "step": 10269 }, { "epoch": 244.52537313432836, "grad_norm": 1.323218584060669, "learning_rate": 9.426666666666667e-06, "loss": 40.3208, "step": 10270 }, { "epoch": 244.54925373134327, "grad_norm": 1.3441535234451294, "learning_rate": 9.425714285714286e-06, "loss": 38.8674, "step": 10271 }, { "epoch": 244.5731343283582, "grad_norm": 1.3393616676330566, "learning_rate": 9.424761904761905e-06, "loss": 39.7414, "step": 10272 }, { "epoch": 244.59701492537314, "grad_norm": 1.348021388053894, "learning_rate": 9.423809523809526e-06, "loss": 39.6859, "step": 10273 }, { "epoch": 244.62089552238805, "grad_norm": 1.3835211992263794, "learning_rate": 9.422857142857143e-06, "loss": 40.6955, "step": 10274 }, { "epoch": 244.644776119403, "grad_norm": 1.3615269660949707, "learning_rate": 9.421904761904763e-06, "loss": 40.2337, "step": 10275 }, { "epoch": 244.6686567164179, "grad_norm": 1.346468210220337, "learning_rate": 9.420952380952382e-06, "loss": 39.2384, "step": 10276 }, { "epoch": 244.69253731343284, "grad_norm": 1.3493832349777222, "learning_rate": 9.42e-06, "loss": 38.9843, "step": 10277 }, { "epoch": 244.71641791044777, "grad_norm": 1.3454418182373047, "learning_rate": 9.41904761904762e-06, "loss": 39.8256, "step": 10278 }, { "epoch": 244.74029850746268, "grad_norm": 1.3407546281814575, "learning_rate": 9.418095238095239e-06, "loss": 40.7143, "step": 10279 }, { "epoch": 244.76417910447762, "grad_norm": 1.3867460489273071, "learning_rate": 9.417142857142858e-06, "loss": 39.8836, "step": 10280 }, { "epoch": 244.78805970149253, "grad_norm": 1.3426011800765991, "learning_rate": 9.416190476190477e-06, "loss": 40.1537, "step": 10281 }, { "epoch": 244.81194029850747, "grad_norm": 1.3480829000473022, "learning_rate": 9.415238095238096e-06, "loss": 39.9112, "step": 10282 }, { "epoch": 244.83582089552237, "grad_norm": 1.359261393547058, "learning_rate": 9.414285714285715e-06, "loss": 41.2185, "step": 10283 }, { "epoch": 244.8597014925373, "grad_norm": 1.3442498445510864, "learning_rate": 9.413333333333334e-06, "loss": 39.1735, "step": 10284 }, { "epoch": 244.88358208955225, "grad_norm": 1.3620028495788574, "learning_rate": 9.412380952380952e-06, "loss": 39.7481, "step": 10285 }, { "epoch": 244.90746268656716, "grad_norm": 1.3661936521530151, "learning_rate": 9.411428571428573e-06, "loss": 39.4522, "step": 10286 }, { "epoch": 244.9313432835821, "grad_norm": 1.3404555320739746, "learning_rate": 9.41047619047619e-06, "loss": 38.9953, "step": 10287 }, { "epoch": 244.955223880597, "grad_norm": 1.3390291929244995, "learning_rate": 9.40952380952381e-06, "loss": 40.0761, "step": 10288 }, { "epoch": 244.97910447761194, "grad_norm": 1.3647511005401611, "learning_rate": 9.40857142857143e-06, "loss": 39.0825, "step": 10289 }, { "epoch": 245.0, "grad_norm": 1.3580105304718018, "learning_rate": 9.407619047619049e-06, "loss": 33.8616, "step": 10290 }, { "epoch": 245.02388059701494, "grad_norm": 1.3261311054229736, "learning_rate": 9.406666666666668e-06, "loss": 39.8617, "step": 10291 }, { "epoch": 245.04776119402985, "grad_norm": 1.3887335062026978, "learning_rate": 9.405714285714286e-06, "loss": 38.3895, "step": 10292 }, { "epoch": 245.07164179104478, "grad_norm": 1.334335446357727, "learning_rate": 9.404761904761905e-06, "loss": 39.7776, "step": 10293 }, { "epoch": 245.0955223880597, "grad_norm": 1.2946217060089111, "learning_rate": 9.403809523809526e-06, "loss": 41.1845, "step": 10294 }, { "epoch": 245.11940298507463, "grad_norm": 1.331665277481079, "learning_rate": 9.402857142857143e-06, "loss": 39.6387, "step": 10295 }, { "epoch": 245.14328358208957, "grad_norm": 1.3450425863265991, "learning_rate": 9.401904761904764e-06, "loss": 39.813, "step": 10296 }, { "epoch": 245.16716417910447, "grad_norm": 1.3729890584945679, "learning_rate": 9.400952380952381e-06, "loss": 39.2698, "step": 10297 }, { "epoch": 245.1910447761194, "grad_norm": 1.307995080947876, "learning_rate": 9.4e-06, "loss": 39.7584, "step": 10298 }, { "epoch": 245.21492537313432, "grad_norm": 1.3427841663360596, "learning_rate": 9.39904761904762e-06, "loss": 39.921, "step": 10299 }, { "epoch": 245.23880597014926, "grad_norm": 1.4343734979629517, "learning_rate": 9.398095238095238e-06, "loss": 41.1727, "step": 10300 }, { "epoch": 245.26268656716417, "grad_norm": 1.3688098192214966, "learning_rate": 9.397142857142858e-06, "loss": 41.0167, "step": 10301 }, { "epoch": 245.2865671641791, "grad_norm": 1.3482954502105713, "learning_rate": 9.396190476190477e-06, "loss": 39.2241, "step": 10302 }, { "epoch": 245.31044776119404, "grad_norm": 1.3142026662826538, "learning_rate": 9.395238095238096e-06, "loss": 39.9134, "step": 10303 }, { "epoch": 245.33432835820895, "grad_norm": 1.3309519290924072, "learning_rate": 9.394285714285715e-06, "loss": 40.1054, "step": 10304 }, { "epoch": 245.3582089552239, "grad_norm": 1.332621455192566, "learning_rate": 9.393333333333334e-06, "loss": 40.1614, "step": 10305 }, { "epoch": 245.3820895522388, "grad_norm": 1.3765789270401, "learning_rate": 9.392380952380953e-06, "loss": 39.9788, "step": 10306 }, { "epoch": 245.40597014925373, "grad_norm": 1.3666057586669922, "learning_rate": 9.391428571428573e-06, "loss": 39.267, "step": 10307 }, { "epoch": 245.42985074626867, "grad_norm": 1.3742327690124512, "learning_rate": 9.39047619047619e-06, "loss": 38.6496, "step": 10308 }, { "epoch": 245.45373134328358, "grad_norm": 1.3336501121520996, "learning_rate": 9.389523809523811e-06, "loss": 40.4954, "step": 10309 }, { "epoch": 245.47761194029852, "grad_norm": 1.3270785808563232, "learning_rate": 9.38857142857143e-06, "loss": 39.4045, "step": 10310 }, { "epoch": 245.50149253731342, "grad_norm": 1.3653340339660645, "learning_rate": 9.387619047619049e-06, "loss": 40.0658, "step": 10311 }, { "epoch": 245.52537313432836, "grad_norm": NaN, "learning_rate": 9.386666666666668e-06, "loss": 48.9215, "step": 10312 }, { "epoch": 245.54925373134327, "grad_norm": 1.3404810428619385, "learning_rate": 9.386666666666668e-06, "loss": 40.1831, "step": 10313 }, { "epoch": 245.5731343283582, "grad_norm": 1.3892405033111572, "learning_rate": 9.385714285714287e-06, "loss": 39.6944, "step": 10314 }, { "epoch": 245.59701492537314, "grad_norm": 1.3367183208465576, "learning_rate": 9.384761904761906e-06, "loss": 40.1507, "step": 10315 }, { "epoch": 245.62089552238805, "grad_norm": 1.3552446365356445, "learning_rate": 9.383809523809524e-06, "loss": 39.5025, "step": 10316 }, { "epoch": 245.644776119403, "grad_norm": 1.3584622144699097, "learning_rate": 9.382857142857143e-06, "loss": 40.1896, "step": 10317 }, { "epoch": 245.6686567164179, "grad_norm": 1.3455445766448975, "learning_rate": 9.381904761904764e-06, "loss": 40.7685, "step": 10318 }, { "epoch": 245.69253731343284, "grad_norm": 1.3572133779525757, "learning_rate": 9.380952380952381e-06, "loss": 39.7047, "step": 10319 }, { "epoch": 245.71641791044777, "grad_norm": 1.3559789657592773, "learning_rate": 9.38e-06, "loss": 39.8564, "step": 10320 }, { "epoch": 245.74029850746268, "grad_norm": 1.3355987071990967, "learning_rate": 9.37904761904762e-06, "loss": 39.8331, "step": 10321 }, { "epoch": 245.76417910447762, "grad_norm": 1.3188799619674683, "learning_rate": 9.378095238095238e-06, "loss": 39.3534, "step": 10322 }, { "epoch": 245.78805970149253, "grad_norm": 1.398790955543518, "learning_rate": 9.377142857142858e-06, "loss": 39.5841, "step": 10323 }, { "epoch": 245.81194029850747, "grad_norm": 1.3656972646713257, "learning_rate": 9.376190476190477e-06, "loss": 40.6178, "step": 10324 }, { "epoch": 245.83582089552237, "grad_norm": 1.3827553987503052, "learning_rate": 9.375238095238096e-06, "loss": 39.3091, "step": 10325 }, { "epoch": 245.8597014925373, "grad_norm": 1.3362138271331787, "learning_rate": 9.374285714285715e-06, "loss": 39.9097, "step": 10326 }, { "epoch": 245.88358208955225, "grad_norm": 1.3954017162322998, "learning_rate": 9.373333333333334e-06, "loss": 40.1017, "step": 10327 }, { "epoch": 245.90746268656716, "grad_norm": 1.351642370223999, "learning_rate": 9.372380952380953e-06, "loss": 40.2945, "step": 10328 }, { "epoch": 245.9313432835821, "grad_norm": 1.337455153465271, "learning_rate": 9.371428571428572e-06, "loss": 39.3294, "step": 10329 }, { "epoch": 245.955223880597, "grad_norm": 1.3311225175857544, "learning_rate": 9.37047619047619e-06, "loss": 38.5953, "step": 10330 }, { "epoch": 245.97910447761194, "grad_norm": 1.3470896482467651, "learning_rate": 9.369523809523811e-06, "loss": 38.2299, "step": 10331 }, { "epoch": 246.0, "grad_norm": 1.3420820236206055, "learning_rate": 9.368571428571428e-06, "loss": 33.5063, "step": 10332 }, { "epoch": 246.02388059701494, "grad_norm": 1.3876551389694214, "learning_rate": 9.367619047619049e-06, "loss": 38.9146, "step": 10333 }, { "epoch": 246.04776119402985, "grad_norm": 1.3454464673995972, "learning_rate": 9.366666666666668e-06, "loss": 39.297, "step": 10334 }, { "epoch": 246.07164179104478, "grad_norm": 1.4836697578430176, "learning_rate": 9.365714285714287e-06, "loss": 39.1177, "step": 10335 }, { "epoch": 246.0955223880597, "grad_norm": 1.3545160293579102, "learning_rate": 9.364761904761906e-06, "loss": 38.2115, "step": 10336 }, { "epoch": 246.11940298507463, "grad_norm": 1.3471019268035889, "learning_rate": 9.363809523809525e-06, "loss": 38.785, "step": 10337 }, { "epoch": 246.14328358208957, "grad_norm": 1.3522493839263916, "learning_rate": 9.362857142857143e-06, "loss": 40.7842, "step": 10338 }, { "epoch": 246.16716417910447, "grad_norm": 1.4057724475860596, "learning_rate": 9.361904761904762e-06, "loss": 39.2357, "step": 10339 }, { "epoch": 246.1910447761194, "grad_norm": 1.3573944568634033, "learning_rate": 9.360952380952381e-06, "loss": 38.7793, "step": 10340 }, { "epoch": 246.21492537313432, "grad_norm": 1.423032283782959, "learning_rate": 9.360000000000002e-06, "loss": 40.588, "step": 10341 }, { "epoch": 246.23880597014926, "grad_norm": 1.3394354581832886, "learning_rate": 9.359047619047619e-06, "loss": 40.5429, "step": 10342 }, { "epoch": 246.26268656716417, "grad_norm": 1.3596118688583374, "learning_rate": 9.358095238095238e-06, "loss": 39.0203, "step": 10343 }, { "epoch": 246.2865671641791, "grad_norm": 1.3523012399673462, "learning_rate": 9.357142857142859e-06, "loss": 39.7898, "step": 10344 }, { "epoch": 246.31044776119404, "grad_norm": 1.3819859027862549, "learning_rate": 9.356190476190476e-06, "loss": 39.2939, "step": 10345 }, { "epoch": 246.33432835820895, "grad_norm": 1.3397200107574463, "learning_rate": 9.355238095238096e-06, "loss": 39.7686, "step": 10346 }, { "epoch": 246.3582089552239, "grad_norm": 1.3442409038543701, "learning_rate": 9.354285714285715e-06, "loss": 39.8084, "step": 10347 }, { "epoch": 246.3820895522388, "grad_norm": 1.3767480850219727, "learning_rate": 9.353333333333334e-06, "loss": 39.9127, "step": 10348 }, { "epoch": 246.40597014925373, "grad_norm": 1.3566482067108154, "learning_rate": 9.352380952380953e-06, "loss": 39.1786, "step": 10349 }, { "epoch": 246.42985074626867, "grad_norm": 1.3277831077575684, "learning_rate": 9.351428571428572e-06, "loss": 38.7898, "step": 10350 }, { "epoch": 246.45373134328358, "grad_norm": 1.356459379196167, "learning_rate": 9.35047619047619e-06, "loss": 39.4595, "step": 10351 }, { "epoch": 246.47761194029852, "grad_norm": 1.3681666851043701, "learning_rate": 9.34952380952381e-06, "loss": 40.0788, "step": 10352 }, { "epoch": 246.50149253731342, "grad_norm": 1.9000691175460815, "learning_rate": 9.348571428571429e-06, "loss": 41.1652, "step": 10353 }, { "epoch": 246.52537313432836, "grad_norm": 1.359844446182251, "learning_rate": 9.34761904761905e-06, "loss": 41.3404, "step": 10354 }, { "epoch": 246.54925373134327, "grad_norm": 1.3801852464675903, "learning_rate": 9.346666666666666e-06, "loss": 40.9087, "step": 10355 }, { "epoch": 246.5731343283582, "grad_norm": 1.3578726053237915, "learning_rate": 9.345714285714287e-06, "loss": 40.0298, "step": 10356 }, { "epoch": 246.59701492537314, "grad_norm": 1.3520194292068481, "learning_rate": 9.344761904761906e-06, "loss": 39.5326, "step": 10357 }, { "epoch": 246.62089552238805, "grad_norm": 1.3337996006011963, "learning_rate": 9.343809523809525e-06, "loss": 38.9949, "step": 10358 }, { "epoch": 246.644776119403, "grad_norm": 1.359753966331482, "learning_rate": 9.342857142857144e-06, "loss": 40.1075, "step": 10359 }, { "epoch": 246.6686567164179, "grad_norm": 1.3742188215255737, "learning_rate": 9.341904761904763e-06, "loss": 39.8152, "step": 10360 }, { "epoch": 246.69253731343284, "grad_norm": 1.3922412395477295, "learning_rate": 9.340952380952381e-06, "loss": 39.9785, "step": 10361 }, { "epoch": 246.71641791044777, "grad_norm": 1.353317379951477, "learning_rate": 9.340000000000002e-06, "loss": 40.1633, "step": 10362 }, { "epoch": 246.74029850746268, "grad_norm": 1.338786005973816, "learning_rate": 9.33904761904762e-06, "loss": 38.4515, "step": 10363 }, { "epoch": 246.76417910447762, "grad_norm": 1.3340404033660889, "learning_rate": 9.338095238095238e-06, "loss": 40.5023, "step": 10364 }, { "epoch": 246.78805970149253, "grad_norm": 1.3993886709213257, "learning_rate": 9.337142857142859e-06, "loss": 39.5605, "step": 10365 }, { "epoch": 246.81194029850747, "grad_norm": 1.365745186805725, "learning_rate": 9.336190476190476e-06, "loss": 40.909, "step": 10366 }, { "epoch": 246.83582089552237, "grad_norm": 1.3536626100540161, "learning_rate": 9.335238095238097e-06, "loss": 39.3713, "step": 10367 }, { "epoch": 246.8597014925373, "grad_norm": 1.3977662324905396, "learning_rate": 9.334285714285715e-06, "loss": 39.9396, "step": 10368 }, { "epoch": 246.88358208955225, "grad_norm": 1.3346747159957886, "learning_rate": 9.333333333333334e-06, "loss": 40.8116, "step": 10369 }, { "epoch": 246.90746268656716, "grad_norm": 1.3156347274780273, "learning_rate": 9.332380952380953e-06, "loss": 39.5312, "step": 10370 }, { "epoch": 246.9313432835821, "grad_norm": 1.3858150243759155, "learning_rate": 9.331428571428572e-06, "loss": 39.7451, "step": 10371 }, { "epoch": 246.955223880597, "grad_norm": 1.3553389310836792, "learning_rate": 9.330476190476191e-06, "loss": 39.8154, "step": 10372 }, { "epoch": 246.97910447761194, "grad_norm": 1.3642445802688599, "learning_rate": 9.32952380952381e-06, "loss": 40.5166, "step": 10373 }, { "epoch": 247.0, "grad_norm": 1.3529295921325684, "learning_rate": 9.328571428571429e-06, "loss": 34.1481, "step": 10374 }, { "epoch": 247.02388059701494, "grad_norm": 1.3820700645446777, "learning_rate": 9.32761904761905e-06, "loss": 39.8565, "step": 10375 }, { "epoch": 247.04776119402985, "grad_norm": 1.3308559656143188, "learning_rate": 9.326666666666667e-06, "loss": 40.7811, "step": 10376 }, { "epoch": 247.07164179104478, "grad_norm": 1.3444546461105347, "learning_rate": 9.325714285714287e-06, "loss": 39.9986, "step": 10377 }, { "epoch": 247.0955223880597, "grad_norm": 1.3342419862747192, "learning_rate": 9.324761904761906e-06, "loss": 39.9869, "step": 10378 }, { "epoch": 247.11940298507463, "grad_norm": 1.365648865699768, "learning_rate": 9.323809523809525e-06, "loss": 39.8819, "step": 10379 }, { "epoch": 247.14328358208957, "grad_norm": 1.3437520265579224, "learning_rate": 9.322857142857144e-06, "loss": 39.0826, "step": 10380 }, { "epoch": 247.16716417910447, "grad_norm": 1.332322359085083, "learning_rate": 9.321904761904763e-06, "loss": 40.1789, "step": 10381 }, { "epoch": 247.1910447761194, "grad_norm": 1.3513237237930298, "learning_rate": 9.320952380952382e-06, "loss": 40.5911, "step": 10382 }, { "epoch": 247.21492537313432, "grad_norm": 1.3504446744918823, "learning_rate": 9.32e-06, "loss": 39.4378, "step": 10383 }, { "epoch": 247.23880597014926, "grad_norm": 1.3429797887802124, "learning_rate": 9.31904761904762e-06, "loss": 38.5123, "step": 10384 }, { "epoch": 247.26268656716417, "grad_norm": 1.3484200239181519, "learning_rate": 9.318095238095238e-06, "loss": 39.2508, "step": 10385 }, { "epoch": 247.2865671641791, "grad_norm": 1.39887273311615, "learning_rate": 9.317142857142857e-06, "loss": 39.0208, "step": 10386 }, { "epoch": 247.31044776119404, "grad_norm": 1.3662382364273071, "learning_rate": 9.316190476190476e-06, "loss": 40.5419, "step": 10387 }, { "epoch": 247.33432835820895, "grad_norm": 1.3743329048156738, "learning_rate": 9.315238095238097e-06, "loss": 39.7277, "step": 10388 }, { "epoch": 247.3582089552239, "grad_norm": 1.3363882303237915, "learning_rate": 9.314285714285714e-06, "loss": 40.1277, "step": 10389 }, { "epoch": 247.3820895522388, "grad_norm": 1.4205633401870728, "learning_rate": 9.313333333333335e-06, "loss": 40.1857, "step": 10390 }, { "epoch": 247.40597014925373, "grad_norm": 1.3259003162384033, "learning_rate": 9.312380952380953e-06, "loss": 39.8382, "step": 10391 }, { "epoch": 247.42985074626867, "grad_norm": 1.3791624307632446, "learning_rate": 9.311428571428572e-06, "loss": 39.5635, "step": 10392 }, { "epoch": 247.45373134328358, "grad_norm": 1.3805805444717407, "learning_rate": 9.310476190476191e-06, "loss": 40.3915, "step": 10393 }, { "epoch": 247.47761194029852, "grad_norm": 1.3465080261230469, "learning_rate": 9.30952380952381e-06, "loss": 39.335, "step": 10394 }, { "epoch": 247.50149253731342, "grad_norm": 1.3920878171920776, "learning_rate": 9.308571428571429e-06, "loss": 39.1836, "step": 10395 }, { "epoch": 247.52537313432836, "grad_norm": 1.4554592370986938, "learning_rate": 9.307619047619048e-06, "loss": 39.7231, "step": 10396 }, { "epoch": 247.54925373134327, "grad_norm": 1.3166935443878174, "learning_rate": 9.306666666666667e-06, "loss": 39.3819, "step": 10397 }, { "epoch": 247.5731343283582, "grad_norm": 1.331278920173645, "learning_rate": 9.305714285714287e-06, "loss": 39.2866, "step": 10398 }, { "epoch": 247.59701492537314, "grad_norm": 1.3600852489471436, "learning_rate": 9.304761904761905e-06, "loss": 41.6483, "step": 10399 }, { "epoch": 247.62089552238805, "grad_norm": 1.3524738550186157, "learning_rate": 9.303809523809525e-06, "loss": 39.6535, "step": 10400 }, { "epoch": 247.644776119403, "grad_norm": 1.364173173904419, "learning_rate": 9.302857142857144e-06, "loss": 39.6322, "step": 10401 }, { "epoch": 247.6686567164179, "grad_norm": 1.3998594284057617, "learning_rate": 9.301904761904763e-06, "loss": 39.6776, "step": 10402 }, { "epoch": 247.69253731343284, "grad_norm": 1.3700085878372192, "learning_rate": 9.300952380952382e-06, "loss": 39.9016, "step": 10403 }, { "epoch": 247.71641791044777, "grad_norm": 1.343340516090393, "learning_rate": 9.3e-06, "loss": 38.2975, "step": 10404 }, { "epoch": 247.74029850746268, "grad_norm": 1.370579719543457, "learning_rate": 9.29904761904762e-06, "loss": 40.3933, "step": 10405 }, { "epoch": 247.76417910447762, "grad_norm": 1.323255181312561, "learning_rate": 9.29809523809524e-06, "loss": 39.2481, "step": 10406 }, { "epoch": 247.78805970149253, "grad_norm": 1.3542063236236572, "learning_rate": 9.297142857142857e-06, "loss": 40.7112, "step": 10407 }, { "epoch": 247.81194029850747, "grad_norm": 1.3516201972961426, "learning_rate": 9.296190476190476e-06, "loss": 40.0599, "step": 10408 }, { "epoch": 247.83582089552237, "grad_norm": 1.3723844289779663, "learning_rate": 9.295238095238095e-06, "loss": 39.4977, "step": 10409 }, { "epoch": 247.8597014925373, "grad_norm": 1.3856825828552246, "learning_rate": 9.294285714285714e-06, "loss": 40.1414, "step": 10410 }, { "epoch": 247.88358208955225, "grad_norm": 1.3391963243484497, "learning_rate": 9.293333333333335e-06, "loss": 39.4528, "step": 10411 }, { "epoch": 247.90746268656716, "grad_norm": 1.358086109161377, "learning_rate": 9.292380952380952e-06, "loss": 39.045, "step": 10412 }, { "epoch": 247.9313432835821, "grad_norm": 1.329856038093567, "learning_rate": 9.291428571428572e-06, "loss": 38.1729, "step": 10413 }, { "epoch": 247.955223880597, "grad_norm": 1.3915904760360718, "learning_rate": 9.290476190476191e-06, "loss": 39.3564, "step": 10414 }, { "epoch": 247.97910447761194, "grad_norm": 1.3477131128311157, "learning_rate": 9.28952380952381e-06, "loss": 39.6437, "step": 10415 }, { "epoch": 248.0, "grad_norm": 1.3890045881271362, "learning_rate": 9.28857142857143e-06, "loss": 36.1284, "step": 10416 }, { "epoch": 248.02388059701494, "grad_norm": 1.3725874423980713, "learning_rate": 9.287619047619048e-06, "loss": 38.3757, "step": 10417 }, { "epoch": 248.04776119402985, "grad_norm": 1.3257439136505127, "learning_rate": 9.286666666666667e-06, "loss": 40.2142, "step": 10418 }, { "epoch": 248.07164179104478, "grad_norm": 1.3618111610412598, "learning_rate": 9.285714285714288e-06, "loss": 39.0745, "step": 10419 }, { "epoch": 248.0955223880597, "grad_norm": 1.3307377099990845, "learning_rate": 9.284761904761905e-06, "loss": 38.0138, "step": 10420 }, { "epoch": 248.11940298507463, "grad_norm": 1.292189598083496, "learning_rate": 9.283809523809525e-06, "loss": 40.2612, "step": 10421 }, { "epoch": 248.14328358208957, "grad_norm": 1.3759983777999878, "learning_rate": 9.282857142857144e-06, "loss": 40.534, "step": 10422 }, { "epoch": 248.16716417910447, "grad_norm": 1.3245717287063599, "learning_rate": 9.281904761904763e-06, "loss": 38.6506, "step": 10423 }, { "epoch": 248.1910447761194, "grad_norm": 1.3610963821411133, "learning_rate": 9.280952380952382e-06, "loss": 39.7593, "step": 10424 }, { "epoch": 248.21492537313432, "grad_norm": 1.3848546743392944, "learning_rate": 9.280000000000001e-06, "loss": 38.2923, "step": 10425 }, { "epoch": 248.23880597014926, "grad_norm": 1.3081958293914795, "learning_rate": 9.27904761904762e-06, "loss": 39.7495, "step": 10426 }, { "epoch": 248.26268656716417, "grad_norm": 1.335105299949646, "learning_rate": 9.278095238095239e-06, "loss": 40.0022, "step": 10427 }, { "epoch": 248.2865671641791, "grad_norm": 1.3852819204330444, "learning_rate": 9.277142857142858e-06, "loss": 38.9548, "step": 10428 }, { "epoch": 248.31044776119404, "grad_norm": 1.3704179525375366, "learning_rate": 9.276190476190477e-06, "loss": 39.5889, "step": 10429 }, { "epoch": 248.33432835820895, "grad_norm": NaN, "learning_rate": 9.275238095238095e-06, "loss": 40.3433, "step": 10430 }, { "epoch": 248.3582089552239, "grad_norm": 1.3482799530029297, "learning_rate": 9.275238095238095e-06, "loss": 40.1806, "step": 10431 }, { "epoch": 248.3820895522388, "grad_norm": 1.3692665100097656, "learning_rate": 9.274285714285714e-06, "loss": 40.5051, "step": 10432 }, { "epoch": 248.40597014925373, "grad_norm": 1.3620270490646362, "learning_rate": 9.273333333333335e-06, "loss": 39.461, "step": 10433 }, { "epoch": 248.42985074626867, "grad_norm": 1.3838999271392822, "learning_rate": 9.272380952380952e-06, "loss": 40.5849, "step": 10434 }, { "epoch": 248.45373134328358, "grad_norm": 1.3854483366012573, "learning_rate": 9.271428571428573e-06, "loss": 40.1642, "step": 10435 }, { "epoch": 248.47761194029852, "grad_norm": 1.379987359046936, "learning_rate": 9.270476190476192e-06, "loss": 40.2401, "step": 10436 }, { "epoch": 248.50149253731342, "grad_norm": 1.3582227230072021, "learning_rate": 9.26952380952381e-06, "loss": 40.6036, "step": 10437 }, { "epoch": 248.52537313432836, "grad_norm": 1.35991370677948, "learning_rate": 9.26857142857143e-06, "loss": 40.278, "step": 10438 }, { "epoch": 248.54925373134327, "grad_norm": 1.3684972524642944, "learning_rate": 9.267619047619048e-06, "loss": 40.601, "step": 10439 }, { "epoch": 248.5731343283582, "grad_norm": 1.3803770542144775, "learning_rate": 9.266666666666667e-06, "loss": 41.0305, "step": 10440 }, { "epoch": 248.59701492537314, "grad_norm": 1.4369288682937622, "learning_rate": 9.265714285714286e-06, "loss": 40.0923, "step": 10441 }, { "epoch": 248.62089552238805, "grad_norm": 1.366847038269043, "learning_rate": 9.264761904761905e-06, "loss": 39.4645, "step": 10442 }, { "epoch": 248.644776119403, "grad_norm": 1.341105580329895, "learning_rate": 9.263809523809526e-06, "loss": 40.3488, "step": 10443 }, { "epoch": 248.6686567164179, "grad_norm": 1.3000922203063965, "learning_rate": 9.262857142857143e-06, "loss": 38.7112, "step": 10444 }, { "epoch": 248.69253731343284, "grad_norm": 1.327433466911316, "learning_rate": 9.261904761904763e-06, "loss": 38.7894, "step": 10445 }, { "epoch": 248.71641791044777, "grad_norm": 1.3675776720046997, "learning_rate": 9.260952380952382e-06, "loss": 40.1597, "step": 10446 }, { "epoch": 248.74029850746268, "grad_norm": 1.3363956212997437, "learning_rate": 9.260000000000001e-06, "loss": 39.9598, "step": 10447 }, { "epoch": 248.76417910447762, "grad_norm": 1.368316411972046, "learning_rate": 9.25904761904762e-06, "loss": 40.8491, "step": 10448 }, { "epoch": 248.78805970149253, "grad_norm": 1.3601373434066772, "learning_rate": 9.258095238095239e-06, "loss": 39.3751, "step": 10449 }, { "epoch": 248.81194029850747, "grad_norm": 1.3362337350845337, "learning_rate": 9.257142857142858e-06, "loss": 38.7883, "step": 10450 }, { "epoch": 248.83582089552237, "grad_norm": 1.3325012922286987, "learning_rate": 9.256190476190477e-06, "loss": 40.5029, "step": 10451 }, { "epoch": 248.8597014925373, "grad_norm": 1.3608150482177734, "learning_rate": 9.255238095238096e-06, "loss": 39.3321, "step": 10452 }, { "epoch": 248.88358208955225, "grad_norm": 1.3990724086761475, "learning_rate": 9.254285714285714e-06, "loss": 38.833, "step": 10453 }, { "epoch": 248.90746268656716, "grad_norm": 1.337786078453064, "learning_rate": 9.253333333333333e-06, "loss": 39.8368, "step": 10454 }, { "epoch": 248.9313432835821, "grad_norm": 1.3680533170700073, "learning_rate": 9.252380952380952e-06, "loss": 38.5424, "step": 10455 }, { "epoch": 248.955223880597, "grad_norm": 1.3311853408813477, "learning_rate": 9.251428571428573e-06, "loss": 38.8294, "step": 10456 }, { "epoch": 248.97910447761194, "grad_norm": 1.3733782768249512, "learning_rate": 9.25047619047619e-06, "loss": 40.3023, "step": 10457 }, { "epoch": 249.0, "grad_norm": 1.2939749956130981, "learning_rate": 9.24952380952381e-06, "loss": 36.1022, "step": 10458 }, { "epoch": 249.02388059701494, "grad_norm": 1.3760799169540405, "learning_rate": 9.24857142857143e-06, "loss": 40.0342, "step": 10459 }, { "epoch": 249.04776119402985, "grad_norm": 1.35152006149292, "learning_rate": 9.247619047619048e-06, "loss": 40.753, "step": 10460 }, { "epoch": 249.07164179104478, "grad_norm": 1.3389378786087036, "learning_rate": 9.246666666666667e-06, "loss": 39.8285, "step": 10461 }, { "epoch": 249.0955223880597, "grad_norm": 1.3370436429977417, "learning_rate": 9.245714285714286e-06, "loss": 39.5656, "step": 10462 }, { "epoch": 249.11940298507463, "grad_norm": 1.389830470085144, "learning_rate": 9.244761904761905e-06, "loss": 39.8112, "step": 10463 }, { "epoch": 249.14328358208957, "grad_norm": 1.3325996398925781, "learning_rate": 9.243809523809526e-06, "loss": 40.6101, "step": 10464 }, { "epoch": 249.16716417910447, "grad_norm": 1.3284125328063965, "learning_rate": 9.242857142857143e-06, "loss": 40.5431, "step": 10465 }, { "epoch": 249.1910447761194, "grad_norm": 1.3578250408172607, "learning_rate": 9.241904761904764e-06, "loss": 40.8726, "step": 10466 }, { "epoch": 249.21492537313432, "grad_norm": 1.3538992404937744, "learning_rate": 9.24095238095238e-06, "loss": 40.1633, "step": 10467 }, { "epoch": 249.23880597014926, "grad_norm": 1.3344141244888306, "learning_rate": 9.240000000000001e-06, "loss": 38.8318, "step": 10468 }, { "epoch": 249.26268656716417, "grad_norm": 1.339163899421692, "learning_rate": 9.23904761904762e-06, "loss": 39.3527, "step": 10469 }, { "epoch": 249.2865671641791, "grad_norm": 1.3145591020584106, "learning_rate": 9.238095238095239e-06, "loss": 39.9684, "step": 10470 }, { "epoch": 249.31044776119404, "grad_norm": 1.3245477676391602, "learning_rate": 9.237142857142858e-06, "loss": 40.6914, "step": 10471 }, { "epoch": 249.33432835820895, "grad_norm": 1.326460599899292, "learning_rate": 9.236190476190477e-06, "loss": 38.7843, "step": 10472 }, { "epoch": 249.3582089552239, "grad_norm": 1.4152820110321045, "learning_rate": 9.235238095238096e-06, "loss": 39.6567, "step": 10473 }, { "epoch": 249.3820895522388, "grad_norm": 1.3623358011245728, "learning_rate": 9.234285714285715e-06, "loss": 39.8633, "step": 10474 }, { "epoch": 249.40597014925373, "grad_norm": 1.3703948259353638, "learning_rate": 9.233333333333334e-06, "loss": 38.3635, "step": 10475 }, { "epoch": 249.42985074626867, "grad_norm": 1.3192030191421509, "learning_rate": 9.232380952380952e-06, "loss": 40.2674, "step": 10476 }, { "epoch": 249.45373134328358, "grad_norm": 1.3328429460525513, "learning_rate": 9.231428571428573e-06, "loss": 39.008, "step": 10477 }, { "epoch": 249.47761194029852, "grad_norm": 1.422727108001709, "learning_rate": 9.23047619047619e-06, "loss": 39.2373, "step": 10478 }, { "epoch": 249.50149253731342, "grad_norm": 1.3330609798431396, "learning_rate": 9.229523809523811e-06, "loss": 40.311, "step": 10479 }, { "epoch": 249.52537313432836, "grad_norm": 1.3395957946777344, "learning_rate": 9.22857142857143e-06, "loss": 39.2246, "step": 10480 }, { "epoch": 249.54925373134327, "grad_norm": 1.365294337272644, "learning_rate": 9.227619047619049e-06, "loss": 39.3959, "step": 10481 }, { "epoch": 249.5731343283582, "grad_norm": 1.328464388847351, "learning_rate": 9.226666666666668e-06, "loss": 40.0913, "step": 10482 }, { "epoch": 249.59701492537314, "grad_norm": 1.3930165767669678, "learning_rate": 9.225714285714286e-06, "loss": 39.7484, "step": 10483 }, { "epoch": 249.62089552238805, "grad_norm": 1.31330406665802, "learning_rate": 9.224761904761905e-06, "loss": 38.2537, "step": 10484 }, { "epoch": 249.644776119403, "grad_norm": 1.3565995693206787, "learning_rate": 9.223809523809524e-06, "loss": 39.4839, "step": 10485 }, { "epoch": 249.6686567164179, "grad_norm": 1.3598816394805908, "learning_rate": 9.222857142857143e-06, "loss": 40.2636, "step": 10486 }, { "epoch": 249.69253731343284, "grad_norm": 1.3581936359405518, "learning_rate": 9.221904761904764e-06, "loss": 38.7736, "step": 10487 }, { "epoch": 249.71641791044777, "grad_norm": 1.3664422035217285, "learning_rate": 9.220952380952381e-06, "loss": 40.6219, "step": 10488 }, { "epoch": 249.74029850746268, "grad_norm": 1.394587516784668, "learning_rate": 9.220000000000002e-06, "loss": 39.8208, "step": 10489 }, { "epoch": 249.76417910447762, "grad_norm": 1.3652254343032837, "learning_rate": 9.21904761904762e-06, "loss": 38.4655, "step": 10490 }, { "epoch": 249.78805970149253, "grad_norm": 1.380753993988037, "learning_rate": 9.21809523809524e-06, "loss": 38.3896, "step": 10491 }, { "epoch": 249.81194029850747, "grad_norm": 1.362686038017273, "learning_rate": 9.217142857142858e-06, "loss": 41.2414, "step": 10492 }, { "epoch": 249.83582089552237, "grad_norm": 1.3303611278533936, "learning_rate": 9.216190476190477e-06, "loss": 39.9489, "step": 10493 }, { "epoch": 249.8597014925373, "grad_norm": 1.3176674842834473, "learning_rate": 9.215238095238096e-06, "loss": 40.2496, "step": 10494 }, { "epoch": 249.88358208955225, "grad_norm": 1.3293453454971313, "learning_rate": 9.214285714285715e-06, "loss": 39.8423, "step": 10495 }, { "epoch": 249.90746268656716, "grad_norm": 1.3654402494430542, "learning_rate": 9.213333333333334e-06, "loss": 40.4273, "step": 10496 }, { "epoch": 249.9313432835821, "grad_norm": 1.390635371208191, "learning_rate": 9.212380952380953e-06, "loss": 39.173, "step": 10497 }, { "epoch": 249.955223880597, "grad_norm": 1.3706836700439453, "learning_rate": 9.211428571428572e-06, "loss": 39.125, "step": 10498 }, { "epoch": 249.97910447761194, "grad_norm": 1.3887364864349365, "learning_rate": 9.21047619047619e-06, "loss": 40.8947, "step": 10499 }, { "epoch": 250.0, "grad_norm": 1.3804928064346313, "learning_rate": 9.209523809523811e-06, "loss": 33.8724, "step": 10500 }, { "epoch": 250.0, "step": 10500, "total_flos": 5.205377471694062e+17, "train_loss": 3.1863947993687223, "train_runtime": 25251.0751, "train_samples_per_second": 52.988, "train_steps_per_second": 0.416 }, { "epoch": 250.02388059701494, "grad_norm": 2.1921679973602295, "learning_rate": 1e-05, "loss": 40.3698, "step": 10501 }, { "epoch": 250.04776119402985, "grad_norm": 2.089646577835083, "learning_rate": 9.999118165784834e-06, "loss": 40.6396, "step": 10502 }, { "epoch": 250.07164179104478, "grad_norm": 1.8944206237792969, "learning_rate": 9.998236331569667e-06, "loss": 39.1064, "step": 10503 }, { "epoch": 250.0955223880597, "grad_norm": 1.728524923324585, "learning_rate": 9.997354497354498e-06, "loss": 40.1112, "step": 10504 }, { "epoch": 250.11940298507463, "grad_norm": 1.7715049982070923, "learning_rate": 9.99647266313933e-06, "loss": 39.0035, "step": 10505 }, { "epoch": 250.14328358208957, "grad_norm": 1.475654125213623, "learning_rate": 9.995590828924163e-06, "loss": 39.6883, "step": 10506 }, { "epoch": 250.16716417910447, "grad_norm": 1.3564939498901367, "learning_rate": 9.994708994708996e-06, "loss": 39.8828, "step": 10507 }, { "epoch": 250.1910447761194, "grad_norm": 1.3825258016586304, "learning_rate": 9.993827160493827e-06, "loss": 38.8284, "step": 10508 }, { "epoch": 250.21492537313432, "grad_norm": 1.4197767972946167, "learning_rate": 9.99294532627866e-06, "loss": 39.028, "step": 10509 }, { "epoch": 250.23880597014926, "grad_norm": 1.4359097480773926, "learning_rate": 9.992063492063493e-06, "loss": 40.5121, "step": 10510 }, { "epoch": 250.26268656716417, "grad_norm": 1.531156063079834, "learning_rate": 9.991181657848326e-06, "loss": 40.7505, "step": 10511 }, { "epoch": 250.2865671641791, "grad_norm": 1.5069704055786133, "learning_rate": 9.990299823633158e-06, "loss": 40.0005, "step": 10512 }, { "epoch": 250.31044776119404, "grad_norm": 1.5270435810089111, "learning_rate": 9.989417989417989e-06, "loss": 40.325, "step": 10513 }, { "epoch": 250.33432835820895, "grad_norm": 1.488959550857544, "learning_rate": 9.988536155202822e-06, "loss": 40.5876, "step": 10514 }, { "epoch": 250.3582089552239, "grad_norm": 1.4776759147644043, "learning_rate": 9.987654320987655e-06, "loss": 39.5868, "step": 10515 }, { "epoch": 250.3820895522388, "grad_norm": 1.4377377033233643, "learning_rate": 9.986772486772488e-06, "loss": 40.0639, "step": 10516 }, { "epoch": 250.40597014925373, "grad_norm": 1.4020990133285522, "learning_rate": 9.98589065255732e-06, "loss": 40.4011, "step": 10517 }, { "epoch": 250.42985074626867, "grad_norm": 1.4457677602767944, "learning_rate": 9.985008818342153e-06, "loss": 38.6549, "step": 10518 }, { "epoch": 250.45373134328358, "grad_norm": 1.4033653736114502, "learning_rate": 9.984126984126986e-06, "loss": 40.1813, "step": 10519 }, { "epoch": 250.47761194029852, "grad_norm": 1.389251708984375, "learning_rate": 9.983245149911817e-06, "loss": 40.46, "step": 10520 }, { "epoch": 250.50149253731342, "grad_norm": 1.3875435590744019, "learning_rate": 9.982363315696649e-06, "loss": 39.9283, "step": 10521 }, { "epoch": 250.52537313432836, "grad_norm": 1.3576221466064453, "learning_rate": 9.981481481481482e-06, "loss": 41.0899, "step": 10522 }, { "epoch": 250.54925373134327, "grad_norm": 1.3914140462875366, "learning_rate": 9.980599647266315e-06, "loss": 39.4819, "step": 10523 }, { "epoch": 250.5731343283582, "grad_norm": 1.3724967241287231, "learning_rate": 9.979717813051148e-06, "loss": 40.0979, "step": 10524 }, { "epoch": 250.59701492537314, "grad_norm": 1.3789385557174683, "learning_rate": 9.97883597883598e-06, "loss": 39.6191, "step": 10525 }, { "epoch": 250.62089552238805, "grad_norm": 1.376076579093933, "learning_rate": 9.977954144620812e-06, "loss": 38.999, "step": 10526 }, { "epoch": 250.644776119403, "grad_norm": 1.3673099279403687, "learning_rate": 9.977072310405645e-06, "loss": 39.9709, "step": 10527 }, { "epoch": 250.6686567164179, "grad_norm": NaN, "learning_rate": 9.976190476190477e-06, "loss": 59.3968, "step": 10528 }, { "epoch": 250.69253731343284, "grad_norm": 1.3818362951278687, "learning_rate": 9.976190476190477e-06, "loss": 40.8689, "step": 10529 }, { "epoch": 250.71641791044777, "grad_norm": 1.360676884651184, "learning_rate": 9.97530864197531e-06, "loss": 39.3602, "step": 10530 }, { "epoch": 250.74029850746268, "grad_norm": 1.3306424617767334, "learning_rate": 9.974426807760141e-06, "loss": 40.161, "step": 10531 }, { "epoch": 250.76417910447762, "grad_norm": 1.3583126068115234, "learning_rate": 9.973544973544974e-06, "loss": 39.9154, "step": 10532 }, { "epoch": 250.78805970149253, "grad_norm": 1.349703073501587, "learning_rate": 9.972663139329807e-06, "loss": 40.1163, "step": 10533 }, { "epoch": 250.81194029850747, "grad_norm": 1.347726583480835, "learning_rate": 9.97178130511464e-06, "loss": 39.3017, "step": 10534 }, { "epoch": 250.83582089552237, "grad_norm": 1.396958351135254, "learning_rate": 9.970899470899472e-06, "loss": 39.5322, "step": 10535 }, { "epoch": 250.8597014925373, "grad_norm": 1.3526915311813354, "learning_rate": 9.970017636684303e-06, "loss": 40.0001, "step": 10536 }, { "epoch": 250.88358208955225, "grad_norm": 1.3773057460784912, "learning_rate": 9.969135802469136e-06, "loss": 39.768, "step": 10537 }, { "epoch": 250.90746268656716, "grad_norm": 1.386195182800293, "learning_rate": 9.968253968253969e-06, "loss": 40.422, "step": 10538 }, { "epoch": 250.9313432835821, "grad_norm": 1.3438739776611328, "learning_rate": 9.967372134038802e-06, "loss": 40.6697, "step": 10539 }, { "epoch": 250.955223880597, "grad_norm": 1.3530758619308472, "learning_rate": 9.966490299823634e-06, "loss": 38.6471, "step": 10540 }, { "epoch": 250.97910447761194, "grad_norm": 1.4011762142181396, "learning_rate": 9.965608465608467e-06, "loss": 39.5076, "step": 10541 }, { "epoch": 251.0, "grad_norm": 1.327392816543579, "learning_rate": 9.9647266313933e-06, "loss": 34.5679, "step": 10542 }, { "epoch": 251.02388059701494, "grad_norm": 1.3732423782348633, "learning_rate": 9.963844797178131e-06, "loss": 40.2118, "step": 10543 }, { "epoch": 251.04776119402985, "grad_norm": 1.3530257940292358, "learning_rate": 9.962962962962964e-06, "loss": 40.3369, "step": 10544 }, { "epoch": 251.07164179104478, "grad_norm": 1.387791633605957, "learning_rate": 9.962081128747795e-06, "loss": 38.293, "step": 10545 }, { "epoch": 251.0955223880597, "grad_norm": 1.3275604248046875, "learning_rate": 9.961199294532629e-06, "loss": 40.5037, "step": 10546 }, { "epoch": 251.11940298507463, "grad_norm": 1.386987328529358, "learning_rate": 9.960317460317462e-06, "loss": 41.0914, "step": 10547 }, { "epoch": 251.14328358208957, "grad_norm": 1.3550736904144287, "learning_rate": 9.959435626102295e-06, "loss": 39.5405, "step": 10548 }, { "epoch": 251.16716417910447, "grad_norm": 1.3612709045410156, "learning_rate": 9.958553791887126e-06, "loss": 40.3297, "step": 10549 }, { "epoch": 251.1910447761194, "grad_norm": 1.343617558479309, "learning_rate": 9.957671957671959e-06, "loss": 38.7391, "step": 10550 }, { "epoch": 251.21492537313432, "grad_norm": 1.3534482717514038, "learning_rate": 9.95679012345679e-06, "loss": 39.1071, "step": 10551 }, { "epoch": 251.23880597014926, "grad_norm": 1.3353281021118164, "learning_rate": 9.955908289241623e-06, "loss": 40.0802, "step": 10552 }, { "epoch": 251.26268656716417, "grad_norm": 1.4030379056930542, "learning_rate": 9.955026455026457e-06, "loss": 38.8194, "step": 10553 }, { "epoch": 251.2865671641791, "grad_norm": 1.3803681135177612, "learning_rate": 9.954144620811288e-06, "loss": 40.1716, "step": 10554 }, { "epoch": 251.31044776119404, "grad_norm": 1.3487125635147095, "learning_rate": 9.953262786596121e-06, "loss": 38.7713, "step": 10555 }, { "epoch": 251.33432835820895, "grad_norm": 1.3783968687057495, "learning_rate": 9.952380952380954e-06, "loss": 37.5006, "step": 10556 }, { "epoch": 251.3582089552239, "grad_norm": 1.3805662393569946, "learning_rate": 9.951499118165785e-06, "loss": 39.9114, "step": 10557 }, { "epoch": 251.3820895522388, "grad_norm": NaN, "learning_rate": 9.950617283950618e-06, "loss": 36.8356, "step": 10558 }, { "epoch": 251.40597014925373, "grad_norm": 1.3543481826782227, "learning_rate": 9.950617283950618e-06, "loss": 39.6192, "step": 10559 }, { "epoch": 251.42985074626867, "grad_norm": 1.3758713006973267, "learning_rate": 9.94973544973545e-06, "loss": 38.8706, "step": 10560 }, { "epoch": 251.45373134328358, "grad_norm": 1.46674382686615, "learning_rate": 9.948853615520283e-06, "loss": 39.3501, "step": 10561 }, { "epoch": 251.47761194029852, "grad_norm": 1.3740344047546387, "learning_rate": 9.947971781305116e-06, "loss": 40.8395, "step": 10562 }, { "epoch": 251.50149253731342, "grad_norm": 1.3957149982452393, "learning_rate": 9.947089947089947e-06, "loss": 39.8343, "step": 10563 }, { "epoch": 251.52537313432836, "grad_norm": 1.381754755973816, "learning_rate": 9.94620811287478e-06, "loss": 38.48, "step": 10564 }, { "epoch": 251.54925373134327, "grad_norm": 1.3913391828536987, "learning_rate": 9.945326278659613e-06, "loss": 39.469, "step": 10565 }, { "epoch": 251.5731343283582, "grad_norm": 1.3768842220306396, "learning_rate": 9.944444444444445e-06, "loss": 40.6781, "step": 10566 }, { "epoch": 251.59701492537314, "grad_norm": 1.3332890272140503, "learning_rate": 9.943562610229278e-06, "loss": 39.8958, "step": 10567 }, { "epoch": 251.62089552238805, "grad_norm": 1.3273770809173584, "learning_rate": 9.94268077601411e-06, "loss": 40.0264, "step": 10568 }, { "epoch": 251.644776119403, "grad_norm": 1.3208810091018677, "learning_rate": 9.941798941798942e-06, "loss": 41.1523, "step": 10569 }, { "epoch": 251.6686567164179, "grad_norm": 1.374711275100708, "learning_rate": 9.940917107583775e-06, "loss": 39.3163, "step": 10570 }, { "epoch": 251.69253731343284, "grad_norm": 1.3534605503082275, "learning_rate": 9.940035273368608e-06, "loss": 40.4529, "step": 10571 }, { "epoch": 251.71641791044777, "grad_norm": 1.3711304664611816, "learning_rate": 9.93915343915344e-06, "loss": 40.5767, "step": 10572 }, { "epoch": 251.74029850746268, "grad_norm": 1.395110845565796, "learning_rate": 9.938271604938273e-06, "loss": 39.6177, "step": 10573 }, { "epoch": 251.76417910447762, "grad_norm": 1.3683602809906006, "learning_rate": 9.937389770723104e-06, "loss": 40.6291, "step": 10574 }, { "epoch": 251.78805970149253, "grad_norm": 1.338921070098877, "learning_rate": 9.936507936507937e-06, "loss": 41.3591, "step": 10575 }, { "epoch": 251.81194029850747, "grad_norm": 1.371520757675171, "learning_rate": 9.93562610229277e-06, "loss": 39.5465, "step": 10576 }, { "epoch": 251.83582089552237, "grad_norm": 1.3196711540222168, "learning_rate": 9.934744268077602e-06, "loss": 41.346, "step": 10577 }, { "epoch": 251.8597014925373, "grad_norm": 1.3384023904800415, "learning_rate": 9.933862433862435e-06, "loss": 39.6638, "step": 10578 }, { "epoch": 251.88358208955225, "grad_norm": 1.3645946979522705, "learning_rate": 9.932980599647268e-06, "loss": 40.2804, "step": 10579 }, { "epoch": 251.90746268656716, "grad_norm": 1.3310152292251587, "learning_rate": 9.9320987654321e-06, "loss": 39.9206, "step": 10580 }, { "epoch": 251.9313432835821, "grad_norm": 1.2904884815216064, "learning_rate": 9.931216931216932e-06, "loss": 38.431, "step": 10581 }, { "epoch": 251.955223880597, "grad_norm": 1.3735036849975586, "learning_rate": 9.930335097001764e-06, "loss": 39.5107, "step": 10582 }, { "epoch": 251.97910447761194, "grad_norm": 1.3460135459899902, "learning_rate": 9.929453262786597e-06, "loss": 40.4463, "step": 10583 }, { "epoch": 252.0, "grad_norm": 1.3490216732025146, "learning_rate": 9.92857142857143e-06, "loss": 34.3399, "step": 10584 }, { "epoch": 252.02388059701494, "grad_norm": 1.379584789276123, "learning_rate": 9.927689594356263e-06, "loss": 40.0725, "step": 10585 }, { "epoch": 252.04776119402985, "grad_norm": 1.3413156270980835, "learning_rate": 9.926807760141094e-06, "loss": 41.0116, "step": 10586 }, { "epoch": 252.07164179104478, "grad_norm": 1.3538950681686401, "learning_rate": 9.925925925925927e-06, "loss": 38.8772, "step": 10587 }, { "epoch": 252.0955223880597, "grad_norm": 1.3444949388504028, "learning_rate": 9.925044091710759e-06, "loss": 40.458, "step": 10588 }, { "epoch": 252.11940298507463, "grad_norm": 1.3313732147216797, "learning_rate": 9.924162257495592e-06, "loss": 39.1824, "step": 10589 }, { "epoch": 252.14328358208957, "grad_norm": 1.352254033088684, "learning_rate": 9.923280423280423e-06, "loss": 40.5714, "step": 10590 }, { "epoch": 252.16716417910447, "grad_norm": 1.3536652326583862, "learning_rate": 9.922398589065256e-06, "loss": 39.5446, "step": 10591 }, { "epoch": 252.1910447761194, "grad_norm": 1.3376878499984741, "learning_rate": 9.92151675485009e-06, "loss": 38.9081, "step": 10592 }, { "epoch": 252.21492537313432, "grad_norm": 1.3671746253967285, "learning_rate": 9.920634920634922e-06, "loss": 40.4262, "step": 10593 }, { "epoch": 252.23880597014926, "grad_norm": 1.3364135026931763, "learning_rate": 9.919753086419754e-06, "loss": 41.0894, "step": 10594 }, { "epoch": 252.26268656716417, "grad_norm": 1.3572956323623657, "learning_rate": 9.918871252204587e-06, "loss": 38.9659, "step": 10595 }, { "epoch": 252.2865671641791, "grad_norm": 1.3382114171981812, "learning_rate": 9.917989417989418e-06, "loss": 39.5581, "step": 10596 }, { "epoch": 252.31044776119404, "grad_norm": 1.3439621925354004, "learning_rate": 9.917107583774251e-06, "loss": 40.6726, "step": 10597 }, { "epoch": 252.33432835820895, "grad_norm": 1.2865102291107178, "learning_rate": 9.916225749559084e-06, "loss": 40.1752, "step": 10598 }, { "epoch": 252.3582089552239, "grad_norm": 1.314003586769104, "learning_rate": 9.915343915343916e-06, "loss": 39.3585, "step": 10599 }, { "epoch": 252.3820895522388, "grad_norm": 1.351733922958374, "learning_rate": 9.914462081128749e-06, "loss": 39.5359, "step": 10600 }, { "epoch": 252.40597014925373, "grad_norm": 1.384981393814087, "learning_rate": 9.913580246913582e-06, "loss": 40.274, "step": 10601 }, { "epoch": 252.42985074626867, "grad_norm": 1.3657991886138916, "learning_rate": 9.912698412698413e-06, "loss": 39.5423, "step": 10602 }, { "epoch": 252.45373134328358, "grad_norm": 1.3338768482208252, "learning_rate": 9.911816578483246e-06, "loss": 40.0044, "step": 10603 }, { "epoch": 252.47761194029852, "grad_norm": 1.325417399406433, "learning_rate": 9.910934744268078e-06, "loss": 39.6411, "step": 10604 }, { "epoch": 252.50149253731342, "grad_norm": 1.3510899543762207, "learning_rate": 9.91005291005291e-06, "loss": 39.8669, "step": 10605 }, { "epoch": 252.52537313432836, "grad_norm": 1.3431742191314697, "learning_rate": 9.909171075837744e-06, "loss": 40.2349, "step": 10606 }, { "epoch": 252.54925373134327, "grad_norm": NaN, "learning_rate": 9.908289241622577e-06, "loss": 39.4552, "step": 10607 }, { "epoch": 252.5731343283582, "grad_norm": 1.341931939125061, "learning_rate": 9.908289241622577e-06, "loss": 39.9881, "step": 10608 }, { "epoch": 252.59701492537314, "grad_norm": 1.3610568046569824, "learning_rate": 9.907407407407408e-06, "loss": 40.402, "step": 10609 }, { "epoch": 252.62089552238805, "grad_norm": 1.373623013496399, "learning_rate": 9.906525573192241e-06, "loss": 40.216, "step": 10610 }, { "epoch": 252.644776119403, "grad_norm": 1.3474271297454834, "learning_rate": 9.905643738977073e-06, "loss": 40.2282, "step": 10611 }, { "epoch": 252.6686567164179, "grad_norm": 1.3855059146881104, "learning_rate": 9.904761904761906e-06, "loss": 40.7064, "step": 10612 }, { "epoch": 252.69253731343284, "grad_norm": 1.413797378540039, "learning_rate": 9.903880070546737e-06, "loss": 40.0769, "step": 10613 }, { "epoch": 252.71641791044777, "grad_norm": 1.4275468587875366, "learning_rate": 9.90299823633157e-06, "loss": 39.4203, "step": 10614 }, { "epoch": 252.74029850746268, "grad_norm": 1.3415850400924683, "learning_rate": 9.902116402116403e-06, "loss": 38.8304, "step": 10615 }, { "epoch": 252.76417910447762, "grad_norm": 1.3480720520019531, "learning_rate": 9.901234567901236e-06, "loss": 39.3069, "step": 10616 }, { "epoch": 252.78805970149253, "grad_norm": 1.3531330823898315, "learning_rate": 9.900352733686068e-06, "loss": 39.3223, "step": 10617 }, { "epoch": 252.81194029850747, "grad_norm": 1.3358463048934937, "learning_rate": 9.8994708994709e-06, "loss": 39.5168, "step": 10618 }, { "epoch": 252.83582089552237, "grad_norm": 1.3389610052108765, "learning_rate": 9.898589065255732e-06, "loss": 39.9943, "step": 10619 }, { "epoch": 252.8597014925373, "grad_norm": 1.3844460248947144, "learning_rate": 9.897707231040565e-06, "loss": 38.0072, "step": 10620 }, { "epoch": 252.88358208955225, "grad_norm": 1.3421772718429565, "learning_rate": 9.896825396825398e-06, "loss": 39.4078, "step": 10621 }, { "epoch": 252.90746268656716, "grad_norm": 1.3448072671890259, "learning_rate": 9.89594356261023e-06, "loss": 40.2295, "step": 10622 }, { "epoch": 252.9313432835821, "grad_norm": 2.2201271057128906, "learning_rate": 9.895061728395063e-06, "loss": 39.7946, "step": 10623 }, { "epoch": 252.955223880597, "grad_norm": 1.3789139986038208, "learning_rate": 9.894179894179896e-06, "loss": 39.755, "step": 10624 }, { "epoch": 252.97910447761194, "grad_norm": 1.3674843311309814, "learning_rate": 9.893298059964727e-06, "loss": 39.7168, "step": 10625 }, { "epoch": 253.0, "grad_norm": 1.415688395500183, "learning_rate": 9.89241622574956e-06, "loss": 35.5203, "step": 10626 }, { "epoch": 253.02388059701494, "grad_norm": 1.3317011594772339, "learning_rate": 9.891534391534391e-06, "loss": 40.497, "step": 10627 }, { "epoch": 253.04776119402985, "grad_norm": 1.3328512907028198, "learning_rate": 9.890652557319224e-06, "loss": 39.7104, "step": 10628 }, { "epoch": 253.07164179104478, "grad_norm": 1.3884071111679077, "learning_rate": 9.889770723104058e-06, "loss": 39.0237, "step": 10629 }, { "epoch": 253.0955223880597, "grad_norm": 1.313393473625183, "learning_rate": 9.88888888888889e-06, "loss": 39.8233, "step": 10630 }, { "epoch": 253.11940298507463, "grad_norm": 1.3570992946624756, "learning_rate": 9.888007054673722e-06, "loss": 40.8251, "step": 10631 }, { "epoch": 253.14328358208957, "grad_norm": 1.3652751445770264, "learning_rate": 9.887125220458555e-06, "loss": 39.6895, "step": 10632 }, { "epoch": 253.16716417910447, "grad_norm": 1.3356496095657349, "learning_rate": 9.886243386243386e-06, "loss": 39.498, "step": 10633 }, { "epoch": 253.1910447761194, "grad_norm": 1.3531768321990967, "learning_rate": 9.88536155202822e-06, "loss": 39.4653, "step": 10634 }, { "epoch": 253.21492537313432, "grad_norm": 1.3327196836471558, "learning_rate": 9.884479717813053e-06, "loss": 40.6644, "step": 10635 }, { "epoch": 253.23880597014926, "grad_norm": 1.3596458435058594, "learning_rate": 9.883597883597884e-06, "loss": 40.0269, "step": 10636 }, { "epoch": 253.26268656716417, "grad_norm": 1.3779792785644531, "learning_rate": 9.882716049382717e-06, "loss": 40.4705, "step": 10637 }, { "epoch": 253.2865671641791, "grad_norm": 1.3911702632904053, "learning_rate": 9.88183421516755e-06, "loss": 38.3527, "step": 10638 }, { "epoch": 253.31044776119404, "grad_norm": 1.3568097352981567, "learning_rate": 9.880952380952381e-06, "loss": 40.2406, "step": 10639 }, { "epoch": 253.33432835820895, "grad_norm": 1.3452147245407104, "learning_rate": 9.880070546737214e-06, "loss": 39.9733, "step": 10640 }, { "epoch": 253.3582089552239, "grad_norm": 1.3330786228179932, "learning_rate": 9.879188712522046e-06, "loss": 39.8333, "step": 10641 }, { "epoch": 253.3820895522388, "grad_norm": 1.3249883651733398, "learning_rate": 9.878306878306879e-06, "loss": 40.0332, "step": 10642 }, { "epoch": 253.40597014925373, "grad_norm": 1.3795099258422852, "learning_rate": 9.877425044091712e-06, "loss": 40.2791, "step": 10643 }, { "epoch": 253.42985074626867, "grad_norm": 1.3222503662109375, "learning_rate": 9.876543209876543e-06, "loss": 39.6733, "step": 10644 }, { "epoch": 253.45373134328358, "grad_norm": 1.3678841590881348, "learning_rate": 9.875661375661376e-06, "loss": 40.5779, "step": 10645 }, { "epoch": 253.47761194029852, "grad_norm": 1.3266198635101318, "learning_rate": 9.87477954144621e-06, "loss": 41.3379, "step": 10646 }, { "epoch": 253.50149253731342, "grad_norm": 1.4002189636230469, "learning_rate": 9.87389770723104e-06, "loss": 39.601, "step": 10647 }, { "epoch": 253.52537313432836, "grad_norm": 1.4021034240722656, "learning_rate": 9.873015873015874e-06, "loss": 38.7818, "step": 10648 }, { "epoch": 253.54925373134327, "grad_norm": 1.3973534107208252, "learning_rate": 9.872134038800705e-06, "loss": 38.9658, "step": 10649 }, { "epoch": 253.5731343283582, "grad_norm": 1.343833565711975, "learning_rate": 9.871252204585538e-06, "loss": 40.2317, "step": 10650 }, { "epoch": 253.59701492537314, "grad_norm": 1.3559799194335938, "learning_rate": 9.870370370370371e-06, "loss": 39.5964, "step": 10651 }, { "epoch": 253.62089552238805, "grad_norm": 1.605331540107727, "learning_rate": 9.869488536155204e-06, "loss": 39.1788, "step": 10652 }, { "epoch": 253.644776119403, "grad_norm": 1.3761121034622192, "learning_rate": 9.868606701940036e-06, "loss": 40.6816, "step": 10653 }, { "epoch": 253.6686567164179, "grad_norm": 1.362964391708374, "learning_rate": 9.867724867724869e-06, "loss": 39.1969, "step": 10654 }, { "epoch": 253.69253731343284, "grad_norm": 1.3428213596343994, "learning_rate": 9.8668430335097e-06, "loss": 40.0391, "step": 10655 }, { "epoch": 253.71641791044777, "grad_norm": 1.322096824645996, "learning_rate": 9.865961199294533e-06, "loss": 39.5832, "step": 10656 }, { "epoch": 253.74029850746268, "grad_norm": 1.3547359704971313, "learning_rate": 9.865079365079366e-06, "loss": 40.0022, "step": 10657 }, { "epoch": 253.76417910447762, "grad_norm": 1.3870985507965088, "learning_rate": 9.864197530864198e-06, "loss": 39.0126, "step": 10658 }, { "epoch": 253.78805970149253, "grad_norm": 1.5507276058197021, "learning_rate": 9.86331569664903e-06, "loss": 39.7992, "step": 10659 }, { "epoch": 253.81194029850747, "grad_norm": 1.3543835878372192, "learning_rate": 9.862433862433864e-06, "loss": 38.9848, "step": 10660 }, { "epoch": 253.83582089552237, "grad_norm": 1.355263590812683, "learning_rate": 9.861552028218695e-06, "loss": 40.5587, "step": 10661 }, { "epoch": 253.8597014925373, "grad_norm": 1.3595378398895264, "learning_rate": 9.860670194003528e-06, "loss": 38.8911, "step": 10662 }, { "epoch": 253.88358208955225, "grad_norm": 1.4282208681106567, "learning_rate": 9.85978835978836e-06, "loss": 41.0098, "step": 10663 }, { "epoch": 253.90746268656716, "grad_norm": 1.3840993642807007, "learning_rate": 9.858906525573193e-06, "loss": 40.1086, "step": 10664 }, { "epoch": 253.9313432835821, "grad_norm": 1.3312474489212036, "learning_rate": 9.858024691358026e-06, "loss": 39.6932, "step": 10665 }, { "epoch": 253.955223880597, "grad_norm": 1.3351184129714966, "learning_rate": 9.857142857142859e-06, "loss": 40.4599, "step": 10666 }, { "epoch": 253.97910447761194, "grad_norm": 1.3848155736923218, "learning_rate": 9.85626102292769e-06, "loss": 39.0993, "step": 10667 }, { "epoch": 254.0, "grad_norm": 1.3559796810150146, "learning_rate": 9.855379188712523e-06, "loss": 34.0008, "step": 10668 }, { "epoch": 254.02388059701494, "grad_norm": 1.3587557077407837, "learning_rate": 9.854497354497355e-06, "loss": 39.775, "step": 10669 }, { "epoch": 254.04776119402985, "grad_norm": 1.3801796436309814, "learning_rate": 9.853615520282188e-06, "loss": 39.1069, "step": 10670 }, { "epoch": 254.07164179104478, "grad_norm": 1.3791277408599854, "learning_rate": 9.852733686067019e-06, "loss": 39.7329, "step": 10671 }, { "epoch": 254.0955223880597, "grad_norm": 1.4125465154647827, "learning_rate": 9.851851851851852e-06, "loss": 37.7206, "step": 10672 }, { "epoch": 254.11940298507463, "grad_norm": 1.3357454538345337, "learning_rate": 9.850970017636685e-06, "loss": 39.9694, "step": 10673 }, { "epoch": 254.14328358208957, "grad_norm": 1.3696887493133545, "learning_rate": 9.850088183421518e-06, "loss": 39.1864, "step": 10674 }, { "epoch": 254.16716417910447, "grad_norm": 1.3595482110977173, "learning_rate": 9.849206349206351e-06, "loss": 39.268, "step": 10675 }, { "epoch": 254.1910447761194, "grad_norm": 1.342637300491333, "learning_rate": 9.848324514991183e-06, "loss": 39.8503, "step": 10676 }, { "epoch": 254.21492537313432, "grad_norm": 1.3580621480941772, "learning_rate": 9.847442680776014e-06, "loss": 40.1881, "step": 10677 }, { "epoch": 254.23880597014926, "grad_norm": 1.3937290906906128, "learning_rate": 9.846560846560847e-06, "loss": 40.5127, "step": 10678 }, { "epoch": 254.26268656716417, "grad_norm": 1.3690370321273804, "learning_rate": 9.84567901234568e-06, "loss": 39.8622, "step": 10679 }, { "epoch": 254.2865671641791, "grad_norm": 1.344618320465088, "learning_rate": 9.844797178130512e-06, "loss": 38.9874, "step": 10680 }, { "epoch": 254.31044776119404, "grad_norm": 1.3206486701965332, "learning_rate": 9.843915343915345e-06, "loss": 39.5037, "step": 10681 }, { "epoch": 254.33432835820895, "grad_norm": 1.3196312189102173, "learning_rate": 9.843033509700178e-06, "loss": 38.6175, "step": 10682 }, { "epoch": 254.3582089552239, "grad_norm": 1.3591564893722534, "learning_rate": 9.84215167548501e-06, "loss": 38.6404, "step": 10683 }, { "epoch": 254.3820895522388, "grad_norm": 1.3251851797103882, "learning_rate": 9.841269841269842e-06, "loss": 40.2715, "step": 10684 }, { "epoch": 254.40597014925373, "grad_norm": 1.3577885627746582, "learning_rate": 9.840388007054673e-06, "loss": 40.0165, "step": 10685 }, { "epoch": 254.42985074626867, "grad_norm": 1.3716191053390503, "learning_rate": 9.839506172839507e-06, "loss": 40.8563, "step": 10686 }, { "epoch": 254.45373134328358, "grad_norm": 1.3535329103469849, "learning_rate": 9.83862433862434e-06, "loss": 40.2669, "step": 10687 }, { "epoch": 254.47761194029852, "grad_norm": 1.3360430002212524, "learning_rate": 9.837742504409173e-06, "loss": 41.1082, "step": 10688 }, { "epoch": 254.50149253731342, "grad_norm": 1.3577322959899902, "learning_rate": 9.836860670194004e-06, "loss": 39.4195, "step": 10689 }, { "epoch": 254.52537313432836, "grad_norm": 1.3616520166397095, "learning_rate": 9.835978835978837e-06, "loss": 40.9314, "step": 10690 }, { "epoch": 254.54925373134327, "grad_norm": 1.3856308460235596, "learning_rate": 9.835097001763668e-06, "loss": 40.1331, "step": 10691 }, { "epoch": 254.5731343283582, "grad_norm": 1.6206289529800415, "learning_rate": 9.834215167548502e-06, "loss": 40.4112, "step": 10692 }, { "epoch": 254.59701492537314, "grad_norm": 1.369954228401184, "learning_rate": 9.833333333333333e-06, "loss": 40.3786, "step": 10693 }, { "epoch": 254.62089552238805, "grad_norm": 1.3995909690856934, "learning_rate": 9.832451499118166e-06, "loss": 40.1792, "step": 10694 }, { "epoch": 254.644776119403, "grad_norm": 1.341331124305725, "learning_rate": 9.831569664902999e-06, "loss": 40.2592, "step": 10695 }, { "epoch": 254.6686567164179, "grad_norm": 1.410402536392212, "learning_rate": 9.830687830687832e-06, "loss": 39.1081, "step": 10696 }, { "epoch": 254.69253731343284, "grad_norm": 1.3518548011779785, "learning_rate": 9.829805996472665e-06, "loss": 40.3046, "step": 10697 }, { "epoch": 254.71641791044777, "grad_norm": 1.638381838798523, "learning_rate": 9.828924162257497e-06, "loss": 39.6316, "step": 10698 }, { "epoch": 254.74029850746268, "grad_norm": 1.3581488132476807, "learning_rate": 9.828042328042328e-06, "loss": 39.3842, "step": 10699 }, { "epoch": 254.76417910447762, "grad_norm": 1.3668123483657837, "learning_rate": 9.827160493827161e-06, "loss": 39.2723, "step": 10700 }, { "epoch": 254.78805970149253, "grad_norm": 1.3700960874557495, "learning_rate": 9.826278659611994e-06, "loss": 39.9494, "step": 10701 }, { "epoch": 254.81194029850747, "grad_norm": 1.3773081302642822, "learning_rate": 9.825396825396825e-06, "loss": 39.5338, "step": 10702 }, { "epoch": 254.83582089552237, "grad_norm": 1.3716596364974976, "learning_rate": 9.824514991181658e-06, "loss": 40.6071, "step": 10703 }, { "epoch": 254.8597014925373, "grad_norm": 1.3360058069229126, "learning_rate": 9.823633156966492e-06, "loss": 38.8225, "step": 10704 }, { "epoch": 254.88358208955225, "grad_norm": 1.3359123468399048, "learning_rate": 9.822751322751325e-06, "loss": 40.0664, "step": 10705 }, { "epoch": 254.90746268656716, "grad_norm": 1.3696709871292114, "learning_rate": 9.821869488536156e-06, "loss": 40.5176, "step": 10706 }, { "epoch": 254.9313432835821, "grad_norm": 1.3282774686813354, "learning_rate": 9.820987654320987e-06, "loss": 39.6124, "step": 10707 }, { "epoch": 254.955223880597, "grad_norm": 1.364279866218567, "learning_rate": 9.82010582010582e-06, "loss": 39.8244, "step": 10708 }, { "epoch": 254.97910447761194, "grad_norm": 1.300444483757019, "learning_rate": 9.819223985890653e-06, "loss": 40.6882, "step": 10709 }, { "epoch": 255.0, "grad_norm": 1.3536803722381592, "learning_rate": 9.818342151675487e-06, "loss": 35.047, "step": 10710 }, { "epoch": 255.02388059701494, "grad_norm": 1.3458669185638428, "learning_rate": 9.817460317460318e-06, "loss": 40.0279, "step": 10711 }, { "epoch": 255.04776119402985, "grad_norm": 1.3102805614471436, "learning_rate": 9.816578483245151e-06, "loss": 39.6751, "step": 10712 }, { "epoch": 255.07164179104478, "grad_norm": 1.3948651552200317, "learning_rate": 9.815696649029984e-06, "loss": 39.5539, "step": 10713 }, { "epoch": 255.0955223880597, "grad_norm": 1.3350692987442017, "learning_rate": 9.814814814814815e-06, "loss": 40.0436, "step": 10714 }, { "epoch": 255.11940298507463, "grad_norm": 1.3638746738433838, "learning_rate": 9.813932980599648e-06, "loss": 40.6442, "step": 10715 }, { "epoch": 255.14328358208957, "grad_norm": NaN, "learning_rate": 9.81305114638448e-06, "loss": 40.3998, "step": 10716 }, { "epoch": 255.16716417910447, "grad_norm": 1.341970682144165, "learning_rate": 9.81305114638448e-06, "loss": 39.5691, "step": 10717 }, { "epoch": 255.1910447761194, "grad_norm": 1.3325163125991821, "learning_rate": 9.812169312169313e-06, "loss": 39.9132, "step": 10718 }, { "epoch": 255.21492537313432, "grad_norm": 1.3290495872497559, "learning_rate": 9.811287477954146e-06, "loss": 40.672, "step": 10719 }, { "epoch": 255.23880597014926, "grad_norm": 1.3827701807022095, "learning_rate": 9.810405643738979e-06, "loss": 40.4714, "step": 10720 }, { "epoch": 255.26268656716417, "grad_norm": 1.3553849458694458, "learning_rate": 9.80952380952381e-06, "loss": 40.6681, "step": 10721 }, { "epoch": 255.2865671641791, "grad_norm": 1.3988373279571533, "learning_rate": 9.808641975308642e-06, "loss": 40.6402, "step": 10722 }, { "epoch": 255.31044776119404, "grad_norm": 1.352552056312561, "learning_rate": 9.807760141093475e-06, "loss": 38.9172, "step": 10723 }, { "epoch": 255.33432835820895, "grad_norm": 1.4030412435531616, "learning_rate": 9.806878306878308e-06, "loss": 39.5991, "step": 10724 }, { "epoch": 255.3582089552239, "grad_norm": 1.344491720199585, "learning_rate": 9.80599647266314e-06, "loss": 39.9546, "step": 10725 }, { "epoch": 255.3820895522388, "grad_norm": 1.3502202033996582, "learning_rate": 9.805114638447972e-06, "loss": 38.4632, "step": 10726 }, { "epoch": 255.40597014925373, "grad_norm": 1.393944263458252, "learning_rate": 9.804232804232805e-06, "loss": 40.4172, "step": 10727 }, { "epoch": 255.42985074626867, "grad_norm": 1.350498080253601, "learning_rate": 9.803350970017638e-06, "loss": 40.9765, "step": 10728 }, { "epoch": 255.45373134328358, "grad_norm": 1.3508245944976807, "learning_rate": 9.80246913580247e-06, "loss": 39.48, "step": 10729 }, { "epoch": 255.47761194029852, "grad_norm": 1.3354111909866333, "learning_rate": 9.801587301587301e-06, "loss": 39.4992, "step": 10730 }, { "epoch": 255.50149253731342, "grad_norm": 1.3361178636550903, "learning_rate": 9.800705467372134e-06, "loss": 39.0648, "step": 10731 }, { "epoch": 255.52537313432836, "grad_norm": 1.362167239189148, "learning_rate": 9.799823633156967e-06, "loss": 39.8285, "step": 10732 }, { "epoch": 255.54925373134327, "grad_norm": 1.3739560842514038, "learning_rate": 9.7989417989418e-06, "loss": 41.0399, "step": 10733 }, { "epoch": 255.5731343283582, "grad_norm": 1.3215539455413818, "learning_rate": 9.798059964726632e-06, "loss": 39.7686, "step": 10734 }, { "epoch": 255.59701492537314, "grad_norm": 1.3415372371673584, "learning_rate": 9.797178130511465e-06, "loss": 39.8857, "step": 10735 }, { "epoch": 255.62089552238805, "grad_norm": NaN, "learning_rate": 9.796296296296298e-06, "loss": 40.0369, "step": 10736 }, { "epoch": 255.644776119403, "grad_norm": 1.3240665197372437, "learning_rate": 9.796296296296298e-06, "loss": 40.5071, "step": 10737 }, { "epoch": 255.6686567164179, "grad_norm": 1.396680474281311, "learning_rate": 9.79541446208113e-06, "loss": 39.0879, "step": 10738 }, { "epoch": 255.69253731343284, "grad_norm": 1.3395425081253052, "learning_rate": 9.794532627865962e-06, "loss": 39.085, "step": 10739 }, { "epoch": 255.71641791044777, "grad_norm": 1.355476975440979, "learning_rate": 9.793650793650794e-06, "loss": 38.9858, "step": 10740 }, { "epoch": 255.74029850746268, "grad_norm": 1.3485443592071533, "learning_rate": 9.792768959435627e-06, "loss": 40.1246, "step": 10741 }, { "epoch": 255.76417910447762, "grad_norm": 1.3224623203277588, "learning_rate": 9.79188712522046e-06, "loss": 40.8662, "step": 10742 }, { "epoch": 255.78805970149253, "grad_norm": 1.4062414169311523, "learning_rate": 9.791005291005293e-06, "loss": 40.3063, "step": 10743 }, { "epoch": 255.81194029850747, "grad_norm": 1.3575087785720825, "learning_rate": 9.790123456790124e-06, "loss": 40.2388, "step": 10744 }, { "epoch": 255.83582089552237, "grad_norm": 1.3832697868347168, "learning_rate": 9.789241622574957e-06, "loss": 38.6037, "step": 10745 }, { "epoch": 255.8597014925373, "grad_norm": 1.3493146896362305, "learning_rate": 9.788359788359789e-06, "loss": 39.4062, "step": 10746 }, { "epoch": 255.88358208955225, "grad_norm": 1.3569790124893188, "learning_rate": 9.787477954144622e-06, "loss": 39.456, "step": 10747 }, { "epoch": 255.90746268656716, "grad_norm": 1.3566110134124756, "learning_rate": 9.786596119929455e-06, "loss": 38.5334, "step": 10748 }, { "epoch": 255.9313432835821, "grad_norm": 1.3056905269622803, "learning_rate": 9.785714285714286e-06, "loss": 40.2979, "step": 10749 }, { "epoch": 255.955223880597, "grad_norm": 1.3368300199508667, "learning_rate": 9.78483245149912e-06, "loss": 39.9211, "step": 10750 }, { "epoch": 255.97910447761194, "grad_norm": 1.3391193151474, "learning_rate": 9.783950617283952e-06, "loss": 38.7613, "step": 10751 }, { "epoch": 256.0, "grad_norm": 1.3654685020446777, "learning_rate": 9.783068783068784e-06, "loss": 34.6137, "step": 10752 }, { "epoch": 256.0238805970149, "grad_norm": 1.3525254726409912, "learning_rate": 9.782186948853615e-06, "loss": 40.4889, "step": 10753 }, { "epoch": 256.0477611940299, "grad_norm": 1.3474535942077637, "learning_rate": 9.781305114638448e-06, "loss": 40.3619, "step": 10754 }, { "epoch": 256.0716417910448, "grad_norm": 1.3435072898864746, "learning_rate": 9.780423280423281e-06, "loss": 40.9955, "step": 10755 }, { "epoch": 256.0955223880597, "grad_norm": 1.351089358329773, "learning_rate": 9.779541446208114e-06, "loss": 39.5167, "step": 10756 }, { "epoch": 256.1194029850746, "grad_norm": 1.3256869316101074, "learning_rate": 9.778659611992947e-06, "loss": 39.2002, "step": 10757 }, { "epoch": 256.14328358208957, "grad_norm": 1.2880183458328247, "learning_rate": 9.777777777777779e-06, "loss": 39.7987, "step": 10758 }, { "epoch": 256.1671641791045, "grad_norm": 1.3579797744750977, "learning_rate": 9.776895943562612e-06, "loss": 40.335, "step": 10759 }, { "epoch": 256.1910447761194, "grad_norm": 1.3449617624282837, "learning_rate": 9.776014109347443e-06, "loss": 40.0813, "step": 10760 }, { "epoch": 256.21492537313435, "grad_norm": 1.4235007762908936, "learning_rate": 9.775132275132276e-06, "loss": 39.3431, "step": 10761 }, { "epoch": 256.23880597014926, "grad_norm": 1.3731850385665894, "learning_rate": 9.774250440917108e-06, "loss": 39.8925, "step": 10762 }, { "epoch": 256.26268656716417, "grad_norm": 1.3898743391036987, "learning_rate": 9.77336860670194e-06, "loss": 39.3937, "step": 10763 }, { "epoch": 256.28656716417913, "grad_norm": 1.3668562173843384, "learning_rate": 9.772486772486774e-06, "loss": 38.9943, "step": 10764 }, { "epoch": 256.31044776119404, "grad_norm": 1.3454052209854126, "learning_rate": 9.771604938271607e-06, "loss": 39.5839, "step": 10765 }, { "epoch": 256.33432835820895, "grad_norm": 1.352825403213501, "learning_rate": 9.770723104056438e-06, "loss": 40.3822, "step": 10766 }, { "epoch": 256.35820895522386, "grad_norm": NaN, "learning_rate": 9.769841269841271e-06, "loss": 50.6019, "step": 10767 }, { "epoch": 256.3820895522388, "grad_norm": 1.3345181941986084, "learning_rate": 9.769841269841271e-06, "loss": 39.5045, "step": 10768 }, { "epoch": 256.40597014925373, "grad_norm": 1.4013285636901855, "learning_rate": 9.768959435626103e-06, "loss": 38.9445, "step": 10769 }, { "epoch": 256.42985074626864, "grad_norm": 1.3682047128677368, "learning_rate": 9.768077601410936e-06, "loss": 39.0367, "step": 10770 }, { "epoch": 256.4537313432836, "grad_norm": 1.3821349143981934, "learning_rate": 9.767195767195769e-06, "loss": 38.4679, "step": 10771 }, { "epoch": 256.4776119402985, "grad_norm": 1.4004615545272827, "learning_rate": 9.7663139329806e-06, "loss": 38.8621, "step": 10772 }, { "epoch": 256.5014925373134, "grad_norm": 1.354845404624939, "learning_rate": 9.765432098765433e-06, "loss": 38.8264, "step": 10773 }, { "epoch": 256.52537313432833, "grad_norm": 1.397647500038147, "learning_rate": 9.764550264550266e-06, "loss": 40.5522, "step": 10774 }, { "epoch": 256.5492537313433, "grad_norm": 1.3407264947891235, "learning_rate": 9.763668430335098e-06, "loss": 40.0675, "step": 10775 }, { "epoch": 256.5731343283582, "grad_norm": 1.3517348766326904, "learning_rate": 9.76278659611993e-06, "loss": 39.6726, "step": 10776 }, { "epoch": 256.5970149253731, "grad_norm": 1.3738054037094116, "learning_rate": 9.761904761904762e-06, "loss": 39.0693, "step": 10777 }, { "epoch": 256.6208955223881, "grad_norm": 1.4089776277542114, "learning_rate": 9.761022927689595e-06, "loss": 39.6317, "step": 10778 }, { "epoch": 256.644776119403, "grad_norm": 1.3470150232315063, "learning_rate": 9.760141093474428e-06, "loss": 40.5068, "step": 10779 }, { "epoch": 256.6686567164179, "grad_norm": 1.3469817638397217, "learning_rate": 9.759259259259261e-06, "loss": 39.3326, "step": 10780 }, { "epoch": 256.6925373134328, "grad_norm": 1.352879285812378, "learning_rate": 9.758377425044092e-06, "loss": 38.5158, "step": 10781 }, { "epoch": 256.7164179104478, "grad_norm": 1.3403745889663696, "learning_rate": 9.757495590828926e-06, "loss": 41.2157, "step": 10782 }, { "epoch": 256.7402985074627, "grad_norm": 1.3440155982971191, "learning_rate": 9.756613756613757e-06, "loss": 41.4051, "step": 10783 }, { "epoch": 256.7641791044776, "grad_norm": 1.404590129852295, "learning_rate": 9.75573192239859e-06, "loss": 38.8862, "step": 10784 }, { "epoch": 256.78805970149256, "grad_norm": 1.3484537601470947, "learning_rate": 9.754850088183421e-06, "loss": 39.9499, "step": 10785 }, { "epoch": 256.81194029850747, "grad_norm": 1.3183577060699463, "learning_rate": 9.753968253968254e-06, "loss": 38.7812, "step": 10786 }, { "epoch": 256.8358208955224, "grad_norm": 1.5481266975402832, "learning_rate": 9.753086419753087e-06, "loss": 40.8143, "step": 10787 }, { "epoch": 256.85970149253734, "grad_norm": 1.3488281965255737, "learning_rate": 9.75220458553792e-06, "loss": 39.3822, "step": 10788 }, { "epoch": 256.88358208955225, "grad_norm": 1.36357843875885, "learning_rate": 9.751322751322752e-06, "loss": 40.6755, "step": 10789 }, { "epoch": 256.90746268656716, "grad_norm": 1.3731732368469238, "learning_rate": 9.750440917107585e-06, "loss": 39.9085, "step": 10790 }, { "epoch": 256.93134328358207, "grad_norm": 1.4371671676635742, "learning_rate": 9.749559082892416e-06, "loss": 39.8081, "step": 10791 }, { "epoch": 256.95522388059703, "grad_norm": 1.347330927848816, "learning_rate": 9.74867724867725e-06, "loss": 40.7453, "step": 10792 }, { "epoch": 256.97910447761194, "grad_norm": 1.3687584400177002, "learning_rate": 9.747795414462082e-06, "loss": 40.63, "step": 10793 }, { "epoch": 257.0, "grad_norm": 1.3257819414138794, "learning_rate": 9.746913580246914e-06, "loss": 35.285, "step": 10794 }, { "epoch": 257.0238805970149, "grad_norm": 1.3717379570007324, "learning_rate": 9.746031746031747e-06, "loss": 40.6253, "step": 10795 }, { "epoch": 257.0477611940299, "grad_norm": NaN, "learning_rate": 9.74514991181658e-06, "loss": 35.4081, "step": 10796 }, { "epoch": 257.0716417910448, "grad_norm": 1.4341565370559692, "learning_rate": 9.74514991181658e-06, "loss": 38.1321, "step": 10797 }, { "epoch": 257.0955223880597, "grad_norm": 1.3585251569747925, "learning_rate": 9.744268077601411e-06, "loss": 40.2383, "step": 10798 }, { "epoch": 257.1194029850746, "grad_norm": 1.3409748077392578, "learning_rate": 9.743386243386244e-06, "loss": 40.0277, "step": 10799 }, { "epoch": 257.14328358208957, "grad_norm": 1.365688681602478, "learning_rate": 9.742504409171076e-06, "loss": 40.6361, "step": 10800 }, { "epoch": 257.1671641791045, "grad_norm": 1.338606834411621, "learning_rate": 9.741622574955909e-06, "loss": 38.7173, "step": 10801 }, { "epoch": 257.1910447761194, "grad_norm": 1.365814447402954, "learning_rate": 9.740740740740742e-06, "loss": 40.1685, "step": 10802 }, { "epoch": 257.21492537313435, "grad_norm": 1.3372141122817993, "learning_rate": 9.739858906525575e-06, "loss": 40.529, "step": 10803 }, { "epoch": 257.23880597014926, "grad_norm": 1.3595507144927979, "learning_rate": 9.738977072310406e-06, "loss": 40.6467, "step": 10804 }, { "epoch": 257.26268656716417, "grad_norm": 1.341930627822876, "learning_rate": 9.73809523809524e-06, "loss": 40.8508, "step": 10805 }, { "epoch": 257.28656716417913, "grad_norm": 1.357550859451294, "learning_rate": 9.73721340388007e-06, "loss": 39.0913, "step": 10806 }, { "epoch": 257.31044776119404, "grad_norm": 1.3186854124069214, "learning_rate": 9.736331569664904e-06, "loss": 39.4709, "step": 10807 }, { "epoch": 257.33432835820895, "grad_norm": 1.3443143367767334, "learning_rate": 9.735449735449735e-06, "loss": 39.405, "step": 10808 }, { "epoch": 257.35820895522386, "grad_norm": 1.3388371467590332, "learning_rate": 9.734567901234568e-06, "loss": 39.6348, "step": 10809 }, { "epoch": 257.3820895522388, "grad_norm": 1.3060904741287231, "learning_rate": 9.733686067019401e-06, "loss": 39.2089, "step": 10810 }, { "epoch": 257.40597014925373, "grad_norm": 1.3493009805679321, "learning_rate": 9.732804232804234e-06, "loss": 40.5036, "step": 10811 }, { "epoch": 257.42985074626864, "grad_norm": 1.2948417663574219, "learning_rate": 9.731922398589066e-06, "loss": 40.1216, "step": 10812 }, { "epoch": 257.4537313432836, "grad_norm": 1.3749876022338867, "learning_rate": 9.731040564373899e-06, "loss": 41.0193, "step": 10813 }, { "epoch": 257.4776119402985, "grad_norm": 1.3479565382003784, "learning_rate": 9.73015873015873e-06, "loss": 39.429, "step": 10814 }, { "epoch": 257.5014925373134, "grad_norm": 1.338919997215271, "learning_rate": 9.729276895943563e-06, "loss": 40.5138, "step": 10815 }, { "epoch": 257.52537313432833, "grad_norm": 1.3201231956481934, "learning_rate": 9.728395061728396e-06, "loss": 39.4357, "step": 10816 }, { "epoch": 257.5492537313433, "grad_norm": 1.3772618770599365, "learning_rate": 9.727513227513228e-06, "loss": 39.5743, "step": 10817 }, { "epoch": 257.5731343283582, "grad_norm": 1.3540958166122437, "learning_rate": 9.72663139329806e-06, "loss": 40.2895, "step": 10818 }, { "epoch": 257.5970149253731, "grad_norm": 1.336292028427124, "learning_rate": 9.725749559082894e-06, "loss": 40.0142, "step": 10819 }, { "epoch": 257.6208955223881, "grad_norm": 1.3354753255844116, "learning_rate": 9.724867724867725e-06, "loss": 40.2976, "step": 10820 }, { "epoch": 257.644776119403, "grad_norm": 1.396193027496338, "learning_rate": 9.723985890652558e-06, "loss": 39.0221, "step": 10821 }, { "epoch": 257.6686567164179, "grad_norm": 1.3425037860870361, "learning_rate": 9.72310405643739e-06, "loss": 39.5505, "step": 10822 }, { "epoch": 257.6925373134328, "grad_norm": 1.348610758781433, "learning_rate": 9.722222222222223e-06, "loss": 38.8212, "step": 10823 }, { "epoch": 257.7164179104478, "grad_norm": 1.364874243736267, "learning_rate": 9.721340388007056e-06, "loss": 39.4255, "step": 10824 }, { "epoch": 257.7402985074627, "grad_norm": 1.3386898040771484, "learning_rate": 9.720458553791889e-06, "loss": 40.1525, "step": 10825 }, { "epoch": 257.7641791044776, "grad_norm": 1.4105224609375, "learning_rate": 9.71957671957672e-06, "loss": 39.535, "step": 10826 }, { "epoch": 257.78805970149256, "grad_norm": 1.4217689037322998, "learning_rate": 9.718694885361553e-06, "loss": 38.9565, "step": 10827 }, { "epoch": 257.81194029850747, "grad_norm": 1.370247721672058, "learning_rate": 9.717813051146385e-06, "loss": 40.395, "step": 10828 }, { "epoch": 257.8358208955224, "grad_norm": 1.3535115718841553, "learning_rate": 9.716931216931218e-06, "loss": 38.9612, "step": 10829 }, { "epoch": 257.85970149253734, "grad_norm": 1.3724311590194702, "learning_rate": 9.71604938271605e-06, "loss": 39.4382, "step": 10830 }, { "epoch": 257.88358208955225, "grad_norm": 1.3285748958587646, "learning_rate": 9.715167548500882e-06, "loss": 38.9676, "step": 10831 }, { "epoch": 257.90746268656716, "grad_norm": 1.3891751766204834, "learning_rate": 9.714285714285715e-06, "loss": 39.53, "step": 10832 }, { "epoch": 257.93134328358207, "grad_norm": 1.3807052373886108, "learning_rate": 9.713403880070548e-06, "loss": 39.11, "step": 10833 }, { "epoch": 257.95522388059703, "grad_norm": 1.3674180507659912, "learning_rate": 9.71252204585538e-06, "loss": 39.5163, "step": 10834 }, { "epoch": 257.97910447761194, "grad_norm": 1.3214434385299683, "learning_rate": 9.711640211640213e-06, "loss": 40.6403, "step": 10835 }, { "epoch": 258.0, "grad_norm": 1.3572372198104858, "learning_rate": 9.710758377425044e-06, "loss": 35.0867, "step": 10836 }, { "epoch": 258.0238805970149, "grad_norm": 1.341968059539795, "learning_rate": 9.709876543209877e-06, "loss": 39.246, "step": 10837 }, { "epoch": 258.0477611940299, "grad_norm": 1.3659663200378418, "learning_rate": 9.70899470899471e-06, "loss": 39.6475, "step": 10838 }, { "epoch": 258.0716417910448, "grad_norm": 1.3212348222732544, "learning_rate": 9.708112874779543e-06, "loss": 39.8603, "step": 10839 }, { "epoch": 258.0955223880597, "grad_norm": 1.350567102432251, "learning_rate": 9.707231040564375e-06, "loss": 40.7468, "step": 10840 }, { "epoch": 258.1194029850746, "grad_norm": 1.361383080482483, "learning_rate": 9.706349206349208e-06, "loss": 39.3439, "step": 10841 }, { "epoch": 258.14328358208957, "grad_norm": 1.3298285007476807, "learning_rate": 9.705467372134039e-06, "loss": 40.9603, "step": 10842 }, { "epoch": 258.1671641791045, "grad_norm": 1.3749511241912842, "learning_rate": 9.704585537918872e-06, "loss": 40.5523, "step": 10843 }, { "epoch": 258.1910447761194, "grad_norm": 1.3361761569976807, "learning_rate": 9.703703703703703e-06, "loss": 38.9237, "step": 10844 }, { "epoch": 258.21492537313435, "grad_norm": 1.4171159267425537, "learning_rate": 9.702821869488537e-06, "loss": 40.8628, "step": 10845 }, { "epoch": 258.23880597014926, "grad_norm": 1.3731554746627808, "learning_rate": 9.70194003527337e-06, "loss": 38.7766, "step": 10846 }, { "epoch": 258.26268656716417, "grad_norm": 1.3805251121520996, "learning_rate": 9.701058201058203e-06, "loss": 39.268, "step": 10847 }, { "epoch": 258.28656716417913, "grad_norm": 1.3191357851028442, "learning_rate": 9.700176366843034e-06, "loss": 39.5721, "step": 10848 }, { "epoch": 258.31044776119404, "grad_norm": 1.3665574789047241, "learning_rate": 9.699294532627867e-06, "loss": 40.6715, "step": 10849 }, { "epoch": 258.33432835820895, "grad_norm": 1.3372207880020142, "learning_rate": 9.698412698412698e-06, "loss": 39.9894, "step": 10850 }, { "epoch": 258.35820895522386, "grad_norm": 1.337847352027893, "learning_rate": 9.697530864197532e-06, "loss": 39.2998, "step": 10851 }, { "epoch": 258.3820895522388, "grad_norm": 1.3628761768341064, "learning_rate": 9.696649029982365e-06, "loss": 39.4577, "step": 10852 }, { "epoch": 258.40597014925373, "grad_norm": 1.5734317302703857, "learning_rate": 9.695767195767196e-06, "loss": 40.4322, "step": 10853 }, { "epoch": 258.42985074626864, "grad_norm": 1.3996505737304688, "learning_rate": 9.694885361552029e-06, "loss": 39.8124, "step": 10854 }, { "epoch": 258.4537313432836, "grad_norm": 1.351224660873413, "learning_rate": 9.694003527336862e-06, "loss": 39.9899, "step": 10855 }, { "epoch": 258.4776119402985, "grad_norm": 1.3508002758026123, "learning_rate": 9.693121693121693e-06, "loss": 39.7502, "step": 10856 }, { "epoch": 258.5014925373134, "grad_norm": 1.3479228019714355, "learning_rate": 9.692239858906527e-06, "loss": 39.1648, "step": 10857 }, { "epoch": 258.52537313432833, "grad_norm": 1.3615375757217407, "learning_rate": 9.691358024691358e-06, "loss": 40.3461, "step": 10858 }, { "epoch": 258.5492537313433, "grad_norm": 1.3599086999893188, "learning_rate": 9.690476190476191e-06, "loss": 39.7821, "step": 10859 }, { "epoch": 258.5731343283582, "grad_norm": 1.3118109703063965, "learning_rate": 9.689594356261024e-06, "loss": 40.3277, "step": 10860 }, { "epoch": 258.5970149253731, "grad_norm": 1.346532940864563, "learning_rate": 9.688712522045857e-06, "loss": 40.4065, "step": 10861 }, { "epoch": 258.6208955223881, "grad_norm": 1.3576853275299072, "learning_rate": 9.687830687830688e-06, "loss": 38.699, "step": 10862 }, { "epoch": 258.644776119403, "grad_norm": 1.3175050020217896, "learning_rate": 9.686948853615522e-06, "loss": 40.6459, "step": 10863 }, { "epoch": 258.6686567164179, "grad_norm": 1.3176695108413696, "learning_rate": 9.686067019400353e-06, "loss": 40.0778, "step": 10864 }, { "epoch": 258.6925373134328, "grad_norm": 1.3296691179275513, "learning_rate": 9.685185185185186e-06, "loss": 39.677, "step": 10865 }, { "epoch": 258.7164179104478, "grad_norm": 1.2999943494796753, "learning_rate": 9.684303350970017e-06, "loss": 40.0032, "step": 10866 }, { "epoch": 258.7402985074627, "grad_norm": 1.3195043802261353, "learning_rate": 9.68342151675485e-06, "loss": 38.7226, "step": 10867 }, { "epoch": 258.7641791044776, "grad_norm": 1.3519659042358398, "learning_rate": 9.682539682539683e-06, "loss": 39.8125, "step": 10868 }, { "epoch": 258.78805970149256, "grad_norm": 1.3453508615493774, "learning_rate": 9.681657848324517e-06, "loss": 39.3137, "step": 10869 }, { "epoch": 258.81194029850747, "grad_norm": 1.3508586883544922, "learning_rate": 9.68077601410935e-06, "loss": 39.6148, "step": 10870 }, { "epoch": 258.8358208955224, "grad_norm": 1.367440104484558, "learning_rate": 9.679894179894181e-06, "loss": 40.1352, "step": 10871 }, { "epoch": 258.85970149253734, "grad_norm": 1.3600516319274902, "learning_rate": 9.679012345679012e-06, "loss": 40.819, "step": 10872 }, { "epoch": 258.88358208955225, "grad_norm": 1.395025610923767, "learning_rate": 9.678130511463845e-06, "loss": 39.5762, "step": 10873 }, { "epoch": 258.90746268656716, "grad_norm": 1.3607839345932007, "learning_rate": 9.677248677248678e-06, "loss": 39.5009, "step": 10874 }, { "epoch": 258.93134328358207, "grad_norm": 1.3559160232543945, "learning_rate": 9.67636684303351e-06, "loss": 39.0489, "step": 10875 }, { "epoch": 258.95522388059703, "grad_norm": 1.4037197828292847, "learning_rate": 9.675485008818343e-06, "loss": 40.3135, "step": 10876 }, { "epoch": 258.97910447761194, "grad_norm": 1.3419318199157715, "learning_rate": 9.674603174603176e-06, "loss": 38.4441, "step": 10877 }, { "epoch": 259.0, "grad_norm": 1.3610018491744995, "learning_rate": 9.673721340388007e-06, "loss": 34.9504, "step": 10878 }, { "epoch": 259.0238805970149, "grad_norm": 1.3315635919570923, "learning_rate": 9.67283950617284e-06, "loss": 39.2953, "step": 10879 }, { "epoch": 259.0477611940299, "grad_norm": 1.3162180185317993, "learning_rate": 9.671957671957672e-06, "loss": 38.8039, "step": 10880 }, { "epoch": 259.0716417910448, "grad_norm": 1.394539713859558, "learning_rate": 9.671075837742505e-06, "loss": 40.4238, "step": 10881 }, { "epoch": 259.0955223880597, "grad_norm": 1.359700083732605, "learning_rate": 9.670194003527338e-06, "loss": 39.8, "step": 10882 }, { "epoch": 259.1194029850746, "grad_norm": 1.3215121030807495, "learning_rate": 9.669312169312171e-06, "loss": 39.386, "step": 10883 }, { "epoch": 259.14328358208957, "grad_norm": 1.3331449031829834, "learning_rate": 9.668430335097002e-06, "loss": 40.5358, "step": 10884 }, { "epoch": 259.1671641791045, "grad_norm": NaN, "learning_rate": 9.667548500881835e-06, "loss": 40.1475, "step": 10885 }, { "epoch": 259.1910447761194, "grad_norm": 1.3711529970169067, "learning_rate": 9.667548500881835e-06, "loss": 39.5764, "step": 10886 }, { "epoch": 259.21492537313435, "grad_norm": 1.4160228967666626, "learning_rate": 9.666666666666667e-06, "loss": 39.1354, "step": 10887 }, { "epoch": 259.23880597014926, "grad_norm": 1.3624448776245117, "learning_rate": 9.6657848324515e-06, "loss": 41.0596, "step": 10888 }, { "epoch": 259.26268656716417, "grad_norm": 1.330605387687683, "learning_rate": 9.664902998236331e-06, "loss": 39.5731, "step": 10889 }, { "epoch": 259.28656716417913, "grad_norm": 1.3452922105789185, "learning_rate": 9.664021164021164e-06, "loss": 39.0613, "step": 10890 }, { "epoch": 259.31044776119404, "grad_norm": 1.3622416257858276, "learning_rate": 9.663139329805997e-06, "loss": 39.7229, "step": 10891 }, { "epoch": 259.33432835820895, "grad_norm": 1.3140305280685425, "learning_rate": 9.66225749559083e-06, "loss": 40.2523, "step": 10892 }, { "epoch": 259.35820895522386, "grad_norm": 1.355017066001892, "learning_rate": 9.661375661375663e-06, "loss": 39.3581, "step": 10893 }, { "epoch": 259.3820895522388, "grad_norm": 1.3458837270736694, "learning_rate": 9.660493827160495e-06, "loss": 39.6014, "step": 10894 }, { "epoch": 259.40597014925373, "grad_norm": 1.3515324592590332, "learning_rate": 9.659611992945326e-06, "loss": 40.2852, "step": 10895 }, { "epoch": 259.42985074626864, "grad_norm": 1.2936146259307861, "learning_rate": 9.65873015873016e-06, "loss": 40.1704, "step": 10896 }, { "epoch": 259.4537313432836, "grad_norm": 1.3728492259979248, "learning_rate": 9.657848324514992e-06, "loss": 39.4121, "step": 10897 }, { "epoch": 259.4776119402985, "grad_norm": 1.3457893133163452, "learning_rate": 9.656966490299824e-06, "loss": 40.1541, "step": 10898 }, { "epoch": 259.5014925373134, "grad_norm": 1.3900660276412964, "learning_rate": 9.656084656084657e-06, "loss": 40.551, "step": 10899 }, { "epoch": 259.52537313432833, "grad_norm": 1.3379777669906616, "learning_rate": 9.65520282186949e-06, "loss": 39.9703, "step": 10900 }, { "epoch": 259.5492537313433, "grad_norm": 1.3530079126358032, "learning_rate": 9.654320987654323e-06, "loss": 40.752, "step": 10901 }, { "epoch": 259.5731343283582, "grad_norm": 1.334955096244812, "learning_rate": 9.653439153439154e-06, "loss": 39.8745, "step": 10902 }, { "epoch": 259.5970149253731, "grad_norm": 1.3904354572296143, "learning_rate": 9.652557319223986e-06, "loss": 39.785, "step": 10903 }, { "epoch": 259.6208955223881, "grad_norm": 1.359431505203247, "learning_rate": 9.651675485008819e-06, "loss": 39.8461, "step": 10904 }, { "epoch": 259.644776119403, "grad_norm": 1.3725444078445435, "learning_rate": 9.650793650793652e-06, "loss": 39.2328, "step": 10905 }, { "epoch": 259.6686567164179, "grad_norm": 1.333742618560791, "learning_rate": 9.649911816578485e-06, "loss": 39.4171, "step": 10906 }, { "epoch": 259.6925373134328, "grad_norm": 1.2969377040863037, "learning_rate": 9.649029982363316e-06, "loss": 41.3785, "step": 10907 }, { "epoch": 259.7164179104478, "grad_norm": 1.6350141763687134, "learning_rate": 9.64814814814815e-06, "loss": 39.7227, "step": 10908 }, { "epoch": 259.7402985074627, "grad_norm": 1.3644070625305176, "learning_rate": 9.64726631393298e-06, "loss": 39.6203, "step": 10909 }, { "epoch": 259.7641791044776, "grad_norm": 1.3566888570785522, "learning_rate": 9.646384479717814e-06, "loss": 40.5469, "step": 10910 }, { "epoch": 259.78805970149256, "grad_norm": 1.365502953529358, "learning_rate": 9.645502645502647e-06, "loss": 39.4953, "step": 10911 }, { "epoch": 259.81194029850747, "grad_norm": 1.332312822341919, "learning_rate": 9.644620811287478e-06, "loss": 41.2661, "step": 10912 }, { "epoch": 259.8358208955224, "grad_norm": 1.348312497138977, "learning_rate": 9.643738977072311e-06, "loss": 39.3898, "step": 10913 }, { "epoch": 259.85970149253734, "grad_norm": 1.3728663921356201, "learning_rate": 9.642857142857144e-06, "loss": 39.0602, "step": 10914 }, { "epoch": 259.88358208955225, "grad_norm": 1.2716931104660034, "learning_rate": 9.641975308641977e-06, "loss": 39.1609, "step": 10915 }, { "epoch": 259.90746268656716, "grad_norm": 1.3283432722091675, "learning_rate": 9.641093474426809e-06, "loss": 39.4236, "step": 10916 }, { "epoch": 259.93134328358207, "grad_norm": 1.3858423233032227, "learning_rate": 9.64021164021164e-06, "loss": 38.5484, "step": 10917 }, { "epoch": 259.95522388059703, "grad_norm": 1.3377037048339844, "learning_rate": 9.639329805996473e-06, "loss": 40.3073, "step": 10918 }, { "epoch": 259.97910447761194, "grad_norm": 1.3691117763519287, "learning_rate": 9.638447971781306e-06, "loss": 39.4672, "step": 10919 }, { "epoch": 260.0, "grad_norm": 1.3068797588348389, "learning_rate": 9.63756613756614e-06, "loss": 33.4042, "step": 10920 }, { "epoch": 260.0238805970149, "grad_norm": 1.385156512260437, "learning_rate": 9.63668430335097e-06, "loss": 40.8462, "step": 10921 }, { "epoch": 260.0477611940299, "grad_norm": 1.3395620584487915, "learning_rate": 9.635802469135804e-06, "loss": 39.4163, "step": 10922 }, { "epoch": 260.0716417910448, "grad_norm": 1.33933424949646, "learning_rate": 9.634920634920637e-06, "loss": 40.603, "step": 10923 }, { "epoch": 260.0955223880597, "grad_norm": 1.296547770500183, "learning_rate": 9.634038800705468e-06, "loss": 39.251, "step": 10924 }, { "epoch": 260.1194029850746, "grad_norm": 1.357055902481079, "learning_rate": 9.6331569664903e-06, "loss": 40.0952, "step": 10925 }, { "epoch": 260.14328358208957, "grad_norm": 1.3307783603668213, "learning_rate": 9.632275132275132e-06, "loss": 39.9368, "step": 10926 }, { "epoch": 260.1671641791045, "grad_norm": 1.3837127685546875, "learning_rate": 9.631393298059966e-06, "loss": 40.5203, "step": 10927 }, { "epoch": 260.1910447761194, "grad_norm": 1.3779993057250977, "learning_rate": 9.630511463844799e-06, "loss": 39.3064, "step": 10928 }, { "epoch": 260.21492537313435, "grad_norm": 1.3669720888137817, "learning_rate": 9.62962962962963e-06, "loss": 41.2302, "step": 10929 }, { "epoch": 260.23880597014926, "grad_norm": 1.3966922760009766, "learning_rate": 9.628747795414463e-06, "loss": 40.6231, "step": 10930 }, { "epoch": 260.26268656716417, "grad_norm": 1.334028720855713, "learning_rate": 9.627865961199296e-06, "loss": 40.5773, "step": 10931 }, { "epoch": 260.28656716417913, "grad_norm": 1.3534640073776245, "learning_rate": 9.626984126984127e-06, "loss": 39.9567, "step": 10932 }, { "epoch": 260.31044776119404, "grad_norm": 1.357122540473938, "learning_rate": 9.62610229276896e-06, "loss": 39.6897, "step": 10933 }, { "epoch": 260.33432835820895, "grad_norm": 1.3256688117980957, "learning_rate": 9.625220458553792e-06, "loss": 40.6093, "step": 10934 }, { "epoch": 260.35820895522386, "grad_norm": 1.3404160737991333, "learning_rate": 9.624338624338625e-06, "loss": 40.5448, "step": 10935 }, { "epoch": 260.3820895522388, "grad_norm": NaN, "learning_rate": 9.623456790123458e-06, "loss": 34.5599, "step": 10936 }, { "epoch": 260.40597014925373, "grad_norm": 1.3885859251022339, "learning_rate": 9.623456790123458e-06, "loss": 39.5195, "step": 10937 }, { "epoch": 260.42985074626864, "grad_norm": 1.3765268325805664, "learning_rate": 9.622574955908291e-06, "loss": 40.2345, "step": 10938 }, { "epoch": 260.4537313432836, "grad_norm": 1.321792483329773, "learning_rate": 9.621693121693122e-06, "loss": 39.4218, "step": 10939 }, { "epoch": 260.4776119402985, "grad_norm": 1.4409973621368408, "learning_rate": 9.620811287477954e-06, "loss": 38.3655, "step": 10940 }, { "epoch": 260.5014925373134, "grad_norm": 1.3493342399597168, "learning_rate": 9.619929453262787e-06, "loss": 38.7654, "step": 10941 }, { "epoch": 260.52537313432833, "grad_norm": 1.331803321838379, "learning_rate": 9.61904761904762e-06, "loss": 36.0159, "step": 10942 }, { "epoch": 260.5492537313433, "grad_norm": 1.3138195276260376, "learning_rate": 9.618165784832453e-06, "loss": 39.0995, "step": 10943 }, { "epoch": 260.5731343283582, "grad_norm": 1.3492025136947632, "learning_rate": 9.617283950617284e-06, "loss": 39.8184, "step": 10944 }, { "epoch": 260.5970149253731, "grad_norm": 1.3482664823532104, "learning_rate": 9.616402116402117e-06, "loss": 39.5997, "step": 10945 }, { "epoch": 260.6208955223881, "grad_norm": 1.350699782371521, "learning_rate": 9.61552028218695e-06, "loss": 40.2292, "step": 10946 }, { "epoch": 260.644776119403, "grad_norm": 1.335776448249817, "learning_rate": 9.614638447971782e-06, "loss": 38.9746, "step": 10947 }, { "epoch": 260.6686567164179, "grad_norm": 1.3448867797851562, "learning_rate": 9.613756613756613e-06, "loss": 39.1136, "step": 10948 }, { "epoch": 260.6925373134328, "grad_norm": 2.1885650157928467, "learning_rate": 9.612874779541446e-06, "loss": 40.5758, "step": 10949 }, { "epoch": 260.7164179104478, "grad_norm": NaN, "learning_rate": 9.61199294532628e-06, "loss": 33.4974, "step": 10950 }, { "epoch": 260.7402985074627, "grad_norm": 1.3476439714431763, "learning_rate": 9.61199294532628e-06, "loss": 38.9866, "step": 10951 }, { "epoch": 260.7641791044776, "grad_norm": 1.3503283262252808, "learning_rate": 9.611111111111112e-06, "loss": 41.8437, "step": 10952 }, { "epoch": 260.78805970149256, "grad_norm": 1.290765404701233, "learning_rate": 9.610229276895946e-06, "loss": 40.5859, "step": 10953 }, { "epoch": 260.81194029850747, "grad_norm": 1.3405566215515137, "learning_rate": 9.609347442680777e-06, "loss": 40.2783, "step": 10954 }, { "epoch": 260.8358208955224, "grad_norm": 1.3474996089935303, "learning_rate": 9.60846560846561e-06, "loss": 40.3356, "step": 10955 }, { "epoch": 260.85970149253734, "grad_norm": 1.3773193359375, "learning_rate": 9.607583774250441e-06, "loss": 40.3575, "step": 10956 }, { "epoch": 260.88358208955225, "grad_norm": 1.3910976648330688, "learning_rate": 9.606701940035274e-06, "loss": 39.5323, "step": 10957 }, { "epoch": 260.90746268656716, "grad_norm": 1.3175477981567383, "learning_rate": 9.605820105820106e-06, "loss": 39.2827, "step": 10958 }, { "epoch": 260.93134328358207, "grad_norm": 1.3431384563446045, "learning_rate": 9.604938271604939e-06, "loss": 39.0855, "step": 10959 }, { "epoch": 260.95522388059703, "grad_norm": 1.3307996988296509, "learning_rate": 9.604056437389772e-06, "loss": 37.6866, "step": 10960 }, { "epoch": 260.97910447761194, "grad_norm": 1.4860939979553223, "learning_rate": 9.603174603174605e-06, "loss": 40.4364, "step": 10961 }, { "epoch": 261.0, "grad_norm": 1.297798752784729, "learning_rate": 9.602292768959436e-06, "loss": 36.1236, "step": 10962 }, { "epoch": 261.0238805970149, "grad_norm": 1.3150029182434082, "learning_rate": 9.60141093474427e-06, "loss": 40.1082, "step": 10963 }, { "epoch": 261.0477611940299, "grad_norm": 1.3265035152435303, "learning_rate": 9.6005291005291e-06, "loss": 39.263, "step": 10964 }, { "epoch": 261.0716417910448, "grad_norm": 1.488178014755249, "learning_rate": 9.599647266313934e-06, "loss": 39.0889, "step": 10965 }, { "epoch": 261.0955223880597, "grad_norm": 1.3118826150894165, "learning_rate": 9.598765432098767e-06, "loss": 40.3917, "step": 10966 }, { "epoch": 261.1194029850746, "grad_norm": 1.3814677000045776, "learning_rate": 9.597883597883598e-06, "loss": 40.5191, "step": 10967 }, { "epoch": 261.14328358208957, "grad_norm": 1.3074464797973633, "learning_rate": 9.597001763668431e-06, "loss": 40.5498, "step": 10968 }, { "epoch": 261.1671641791045, "grad_norm": 1.4027786254882812, "learning_rate": 9.596119929453264e-06, "loss": 39.7093, "step": 10969 }, { "epoch": 261.1910447761194, "grad_norm": 1.279317021369934, "learning_rate": 9.595238095238096e-06, "loss": 40.2911, "step": 10970 }, { "epoch": 261.21492537313435, "grad_norm": 1.3322957754135132, "learning_rate": 9.594356261022927e-06, "loss": 39.2082, "step": 10971 }, { "epoch": 261.23880597014926, "grad_norm": 1.3443113565444946, "learning_rate": 9.59347442680776e-06, "loss": 40.3157, "step": 10972 }, { "epoch": 261.26268656716417, "grad_norm": 1.350110650062561, "learning_rate": 9.592592592592593e-06, "loss": 38.8808, "step": 10973 }, { "epoch": 261.28656716417913, "grad_norm": 1.3729221820831299, "learning_rate": 9.591710758377426e-06, "loss": 39.0108, "step": 10974 }, { "epoch": 261.31044776119404, "grad_norm": NaN, "learning_rate": 9.59082892416226e-06, "loss": 34.7066, "step": 10975 }, { "epoch": 261.33432835820895, "grad_norm": 1.326159954071045, "learning_rate": 9.59082892416226e-06, "loss": 39.3182, "step": 10976 }, { "epoch": 261.35820895522386, "grad_norm": 1.301585078239441, "learning_rate": 9.58994708994709e-06, "loss": 40.2715, "step": 10977 }, { "epoch": 261.3820895522388, "grad_norm": 1.3533471822738647, "learning_rate": 9.589065255731924e-06, "loss": 39.2462, "step": 10978 }, { "epoch": 261.40597014925373, "grad_norm": 1.3924534320831299, "learning_rate": 9.588183421516755e-06, "loss": 40.4987, "step": 10979 }, { "epoch": 261.42985074626864, "grad_norm": 1.3305037021636963, "learning_rate": 9.587301587301588e-06, "loss": 39.7499, "step": 10980 }, { "epoch": 261.4537313432836, "grad_norm": 1.3540340662002563, "learning_rate": 9.58641975308642e-06, "loss": 38.3092, "step": 10981 }, { "epoch": 261.4776119402985, "grad_norm": 1.3315110206604004, "learning_rate": 9.585537918871253e-06, "loss": 39.9721, "step": 10982 }, { "epoch": 261.5014925373134, "grad_norm": 1.3371539115905762, "learning_rate": 9.584656084656086e-06, "loss": 40.55, "step": 10983 }, { "epoch": 261.52537313432833, "grad_norm": 1.3394726514816284, "learning_rate": 9.583774250440919e-06, "loss": 39.6058, "step": 10984 }, { "epoch": 261.5492537313433, "grad_norm": 1.3177586793899536, "learning_rate": 9.58289241622575e-06, "loss": 40.3581, "step": 10985 }, { "epoch": 261.5731343283582, "grad_norm": NaN, "learning_rate": 9.582010582010583e-06, "loss": 56.9881, "step": 10986 }, { "epoch": 261.5970149253731, "grad_norm": NaN, "learning_rate": 9.582010582010583e-06, "loss": 40.3207, "step": 10987 }, { "epoch": 261.6208955223881, "grad_norm": 1.3511536121368408, "learning_rate": 9.582010582010583e-06, "loss": 38.4037, "step": 10988 }, { "epoch": 261.644776119403, "grad_norm": 1.3343217372894287, "learning_rate": 9.581128747795415e-06, "loss": 39.2142, "step": 10989 }, { "epoch": 261.6686567164179, "grad_norm": 1.3393723964691162, "learning_rate": 9.580246913580248e-06, "loss": 40.5159, "step": 10990 }, { "epoch": 261.6925373134328, "grad_norm": 1.3037984371185303, "learning_rate": 9.57936507936508e-06, "loss": 40.4017, "step": 10991 }, { "epoch": 261.7164179104478, "grad_norm": 1.4590885639190674, "learning_rate": 9.578483245149912e-06, "loss": 39.9591, "step": 10992 }, { "epoch": 261.7402985074627, "grad_norm": 1.2988319396972656, "learning_rate": 9.577601410934745e-06, "loss": 39.5345, "step": 10993 }, { "epoch": 261.7641791044776, "grad_norm": 1.303982138633728, "learning_rate": 9.576719576719578e-06, "loss": 40.0718, "step": 10994 }, { "epoch": 261.78805970149256, "grad_norm": 1.335340142250061, "learning_rate": 9.57583774250441e-06, "loss": 39.4954, "step": 10995 }, { "epoch": 261.81194029850747, "grad_norm": 1.3583860397338867, "learning_rate": 9.574955908289243e-06, "loss": 40.0703, "step": 10996 }, { "epoch": 261.8358208955224, "grad_norm": 1.3154890537261963, "learning_rate": 9.574074074074074e-06, "loss": 39.9896, "step": 10997 }, { "epoch": 261.85970149253734, "grad_norm": 1.299994945526123, "learning_rate": 9.573192239858907e-06, "loss": 40.121, "step": 10998 }, { "epoch": 261.88358208955225, "grad_norm": NaN, "learning_rate": 9.57231040564374e-06, "loss": 55.1728, "step": 10999 }, { "epoch": 261.90746268656716, "grad_norm": 1.4763442277908325, "learning_rate": 9.57231040564374e-06, "loss": 39.9903, "step": 11000 }, { "epoch": 261.93134328358207, "grad_norm": 1.3571033477783203, "learning_rate": 9.571428571428573e-06, "loss": 39.1526, "step": 11001 }, { "epoch": 261.95522388059703, "grad_norm": 1.3599194288253784, "learning_rate": 9.570546737213405e-06, "loss": 39.8079, "step": 11002 }, { "epoch": 261.97910447761194, "grad_norm": 1.3289680480957031, "learning_rate": 9.569664902998238e-06, "loss": 40.101, "step": 11003 }, { "epoch": 262.0, "grad_norm": 1.34956693649292, "learning_rate": 9.568783068783069e-06, "loss": 35.2025, "step": 11004 }, { "epoch": 262.0238805970149, "grad_norm": 1.2701388597488403, "learning_rate": 9.567901234567902e-06, "loss": 39.4206, "step": 11005 }, { "epoch": 262.0477611940299, "grad_norm": 1.3039302825927734, "learning_rate": 9.567019400352735e-06, "loss": 39.8773, "step": 11006 }, { "epoch": 262.0716417910448, "grad_norm": 1.3375074863433838, "learning_rate": 9.566137566137567e-06, "loss": 39.6491, "step": 11007 }, { "epoch": 262.0955223880597, "grad_norm": 1.3948214054107666, "learning_rate": 9.5652557319224e-06, "loss": 40.0632, "step": 11008 }, { "epoch": 262.1194029850746, "grad_norm": 1.3376342058181763, "learning_rate": 9.564373897707233e-06, "loss": 38.7729, "step": 11009 }, { "epoch": 262.14328358208957, "grad_norm": 1.362669587135315, "learning_rate": 9.563492063492064e-06, "loss": 39.5894, "step": 11010 }, { "epoch": 262.1671641791045, "grad_norm": 1.2741061449050903, "learning_rate": 9.562610229276897e-06, "loss": 39.1139, "step": 11011 }, { "epoch": 262.1910447761194, "grad_norm": 1.3458747863769531, "learning_rate": 9.561728395061728e-06, "loss": 39.4754, "step": 11012 }, { "epoch": 262.21492537313435, "grad_norm": 1.3009753227233887, "learning_rate": 9.560846560846561e-06, "loss": 39.2753, "step": 11013 }, { "epoch": 262.23880597014926, "grad_norm": 1.3388205766677856, "learning_rate": 9.559964726631395e-06, "loss": 39.1312, "step": 11014 }, { "epoch": 262.26268656716417, "grad_norm": 1.3330156803131104, "learning_rate": 9.559082892416226e-06, "loss": 39.3214, "step": 11015 }, { "epoch": 262.28656716417913, "grad_norm": NaN, "learning_rate": 9.558201058201059e-06, "loss": 54.2471, "step": 11016 }, { "epoch": 262.31044776119404, "grad_norm": 1.7814407348632812, "learning_rate": 9.558201058201059e-06, "loss": 38.9786, "step": 11017 }, { "epoch": 262.33432835820895, "grad_norm": 1.803310513496399, "learning_rate": 9.557319223985892e-06, "loss": 39.2734, "step": 11018 }, { "epoch": 262.35820895522386, "grad_norm": 1.6645331382751465, "learning_rate": 9.556437389770723e-06, "loss": 39.938, "step": 11019 }, { "epoch": 262.3820895522388, "grad_norm": 1.8918858766555786, "learning_rate": 9.555555555555556e-06, "loss": 41.0867, "step": 11020 }, { "epoch": 262.40597014925373, "grad_norm": 1.460067629814148, "learning_rate": 9.554673721340388e-06, "loss": 40.4015, "step": 11021 }, { "epoch": 262.42985074626864, "grad_norm": 1.3726826906204224, "learning_rate": 9.553791887125221e-06, "loss": 39.4579, "step": 11022 }, { "epoch": 262.4537313432836, "grad_norm": 1.5072730779647827, "learning_rate": 9.552910052910054e-06, "loss": 40.0848, "step": 11023 }, { "epoch": 262.4776119402985, "grad_norm": 1.4873336553573608, "learning_rate": 9.552028218694887e-06, "loss": 39.8741, "step": 11024 }, { "epoch": 262.5014925373134, "grad_norm": 1.5278078317642212, "learning_rate": 9.551146384479718e-06, "loss": 39.4698, "step": 11025 }, { "epoch": 262.52537313432833, "grad_norm": 1.5337252616882324, "learning_rate": 9.550264550264551e-06, "loss": 40.9124, "step": 11026 }, { "epoch": 262.5492537313433, "grad_norm": 1.5442131757736206, "learning_rate": 9.549382716049383e-06, "loss": 40.3341, "step": 11027 }, { "epoch": 262.5731343283582, "grad_norm": 1.4205265045166016, "learning_rate": 9.548500881834216e-06, "loss": 39.9973, "step": 11028 }, { "epoch": 262.5970149253731, "grad_norm": 1.436244249343872, "learning_rate": 9.547619047619049e-06, "loss": 39.3611, "step": 11029 }, { "epoch": 262.6208955223881, "grad_norm": 1.3821502923965454, "learning_rate": 9.54673721340388e-06, "loss": 39.6851, "step": 11030 }, { "epoch": 262.644776119403, "grad_norm": 1.5359870195388794, "learning_rate": 9.545855379188713e-06, "loss": 39.5825, "step": 11031 }, { "epoch": 262.6686567164179, "grad_norm": 1.492841362953186, "learning_rate": 9.544973544973546e-06, "loss": 39.8758, "step": 11032 }, { "epoch": 262.6925373134328, "grad_norm": 1.4855270385742188, "learning_rate": 9.544091710758378e-06, "loss": 40.0826, "step": 11033 }, { "epoch": 262.7164179104478, "grad_norm": 1.4357179403305054, "learning_rate": 9.543209876543211e-06, "loss": 41.007, "step": 11034 }, { "epoch": 262.7402985074627, "grad_norm": 1.5122369527816772, "learning_rate": 9.542328042328042e-06, "loss": 39.525, "step": 11035 }, { "epoch": 262.7641791044776, "grad_norm": 1.3908441066741943, "learning_rate": 9.541446208112875e-06, "loss": 40.1822, "step": 11036 }, { "epoch": 262.78805970149256, "grad_norm": 1.415174126625061, "learning_rate": 9.540564373897708e-06, "loss": 39.2575, "step": 11037 }, { "epoch": 262.81194029850747, "grad_norm": 1.3955402374267578, "learning_rate": 9.539682539682541e-06, "loss": 39.5076, "step": 11038 }, { "epoch": 262.8358208955224, "grad_norm": 1.4038375616073608, "learning_rate": 9.538800705467373e-06, "loss": 39.8917, "step": 11039 }, { "epoch": 262.85970149253734, "grad_norm": 1.4364829063415527, "learning_rate": 9.537918871252206e-06, "loss": 38.972, "step": 11040 }, { "epoch": 262.88358208955225, "grad_norm": 1.5320183038711548, "learning_rate": 9.537037037037037e-06, "loss": 40.1525, "step": 11041 }, { "epoch": 262.90746268656716, "grad_norm": 1.4557307958602905, "learning_rate": 9.53615520282187e-06, "loss": 40.3737, "step": 11042 }, { "epoch": 262.93134328358207, "grad_norm": 1.47365403175354, "learning_rate": 9.535273368606702e-06, "loss": 39.9069, "step": 11043 }, { "epoch": 262.95522388059703, "grad_norm": 1.4319478273391724, "learning_rate": 9.534391534391535e-06, "loss": 40.7778, "step": 11044 }, { "epoch": 262.97910447761194, "grad_norm": 1.4303208589553833, "learning_rate": 9.533509700176368e-06, "loss": 40.2477, "step": 11045 }, { "epoch": 263.0, "grad_norm": 1.4580843448638916, "learning_rate": 9.532627865961201e-06, "loss": 36.2377, "step": 11046 }, { "epoch": 263.0238805970149, "grad_norm": 1.4735437631607056, "learning_rate": 9.531746031746032e-06, "loss": 39.9193, "step": 11047 }, { "epoch": 263.0477611940299, "grad_norm": 1.4402790069580078, "learning_rate": 9.530864197530865e-06, "loss": 40.5044, "step": 11048 }, { "epoch": 263.0716417910448, "grad_norm": 1.3983283042907715, "learning_rate": 9.529982363315697e-06, "loss": 41.4467, "step": 11049 }, { "epoch": 263.0955223880597, "grad_norm": 1.3871731758117676, "learning_rate": 9.52910052910053e-06, "loss": 41.0976, "step": 11050 }, { "epoch": 263.1194029850746, "grad_norm": 1.3993468284606934, "learning_rate": 9.528218694885363e-06, "loss": 40.37, "step": 11051 }, { "epoch": 263.14328358208957, "grad_norm": 1.4817452430725098, "learning_rate": 9.527336860670194e-06, "loss": 39.1753, "step": 11052 }, { "epoch": 263.1671641791045, "grad_norm": 1.422795295715332, "learning_rate": 9.526455026455027e-06, "loss": 40.0078, "step": 11053 }, { "epoch": 263.1910447761194, "grad_norm": 1.544062614440918, "learning_rate": 9.52557319223986e-06, "loss": 39.4243, "step": 11054 }, { "epoch": 263.21492537313435, "grad_norm": 1.5189142227172852, "learning_rate": 9.524691358024692e-06, "loss": 39.6033, "step": 11055 }, { "epoch": 263.23880597014926, "grad_norm": 1.6835061311721802, "learning_rate": 9.523809523809525e-06, "loss": 41.5583, "step": 11056 }, { "epoch": 263.26268656716417, "grad_norm": 1.4814389944076538, "learning_rate": 9.522927689594356e-06, "loss": 39.3464, "step": 11057 }, { "epoch": 263.28656716417913, "grad_norm": 1.4500453472137451, "learning_rate": 9.52204585537919e-06, "loss": 38.9564, "step": 11058 }, { "epoch": 263.31044776119404, "grad_norm": 1.4696437120437622, "learning_rate": 9.521164021164022e-06, "loss": 39.2621, "step": 11059 }, { "epoch": 263.33432835820895, "grad_norm": 1.454594373703003, "learning_rate": 9.520282186948855e-06, "loss": 40.1176, "step": 11060 }, { "epoch": 263.35820895522386, "grad_norm": 1.4362577199935913, "learning_rate": 9.519400352733687e-06, "loss": 39.5773, "step": 11061 }, { "epoch": 263.3820895522388, "grad_norm": 1.461825966835022, "learning_rate": 9.51851851851852e-06, "loss": 39.7379, "step": 11062 }, { "epoch": 263.40597014925373, "grad_norm": 1.455212950706482, "learning_rate": 9.517636684303351e-06, "loss": 40.8723, "step": 11063 }, { "epoch": 263.42985074626864, "grad_norm": 1.5426651239395142, "learning_rate": 9.516754850088184e-06, "loss": 38.2283, "step": 11064 }, { "epoch": 263.4537313432836, "grad_norm": 1.5111769437789917, "learning_rate": 9.515873015873016e-06, "loss": 40.4063, "step": 11065 }, { "epoch": 263.4776119402985, "grad_norm": 1.5706543922424316, "learning_rate": 9.514991181657849e-06, "loss": 41.3437, "step": 11066 }, { "epoch": 263.5014925373134, "grad_norm": 1.5421589612960815, "learning_rate": 9.514109347442682e-06, "loss": 38.934, "step": 11067 }, { "epoch": 263.52537313432833, "grad_norm": 1.6436387300491333, "learning_rate": 9.513227513227515e-06, "loss": 41.0068, "step": 11068 }, { "epoch": 263.5492537313433, "grad_norm": 1.6186575889587402, "learning_rate": 9.512345679012348e-06, "loss": 38.8221, "step": 11069 }, { "epoch": 263.5731343283582, "grad_norm": 1.4905680418014526, "learning_rate": 9.51146384479718e-06, "loss": 39.9604, "step": 11070 }, { "epoch": 263.5970149253731, "grad_norm": 1.519944429397583, "learning_rate": 9.51058201058201e-06, "loss": 39.9098, "step": 11071 }, { "epoch": 263.6208955223881, "grad_norm": 1.4913849830627441, "learning_rate": 9.509700176366844e-06, "loss": 40.8175, "step": 11072 }, { "epoch": 263.644776119403, "grad_norm": 1.5057015419006348, "learning_rate": 9.508818342151677e-06, "loss": 38.5775, "step": 11073 }, { "epoch": 263.6686567164179, "grad_norm": 1.5651681423187256, "learning_rate": 9.507936507936508e-06, "loss": 40.1933, "step": 11074 }, { "epoch": 263.6925373134328, "grad_norm": 1.5280174016952515, "learning_rate": 9.507054673721341e-06, "loss": 39.4243, "step": 11075 }, { "epoch": 263.7164179104478, "grad_norm": 1.5952842235565186, "learning_rate": 9.506172839506174e-06, "loss": 39.7106, "step": 11076 }, { "epoch": 263.7402985074627, "grad_norm": 1.5674067735671997, "learning_rate": 9.505291005291006e-06, "loss": 39.5876, "step": 11077 }, { "epoch": 263.7641791044776, "grad_norm": 1.580061674118042, "learning_rate": 9.504409171075839e-06, "loss": 40.3354, "step": 11078 }, { "epoch": 263.78805970149256, "grad_norm": 1.5288664102554321, "learning_rate": 9.50352733686067e-06, "loss": 40.2097, "step": 11079 }, { "epoch": 263.81194029850747, "grad_norm": 1.5210719108581543, "learning_rate": 9.502645502645503e-06, "loss": 39.8765, "step": 11080 }, { "epoch": 263.8358208955224, "grad_norm": 1.626196265220642, "learning_rate": 9.501763668430336e-06, "loss": 39.712, "step": 11081 }, { "epoch": 263.85970149253734, "grad_norm": 1.7383569478988647, "learning_rate": 9.50088183421517e-06, "loss": 40.6828, "step": 11082 }, { "epoch": 263.88358208955225, "grad_norm": 1.5385510921478271, "learning_rate": 9.5e-06, "loss": 39.956, "step": 11083 }, { "epoch": 263.90746268656716, "grad_norm": 1.590219259262085, "learning_rate": 9.499118165784834e-06, "loss": 39.8883, "step": 11084 }, { "epoch": 263.93134328358207, "grad_norm": 1.6294162273406982, "learning_rate": 9.498236331569665e-06, "loss": 39.6743, "step": 11085 }, { "epoch": 263.95522388059703, "grad_norm": 1.6139830350875854, "learning_rate": 9.497354497354498e-06, "loss": 39.6729, "step": 11086 }, { "epoch": 263.97910447761194, "grad_norm": 1.7235676050186157, "learning_rate": 9.49647266313933e-06, "loss": 39.0117, "step": 11087 }, { "epoch": 264.0, "grad_norm": 1.630520224571228, "learning_rate": 9.495590828924162e-06, "loss": 33.6574, "step": 11088 }, { "epoch": 264.0238805970149, "grad_norm": 1.501772165298462, "learning_rate": 9.494708994708996e-06, "loss": 40.0716, "step": 11089 }, { "epoch": 264.0477611940299, "grad_norm": 1.575295090675354, "learning_rate": 9.493827160493829e-06, "loss": 40.1842, "step": 11090 }, { "epoch": 264.0716417910448, "grad_norm": 1.5870190858840942, "learning_rate": 9.492945326278662e-06, "loss": 39.323, "step": 11091 }, { "epoch": 264.0955223880597, "grad_norm": 1.6215733289718628, "learning_rate": 9.492063492063493e-06, "loss": 39.5527, "step": 11092 }, { "epoch": 264.1194029850746, "grad_norm": 1.551605463027954, "learning_rate": 9.491181657848324e-06, "loss": 40.2713, "step": 11093 }, { "epoch": 264.14328358208957, "grad_norm": 1.6779261827468872, "learning_rate": 9.490299823633157e-06, "loss": 40.1643, "step": 11094 }, { "epoch": 264.1671641791045, "grad_norm": 1.591853380203247, "learning_rate": 9.48941798941799e-06, "loss": 40.3141, "step": 11095 }, { "epoch": 264.1910447761194, "grad_norm": 1.560279130935669, "learning_rate": 9.488536155202822e-06, "loss": 38.3035, "step": 11096 }, { "epoch": 264.21492537313435, "grad_norm": 1.6339911222457886, "learning_rate": 9.487654320987655e-06, "loss": 38.7963, "step": 11097 }, { "epoch": 264.23880597014926, "grad_norm": 1.6237887144088745, "learning_rate": 9.486772486772488e-06, "loss": 40.203, "step": 11098 }, { "epoch": 264.26268656716417, "grad_norm": 1.6196738481521606, "learning_rate": 9.485890652557321e-06, "loss": 39.272, "step": 11099 }, { "epoch": 264.28656716417913, "grad_norm": 1.6806775331497192, "learning_rate": 9.485008818342152e-06, "loss": 40.8315, "step": 11100 }, { "epoch": 264.31044776119404, "grad_norm": 1.611965298652649, "learning_rate": 9.484126984126984e-06, "loss": 40.9921, "step": 11101 }, { "epoch": 264.33432835820895, "grad_norm": 1.569248080253601, "learning_rate": 9.483245149911817e-06, "loss": 39.9355, "step": 11102 }, { "epoch": 264.35820895522386, "grad_norm": 1.6441794633865356, "learning_rate": 9.48236331569665e-06, "loss": 39.8611, "step": 11103 }, { "epoch": 264.3820895522388, "grad_norm": 1.7284375429153442, "learning_rate": 9.481481481481483e-06, "loss": 39.602, "step": 11104 }, { "epoch": 264.40597014925373, "grad_norm": 1.59506356716156, "learning_rate": 9.480599647266314e-06, "loss": 39.8243, "step": 11105 }, { "epoch": 264.42985074626864, "grad_norm": 1.5699117183685303, "learning_rate": 9.479717813051147e-06, "loss": 40.769, "step": 11106 }, { "epoch": 264.4537313432836, "grad_norm": 1.6370006799697876, "learning_rate": 9.478835978835979e-06, "loss": 41.5743, "step": 11107 }, { "epoch": 264.4776119402985, "grad_norm": 1.5964711904525757, "learning_rate": 9.477954144620812e-06, "loss": 39.8884, "step": 11108 }, { "epoch": 264.5014925373134, "grad_norm": 1.7293678522109985, "learning_rate": 9.477072310405645e-06, "loss": 39.4323, "step": 11109 }, { "epoch": 264.52537313432833, "grad_norm": 1.765647530555725, "learning_rate": 9.476190476190476e-06, "loss": 41.2136, "step": 11110 }, { "epoch": 264.5492537313433, "grad_norm": 1.7098437547683716, "learning_rate": 9.47530864197531e-06, "loss": 39.6337, "step": 11111 }, { "epoch": 264.5731343283582, "grad_norm": 1.6038447618484497, "learning_rate": 9.474426807760142e-06, "loss": 39.5542, "step": 11112 }, { "epoch": 264.5970149253731, "grad_norm": 1.6168779134750366, "learning_rate": 9.473544973544975e-06, "loss": 40.5675, "step": 11113 }, { "epoch": 264.6208955223881, "grad_norm": 1.6818190813064575, "learning_rate": 9.472663139329807e-06, "loss": 39.1434, "step": 11114 }, { "epoch": 264.644776119403, "grad_norm": 1.6840814352035522, "learning_rate": 9.471781305114638e-06, "loss": 40.2326, "step": 11115 }, { "epoch": 264.6686567164179, "grad_norm": 1.6526665687561035, "learning_rate": 9.470899470899471e-06, "loss": 40.3699, "step": 11116 }, { "epoch": 264.6925373134328, "grad_norm": 1.594697117805481, "learning_rate": 9.470017636684304e-06, "loss": 40.2541, "step": 11117 }, { "epoch": 264.7164179104478, "grad_norm": 1.7430872917175293, "learning_rate": 9.469135802469137e-06, "loss": 40.8192, "step": 11118 }, { "epoch": 264.7402985074627, "grad_norm": 1.707537293434143, "learning_rate": 9.468253968253969e-06, "loss": 38.5796, "step": 11119 }, { "epoch": 264.7641791044776, "grad_norm": 1.7188972234725952, "learning_rate": 9.467372134038802e-06, "loss": 39.6226, "step": 11120 }, { "epoch": 264.78805970149256, "grad_norm": 1.6330283880233765, "learning_rate": 9.466490299823635e-06, "loss": 39.2222, "step": 11121 }, { "epoch": 264.81194029850747, "grad_norm": 1.8672114610671997, "learning_rate": 9.465608465608466e-06, "loss": 39.6725, "step": 11122 }, { "epoch": 264.8358208955224, "grad_norm": 1.719328761100769, "learning_rate": 9.464726631393298e-06, "loss": 41.4924, "step": 11123 }, { "epoch": 264.85970149253734, "grad_norm": 1.8214755058288574, "learning_rate": 9.46384479717813e-06, "loss": 40.1653, "step": 11124 }, { "epoch": 264.88358208955225, "grad_norm": 1.7208667993545532, "learning_rate": 9.462962962962964e-06, "loss": 39.262, "step": 11125 }, { "epoch": 264.90746268656716, "grad_norm": 1.7286288738250732, "learning_rate": 9.462081128747797e-06, "loss": 40.4662, "step": 11126 }, { "epoch": 264.93134328358207, "grad_norm": 1.7811837196350098, "learning_rate": 9.461199294532628e-06, "loss": 41.0274, "step": 11127 }, { "epoch": 264.95522388059703, "grad_norm": 2.1537365913391113, "learning_rate": 9.460317460317461e-06, "loss": 39.1914, "step": 11128 }, { "epoch": 264.97910447761194, "grad_norm": 1.7807774543762207, "learning_rate": 9.459435626102293e-06, "loss": 39.4219, "step": 11129 }, { "epoch": 265.0, "grad_norm": 1.7342748641967773, "learning_rate": 9.458553791887126e-06, "loss": 33.8781, "step": 11130 }, { "epoch": 265.0238805970149, "grad_norm": 1.8042250871658325, "learning_rate": 9.457671957671959e-06, "loss": 39.2087, "step": 11131 }, { "epoch": 265.0477611940299, "grad_norm": 1.7934316396713257, "learning_rate": 9.45679012345679e-06, "loss": 40.7491, "step": 11132 }, { "epoch": 265.0716417910448, "grad_norm": 1.6531046628952026, "learning_rate": 9.455908289241623e-06, "loss": 39.6102, "step": 11133 }, { "epoch": 265.0955223880597, "grad_norm": 1.7694377899169922, "learning_rate": 9.455026455026456e-06, "loss": 40.4959, "step": 11134 }, { "epoch": 265.1194029850746, "grad_norm": 1.6900501251220703, "learning_rate": 9.45414462081129e-06, "loss": 41.707, "step": 11135 }, { "epoch": 265.14328358208957, "grad_norm": 1.7662745714187622, "learning_rate": 9.45326278659612e-06, "loss": 40.0879, "step": 11136 }, { "epoch": 265.1671641791045, "grad_norm": 1.6745691299438477, "learning_rate": 9.452380952380952e-06, "loss": 40.0043, "step": 11137 }, { "epoch": 265.1910447761194, "grad_norm": 1.7642098665237427, "learning_rate": 9.451499118165785e-06, "loss": 39.3379, "step": 11138 }, { "epoch": 265.21492537313435, "grad_norm": 1.7571173906326294, "learning_rate": 9.450617283950618e-06, "loss": 40.2379, "step": 11139 }, { "epoch": 265.23880597014926, "grad_norm": 1.840388536453247, "learning_rate": 9.449735449735451e-06, "loss": 39.5652, "step": 11140 }, { "epoch": 265.26268656716417, "grad_norm": 1.883176565170288, "learning_rate": 9.448853615520283e-06, "loss": 40.0241, "step": 11141 }, { "epoch": 265.28656716417913, "grad_norm": 1.7107597589492798, "learning_rate": 9.447971781305116e-06, "loss": 40.6913, "step": 11142 }, { "epoch": 265.31044776119404, "grad_norm": 1.9240273237228394, "learning_rate": 9.447089947089949e-06, "loss": 39.9004, "step": 11143 }, { "epoch": 265.33432835820895, "grad_norm": 1.997233271598816, "learning_rate": 9.44620811287478e-06, "loss": 40.2703, "step": 11144 }, { "epoch": 265.35820895522386, "grad_norm": 1.783259391784668, "learning_rate": 9.445326278659611e-06, "loss": 39.5648, "step": 11145 }, { "epoch": 265.3820895522388, "grad_norm": 1.701804757118225, "learning_rate": 9.444444444444445e-06, "loss": 39.5268, "step": 11146 }, { "epoch": 265.40597014925373, "grad_norm": 1.823791742324829, "learning_rate": 9.443562610229278e-06, "loss": 40.1326, "step": 11147 }, { "epoch": 265.42985074626864, "grad_norm": 1.821493148803711, "learning_rate": 9.44268077601411e-06, "loss": 40.1274, "step": 11148 }, { "epoch": 265.4537313432836, "grad_norm": 1.7800713777542114, "learning_rate": 9.441798941798944e-06, "loss": 38.5953, "step": 11149 }, { "epoch": 265.4776119402985, "grad_norm": 1.6889206171035767, "learning_rate": 9.440917107583775e-06, "loss": 39.6706, "step": 11150 }, { "epoch": 265.5014925373134, "grad_norm": 1.7190959453582764, "learning_rate": 9.440035273368608e-06, "loss": 40.0328, "step": 11151 }, { "epoch": 265.52537313432833, "grad_norm": 1.8978456258773804, "learning_rate": 9.43915343915344e-06, "loss": 39.6932, "step": 11152 }, { "epoch": 265.5492537313433, "grad_norm": 1.8689734935760498, "learning_rate": 9.438271604938273e-06, "loss": 40.1259, "step": 11153 }, { "epoch": 265.5731343283582, "grad_norm": 1.829033613204956, "learning_rate": 9.437389770723104e-06, "loss": 40.7285, "step": 11154 }, { "epoch": 265.5970149253731, "grad_norm": 1.7404003143310547, "learning_rate": 9.436507936507937e-06, "loss": 39.4209, "step": 11155 }, { "epoch": 265.6208955223881, "grad_norm": 1.8134456872940063, "learning_rate": 9.43562610229277e-06, "loss": 40.4234, "step": 11156 }, { "epoch": 265.644776119403, "grad_norm": 1.9928745031356812, "learning_rate": 9.434744268077603e-06, "loss": 40.2274, "step": 11157 }, { "epoch": 265.6686567164179, "grad_norm": 1.8424315452575684, "learning_rate": 9.433862433862435e-06, "loss": 40.2449, "step": 11158 }, { "epoch": 265.6925373134328, "grad_norm": 1.9016598463058472, "learning_rate": 9.432980599647266e-06, "loss": 39.4298, "step": 11159 }, { "epoch": 265.7164179104478, "grad_norm": 2.0489885807037354, "learning_rate": 9.432098765432099e-06, "loss": 40.9387, "step": 11160 }, { "epoch": 265.7402985074627, "grad_norm": 1.819980502128601, "learning_rate": 9.431216931216932e-06, "loss": 39.5654, "step": 11161 }, { "epoch": 265.7641791044776, "grad_norm": 1.7707027196884155, "learning_rate": 9.430335097001765e-06, "loss": 41.4156, "step": 11162 }, { "epoch": 265.78805970149256, "grad_norm": 1.8338661193847656, "learning_rate": 9.429453262786596e-06, "loss": 38.9762, "step": 11163 }, { "epoch": 265.81194029850747, "grad_norm": 2.185920000076294, "learning_rate": 9.42857142857143e-06, "loss": 40.6195, "step": 11164 }, { "epoch": 265.8358208955224, "grad_norm": 1.9766192436218262, "learning_rate": 9.427689594356263e-06, "loss": 40.8726, "step": 11165 }, { "epoch": 265.85970149253734, "grad_norm": 1.8576736450195312, "learning_rate": 9.426807760141094e-06, "loss": 40.2795, "step": 11166 }, { "epoch": 265.88358208955225, "grad_norm": 2.0011467933654785, "learning_rate": 9.425925925925925e-06, "loss": 38.5118, "step": 11167 }, { "epoch": 265.90746268656716, "grad_norm": 2.0406241416931152, "learning_rate": 9.425044091710758e-06, "loss": 41.2083, "step": 11168 }, { "epoch": 265.93134328358207, "grad_norm": 1.9499824047088623, "learning_rate": 9.424162257495591e-06, "loss": 39.1114, "step": 11169 }, { "epoch": 265.95522388059703, "grad_norm": 1.8748691082000732, "learning_rate": 9.423280423280425e-06, "loss": 39.8199, "step": 11170 }, { "epoch": 265.97910447761194, "grad_norm": 1.81623375415802, "learning_rate": 9.422398589065258e-06, "loss": 39.7705, "step": 11171 }, { "epoch": 266.0, "grad_norm": 2.0740966796875, "learning_rate": 9.421516754850089e-06, "loss": 33.553, "step": 11172 }, { "epoch": 266.0238805970149, "grad_norm": 1.8995767831802368, "learning_rate": 9.420634920634922e-06, "loss": 38.8906, "step": 11173 }, { "epoch": 266.0477611940299, "grad_norm": 1.957885503768921, "learning_rate": 9.419753086419753e-06, "loss": 39.2722, "step": 11174 }, { "epoch": 266.0716417910448, "grad_norm": 1.970649003982544, "learning_rate": 9.418871252204586e-06, "loss": 40.8415, "step": 11175 }, { "epoch": 266.0955223880597, "grad_norm": 2.0128138065338135, "learning_rate": 9.417989417989418e-06, "loss": 39.2208, "step": 11176 }, { "epoch": 266.1194029850746, "grad_norm": 1.9676634073257446, "learning_rate": 9.417107583774251e-06, "loss": 40.2646, "step": 11177 }, { "epoch": 266.14328358208957, "grad_norm": 1.9938279390335083, "learning_rate": 9.416225749559084e-06, "loss": 39.7534, "step": 11178 }, { "epoch": 266.1671641791045, "grad_norm": 2.0161585807800293, "learning_rate": 9.415343915343917e-06, "loss": 40.1109, "step": 11179 }, { "epoch": 266.1910447761194, "grad_norm": 1.9915316104888916, "learning_rate": 9.414462081128748e-06, "loss": 41.1481, "step": 11180 }, { "epoch": 266.21492537313435, "grad_norm": 1.9481030702590942, "learning_rate": 9.413580246913581e-06, "loss": 39.7506, "step": 11181 }, { "epoch": 266.23880597014926, "grad_norm": 2.030442237854004, "learning_rate": 9.412698412698413e-06, "loss": 40.0802, "step": 11182 }, { "epoch": 266.26268656716417, "grad_norm": 1.7925748825073242, "learning_rate": 9.411816578483246e-06, "loss": 39.2543, "step": 11183 }, { "epoch": 266.28656716417913, "grad_norm": 1.779591679573059, "learning_rate": 9.410934744268079e-06, "loss": 39.0753, "step": 11184 }, { "epoch": 266.31044776119404, "grad_norm": 1.9078892469406128, "learning_rate": 9.41005291005291e-06, "loss": 41.4999, "step": 11185 }, { "epoch": 266.33432835820895, "grad_norm": 1.8994312286376953, "learning_rate": 9.409171075837743e-06, "loss": 41.0811, "step": 11186 }, { "epoch": 266.35820895522386, "grad_norm": 1.856598138809204, "learning_rate": 9.408289241622576e-06, "loss": 40.7875, "step": 11187 }, { "epoch": 266.3820895522388, "grad_norm": 1.9469159841537476, "learning_rate": 9.407407407407408e-06, "loss": 39.4345, "step": 11188 }, { "epoch": 266.40597014925373, "grad_norm": 2.271885395050049, "learning_rate": 9.406525573192241e-06, "loss": 40.1679, "step": 11189 }, { "epoch": 266.42985074626864, "grad_norm": 1.9063481092453003, "learning_rate": 9.405643738977072e-06, "loss": 39.884, "step": 11190 }, { "epoch": 266.4537313432836, "grad_norm": 1.959763526916504, "learning_rate": 9.404761904761905e-06, "loss": 39.7587, "step": 11191 }, { "epoch": 266.4776119402985, "grad_norm": 1.983992099761963, "learning_rate": 9.403880070546738e-06, "loss": 39.2794, "step": 11192 }, { "epoch": 266.5014925373134, "grad_norm": 2.3095459938049316, "learning_rate": 9.402998236331571e-06, "loss": 40.9714, "step": 11193 }, { "epoch": 266.52537313432833, "grad_norm": 1.898734211921692, "learning_rate": 9.402116402116403e-06, "loss": 39.671, "step": 11194 }, { "epoch": 266.5492537313433, "grad_norm": 2.045151472091675, "learning_rate": 9.401234567901236e-06, "loss": 39.3331, "step": 11195 }, { "epoch": 266.5731343283582, "grad_norm": 1.9579055309295654, "learning_rate": 9.400352733686067e-06, "loss": 40.071, "step": 11196 }, { "epoch": 266.5970149253731, "grad_norm": 2.0126123428344727, "learning_rate": 9.3994708994709e-06, "loss": 39.3148, "step": 11197 }, { "epoch": 266.6208955223881, "grad_norm": 2.0612123012542725, "learning_rate": 9.398589065255733e-06, "loss": 41.0955, "step": 11198 }, { "epoch": 266.644776119403, "grad_norm": 2.191852569580078, "learning_rate": 9.397707231040565e-06, "loss": 39.3757, "step": 11199 }, { "epoch": 266.6686567164179, "grad_norm": 2.0572190284729004, "learning_rate": 9.396825396825398e-06, "loss": 41.9285, "step": 11200 }, { "epoch": 266.6925373134328, "grad_norm": 2.0043561458587646, "learning_rate": 9.395943562610231e-06, "loss": 40.8725, "step": 11201 }, { "epoch": 266.7164179104478, "grad_norm": 1.7713338136672974, "learning_rate": 9.395061728395062e-06, "loss": 39.4722, "step": 11202 }, { "epoch": 266.7402985074627, "grad_norm": 1.7836006879806519, "learning_rate": 9.394179894179895e-06, "loss": 41.0635, "step": 11203 }, { "epoch": 266.7641791044776, "grad_norm": 1.98702073097229, "learning_rate": 9.393298059964727e-06, "loss": 39.849, "step": 11204 }, { "epoch": 266.78805970149256, "grad_norm": 2.0025408267974854, "learning_rate": 9.39241622574956e-06, "loss": 39.8418, "step": 11205 }, { "epoch": 266.81194029850747, "grad_norm": 2.0321223735809326, "learning_rate": 9.391534391534393e-06, "loss": 40.4576, "step": 11206 }, { "epoch": 266.8358208955224, "grad_norm": 1.9746233224868774, "learning_rate": 9.390652557319224e-06, "loss": 39.2987, "step": 11207 }, { "epoch": 266.85970149253734, "grad_norm": 2.126621961593628, "learning_rate": 9.389770723104057e-06, "loss": 39.7311, "step": 11208 }, { "epoch": 266.88358208955225, "grad_norm": 2.1026949882507324, "learning_rate": 9.38888888888889e-06, "loss": 38.2591, "step": 11209 }, { "epoch": 266.90746268656716, "grad_norm": 1.867531657218933, "learning_rate": 9.388007054673722e-06, "loss": 39.4475, "step": 11210 }, { "epoch": 266.93134328358207, "grad_norm": 2.0237841606140137, "learning_rate": 9.387125220458555e-06, "loss": 41.3703, "step": 11211 }, { "epoch": 266.95522388059703, "grad_norm": 1.9870686531066895, "learning_rate": 9.386243386243386e-06, "loss": 39.6692, "step": 11212 }, { "epoch": 266.97910447761194, "grad_norm": 1.9410037994384766, "learning_rate": 9.38536155202822e-06, "loss": 40.6146, "step": 11213 }, { "epoch": 267.0, "grad_norm": 1.8229119777679443, "learning_rate": 9.384479717813052e-06, "loss": 34.3316, "step": 11214 }, { "epoch": 267.0238805970149, "grad_norm": 2.2340381145477295, "learning_rate": 9.383597883597885e-06, "loss": 39.8427, "step": 11215 }, { "epoch": 267.0477611940299, "grad_norm": 2.09136962890625, "learning_rate": 9.382716049382717e-06, "loss": 40.9963, "step": 11216 }, { "epoch": 267.0716417910448, "grad_norm": 2.0006775856018066, "learning_rate": 9.38183421516755e-06, "loss": 39.7726, "step": 11217 }, { "epoch": 267.0955223880597, "grad_norm": 1.9360883235931396, "learning_rate": 9.380952380952381e-06, "loss": 40.9399, "step": 11218 }, { "epoch": 267.1194029850746, "grad_norm": 1.9209994077682495, "learning_rate": 9.380070546737214e-06, "loss": 39.8973, "step": 11219 }, { "epoch": 267.14328358208957, "grad_norm": 1.901391863822937, "learning_rate": 9.379188712522047e-06, "loss": 39.87, "step": 11220 }, { "epoch": 267.1671641791045, "grad_norm": 2.006185531616211, "learning_rate": 9.378306878306879e-06, "loss": 39.3685, "step": 11221 }, { "epoch": 267.1910447761194, "grad_norm": 1.8344476222991943, "learning_rate": 9.377425044091712e-06, "loss": 39.6737, "step": 11222 }, { "epoch": 267.21492537313435, "grad_norm": 1.9653791189193726, "learning_rate": 9.376543209876545e-06, "loss": 39.8825, "step": 11223 }, { "epoch": 267.23880597014926, "grad_norm": 2.1786515712738037, "learning_rate": 9.375661375661376e-06, "loss": 39.6649, "step": 11224 }, { "epoch": 267.26268656716417, "grad_norm": 2.068927764892578, "learning_rate": 9.374779541446209e-06, "loss": 39.6824, "step": 11225 }, { "epoch": 267.28656716417913, "grad_norm": 2.1062753200531006, "learning_rate": 9.37389770723104e-06, "loss": 41.3296, "step": 11226 }, { "epoch": 267.31044776119404, "grad_norm": 1.8383135795593262, "learning_rate": 9.373015873015874e-06, "loss": 39.8115, "step": 11227 }, { "epoch": 267.33432835820895, "grad_norm": 1.9159365892410278, "learning_rate": 9.372134038800707e-06, "loss": 40.9161, "step": 11228 }, { "epoch": 267.35820895522386, "grad_norm": 1.9511005878448486, "learning_rate": 9.37125220458554e-06, "loss": 39.7479, "step": 11229 }, { "epoch": 267.3820895522388, "grad_norm": 1.8658132553100586, "learning_rate": 9.370370370370371e-06, "loss": 39.5209, "step": 11230 }, { "epoch": 267.40597014925373, "grad_norm": 2.0931243896484375, "learning_rate": 9.369488536155204e-06, "loss": 41.6411, "step": 11231 }, { "epoch": 267.42985074626864, "grad_norm": 1.969403624534607, "learning_rate": 9.368606701940036e-06, "loss": 39.6277, "step": 11232 }, { "epoch": 267.4537313432836, "grad_norm": 2.226022481918335, "learning_rate": 9.367724867724869e-06, "loss": 40.6688, "step": 11233 }, { "epoch": 267.4776119402985, "grad_norm": 2.391698122024536, "learning_rate": 9.3668430335097e-06, "loss": 39.9265, "step": 11234 }, { "epoch": 267.5014925373134, "grad_norm": 2.046767473220825, "learning_rate": 9.365961199294533e-06, "loss": 40.7571, "step": 11235 }, { "epoch": 267.52537313432833, "grad_norm": 2.0937893390655518, "learning_rate": 9.365079365079366e-06, "loss": 39.5481, "step": 11236 }, { "epoch": 267.5492537313433, "grad_norm": 2.3491523265838623, "learning_rate": 9.364197530864199e-06, "loss": 39.996, "step": 11237 }, { "epoch": 267.5731343283582, "grad_norm": 2.0321855545043945, "learning_rate": 9.36331569664903e-06, "loss": 39.8931, "step": 11238 }, { "epoch": 267.5970149253731, "grad_norm": 2.0335476398468018, "learning_rate": 9.362433862433864e-06, "loss": 40.1122, "step": 11239 }, { "epoch": 267.6208955223881, "grad_norm": 2.006875514984131, "learning_rate": 9.361552028218695e-06, "loss": 39.3318, "step": 11240 }, { "epoch": 267.644776119403, "grad_norm": 2.3600008487701416, "learning_rate": 9.360670194003528e-06, "loss": 39.7493, "step": 11241 }, { "epoch": 267.6686567164179, "grad_norm": 2.160414934158325, "learning_rate": 9.359788359788361e-06, "loss": 40.0858, "step": 11242 }, { "epoch": 267.6925373134328, "grad_norm": 2.1238319873809814, "learning_rate": 9.358906525573192e-06, "loss": 39.2445, "step": 11243 }, { "epoch": 267.7164179104478, "grad_norm": 2.095144510269165, "learning_rate": 9.358024691358025e-06, "loss": 39.7533, "step": 11244 }, { "epoch": 267.7402985074627, "grad_norm": 2.0827667713165283, "learning_rate": 9.357142857142859e-06, "loss": 40.1079, "step": 11245 }, { "epoch": 267.7641791044776, "grad_norm": 2.212702751159668, "learning_rate": 9.35626102292769e-06, "loss": 40.3725, "step": 11246 }, { "epoch": 267.78805970149256, "grad_norm": 2.1474852561950684, "learning_rate": 9.355379188712523e-06, "loss": 39.4775, "step": 11247 }, { "epoch": 267.81194029850747, "grad_norm": 1.962712287902832, "learning_rate": 9.354497354497354e-06, "loss": 39.9113, "step": 11248 }, { "epoch": 267.8358208955224, "grad_norm": 1.9558900594711304, "learning_rate": 9.353615520282187e-06, "loss": 39.6221, "step": 11249 }, { "epoch": 267.85970149253734, "grad_norm": 2.2489147186279297, "learning_rate": 9.35273368606702e-06, "loss": 40.6693, "step": 11250 }, { "epoch": 267.88358208955225, "grad_norm": 2.1084952354431152, "learning_rate": 9.351851851851854e-06, "loss": 38.9551, "step": 11251 }, { "epoch": 267.90746268656716, "grad_norm": 2.1078755855560303, "learning_rate": 9.350970017636685e-06, "loss": 40.6464, "step": 11252 }, { "epoch": 267.93134328358207, "grad_norm": 1.975015640258789, "learning_rate": 9.350088183421518e-06, "loss": 40.6628, "step": 11253 }, { "epoch": 267.95522388059703, "grad_norm": 2.2445523738861084, "learning_rate": 9.34920634920635e-06, "loss": 39.9385, "step": 11254 }, { "epoch": 267.97910447761194, "grad_norm": 2.119166612625122, "learning_rate": 9.348324514991182e-06, "loss": 39.5395, "step": 11255 }, { "epoch": 268.0, "grad_norm": 2.0635857582092285, "learning_rate": 9.347442680776014e-06, "loss": 35.5053, "step": 11256 }, { "epoch": 268.0238805970149, "grad_norm": 2.215437412261963, "learning_rate": 9.346560846560847e-06, "loss": 39.9551, "step": 11257 }, { "epoch": 268.0477611940299, "grad_norm": 2.2606897354125977, "learning_rate": 9.34567901234568e-06, "loss": 40.3249, "step": 11258 }, { "epoch": 268.0716417910448, "grad_norm": 2.0418660640716553, "learning_rate": 9.344797178130513e-06, "loss": 39.2498, "step": 11259 }, { "epoch": 268.0955223880597, "grad_norm": 2.1931238174438477, "learning_rate": 9.343915343915344e-06, "loss": 39.6233, "step": 11260 }, { "epoch": 268.1194029850746, "grad_norm": 2.067831516265869, "learning_rate": 9.343033509700177e-06, "loss": 40.0996, "step": 11261 }, { "epoch": 268.14328358208957, "grad_norm": 2.12958025932312, "learning_rate": 9.342151675485009e-06, "loss": 40.8008, "step": 11262 }, { "epoch": 268.1671641791045, "grad_norm": 2.0191800594329834, "learning_rate": 9.341269841269842e-06, "loss": 40.3679, "step": 11263 }, { "epoch": 268.1910447761194, "grad_norm": 2.164774179458618, "learning_rate": 9.340388007054675e-06, "loss": 39.8965, "step": 11264 }, { "epoch": 268.21492537313435, "grad_norm": NaN, "learning_rate": 9.339506172839506e-06, "loss": 35.7343, "step": 11265 }, { "epoch": 268.23880597014926, "grad_norm": 9.513036727905273, "learning_rate": 9.339506172839506e-06, "loss": 40.0251, "step": 11266 }, { "epoch": 268.26268656716417, "grad_norm": 9.310385704040527, "learning_rate": 9.33862433862434e-06, "loss": 40.748, "step": 11267 }, { "epoch": 268.28656716417913, "grad_norm": 7.484086513519287, "learning_rate": 9.337742504409172e-06, "loss": 40.5008, "step": 11268 }, { "epoch": 268.31044776119404, "grad_norm": 5.988757133483887, "learning_rate": 9.336860670194004e-06, "loss": 38.643, "step": 11269 }, { "epoch": 268.33432835820895, "grad_norm": 5.695390701293945, "learning_rate": 9.335978835978837e-06, "loss": 40.5788, "step": 11270 }, { "epoch": 268.35820895522386, "grad_norm": 5.092775344848633, "learning_rate": 9.335097001763668e-06, "loss": 41.4045, "step": 11271 }, { "epoch": 268.3820895522388, "grad_norm": 3.778723955154419, "learning_rate": 9.334215167548501e-06, "loss": 38.164, "step": 11272 }, { "epoch": 268.40597014925373, "grad_norm": 4.126461029052734, "learning_rate": 9.333333333333334e-06, "loss": 42.3278, "step": 11273 }, { "epoch": 268.42985074626864, "grad_norm": 4.246984481811523, "learning_rate": 9.332451499118167e-06, "loss": 40.9104, "step": 11274 }, { "epoch": 268.4537313432836, "grad_norm": 3.836505651473999, "learning_rate": 9.331569664902999e-06, "loss": 40.3171, "step": 11275 }, { "epoch": 268.4776119402985, "grad_norm": 4.760846138000488, "learning_rate": 9.330687830687832e-06, "loss": 40.1633, "step": 11276 }, { "epoch": 268.5014925373134, "grad_norm": NaN, "learning_rate": 9.329805996472663e-06, "loss": 57.6213, "step": 11277 }, { "epoch": 268.52537313432833, "grad_norm": 11.585193634033203, "learning_rate": 9.329805996472663e-06, "loss": 39.2208, "step": 11278 }, { "epoch": 268.5492537313433, "grad_norm": 12.035174369812012, "learning_rate": 9.328924162257496e-06, "loss": 40.376, "step": 11279 }, { "epoch": 268.5731343283582, "grad_norm": 10.621764183044434, "learning_rate": 9.32804232804233e-06, "loss": 38.837, "step": 11280 }, { "epoch": 268.5970149253731, "grad_norm": 10.716426849365234, "learning_rate": 9.32716049382716e-06, "loss": 40.3203, "step": 11281 }, { "epoch": 268.6208955223881, "grad_norm": 9.73912239074707, "learning_rate": 9.326278659611994e-06, "loss": 41.0689, "step": 11282 }, { "epoch": 268.644776119403, "grad_norm": 7.252051830291748, "learning_rate": 9.325396825396827e-06, "loss": 40.6042, "step": 11283 }, { "epoch": 268.6686567164179, "grad_norm": 7.205382823944092, "learning_rate": 9.32451499118166e-06, "loss": 40.2604, "step": 11284 }, { "epoch": 268.6925373134328, "grad_norm": 5.687811374664307, "learning_rate": 9.323633156966491e-06, "loss": 40.4904, "step": 11285 }, { "epoch": 268.7164179104478, "grad_norm": 5.041977882385254, "learning_rate": 9.322751322751323e-06, "loss": 41.4627, "step": 11286 }, { "epoch": 268.7402985074627, "grad_norm": 5.766401290893555, "learning_rate": 9.321869488536156e-06, "loss": 40.7804, "step": 11287 }, { "epoch": 268.7641791044776, "grad_norm": 6.191514492034912, "learning_rate": 9.320987654320989e-06, "loss": 41.9094, "step": 11288 }, { "epoch": 268.78805970149256, "grad_norm": 7.873541355133057, "learning_rate": 9.32010582010582e-06, "loss": 40.9847, "step": 11289 }, { "epoch": 268.81194029850747, "grad_norm": 7.7074174880981445, "learning_rate": 9.319223985890653e-06, "loss": 38.712, "step": 11290 }, { "epoch": 268.8358208955224, "grad_norm": 8.483597755432129, "learning_rate": 9.318342151675486e-06, "loss": 41.1783, "step": 11291 }, { "epoch": 268.85970149253734, "grad_norm": 8.012494087219238, "learning_rate": 9.317460317460318e-06, "loss": 40.0852, "step": 11292 }, { "epoch": 268.88358208955225, "grad_norm": NaN, "learning_rate": 9.31657848324515e-06, "loss": 56.6392, "step": 11293 }, { "epoch": 268.90746268656716, "grad_norm": 16.779102325439453, "learning_rate": 9.31657848324515e-06, "loss": 41.197, "step": 11294 }, { "epoch": 268.93134328358207, "grad_norm": 18.5029296875, "learning_rate": 9.315696649029982e-06, "loss": 42.3188, "step": 11295 }, { "epoch": 268.95522388059703, "grad_norm": 16.504255294799805, "learning_rate": 9.314814814814815e-06, "loss": 41.9048, "step": 11296 }, { "epoch": 268.97910447761194, "grad_norm": 15.995084762573242, "learning_rate": 9.313932980599648e-06, "loss": 40.626, "step": 11297 }, { "epoch": 269.0, "grad_norm": 10.575970649719238, "learning_rate": 9.313051146384481e-06, "loss": 36.1748, "step": 11298 }, { "epoch": 269.0238805970149, "grad_norm": 13.294049263000488, "learning_rate": 9.312169312169313e-06, "loss": 41.0326, "step": 11299 }, { "epoch": 269.0477611940299, "grad_norm": 12.427556991577148, "learning_rate": 9.311287477954146e-06, "loss": 41.8638, "step": 11300 }, { "epoch": 269.0716417910448, "grad_norm": 10.793148040771484, "learning_rate": 9.310405643738977e-06, "loss": 41.1971, "step": 11301 }, { "epoch": 269.0955223880597, "grad_norm": 8.396902084350586, "learning_rate": 9.30952380952381e-06, "loss": 41.5764, "step": 11302 }, { "epoch": 269.1194029850746, "grad_norm": 6.897744178771973, "learning_rate": 9.308641975308643e-06, "loss": 41.8214, "step": 11303 }, { "epoch": 269.14328358208957, "grad_norm": 6.281709671020508, "learning_rate": 9.307760141093475e-06, "loss": 41.5652, "step": 11304 }, { "epoch": 269.1671641791045, "grad_norm": 7.066766738891602, "learning_rate": 9.306878306878308e-06, "loss": 41.3994, "step": 11305 }, { "epoch": 269.1910447761194, "grad_norm": 7.773789882659912, "learning_rate": 9.30599647266314e-06, "loss": 41.5152, "step": 11306 }, { "epoch": 269.21492537313435, "grad_norm": 8.664027214050293, "learning_rate": 9.305114638447974e-06, "loss": 41.8307, "step": 11307 }, { "epoch": 269.23880597014926, "grad_norm": 9.800107955932617, "learning_rate": 9.304232804232805e-06, "loss": 41.0848, "step": 11308 }, { "epoch": 269.26268656716417, "grad_norm": 11.601771354675293, "learning_rate": 9.303350970017636e-06, "loss": 41.4174, "step": 11309 }, { "epoch": 269.28656716417913, "grad_norm": 10.858922958374023, "learning_rate": 9.30246913580247e-06, "loss": 42.4046, "step": 11310 }, { "epoch": 269.31044776119404, "grad_norm": 8.922061920166016, "learning_rate": 9.301587301587303e-06, "loss": 42.7733, "step": 11311 }, { "epoch": 269.33432835820895, "grad_norm": 7.8710784912109375, "learning_rate": 9.300705467372136e-06, "loss": 42.3915, "step": 11312 }, { "epoch": 269.35820895522386, "grad_norm": 7.012109756469727, "learning_rate": 9.299823633156967e-06, "loss": 42.1597, "step": 11313 }, { "epoch": 269.3820895522388, "grad_norm": 6.8818678855896, "learning_rate": 9.2989417989418e-06, "loss": 41.7593, "step": 11314 }, { "epoch": 269.40597014925373, "grad_norm": 5.948573112487793, "learning_rate": 9.298059964726633e-06, "loss": 41.3521, "step": 11315 }, { "epoch": 269.42985074626864, "grad_norm": 6.146586894989014, "learning_rate": 9.297178130511465e-06, "loss": 41.1138, "step": 11316 }, { "epoch": 269.4537313432836, "grad_norm": 8.106992721557617, "learning_rate": 9.296296296296296e-06, "loss": 42.0936, "step": 11317 }, { "epoch": 269.4776119402985, "grad_norm": 7.75955867767334, "learning_rate": 9.295414462081129e-06, "loss": 43.3844, "step": 11318 }, { "epoch": 269.5014925373134, "grad_norm": 6.064945220947266, "learning_rate": 9.294532627865962e-06, "loss": 41.0685, "step": 11319 }, { "epoch": 269.52537313432833, "grad_norm": 6.63120698928833, "learning_rate": 9.293650793650795e-06, "loss": 42.355, "step": 11320 }, { "epoch": 269.5492537313433, "grad_norm": 6.117424488067627, "learning_rate": 9.292768959435628e-06, "loss": 42.0119, "step": 11321 }, { "epoch": 269.5731343283582, "grad_norm": 6.111680030822754, "learning_rate": 9.29188712522046e-06, "loss": 42.2276, "step": 11322 }, { "epoch": 269.5970149253731, "grad_norm": 6.126545429229736, "learning_rate": 9.291005291005291e-06, "loss": 42.0912, "step": 11323 }, { "epoch": 269.6208955223881, "grad_norm": 6.412890911102295, "learning_rate": 9.290123456790124e-06, "loss": 42.0028, "step": 11324 }, { "epoch": 269.644776119403, "grad_norm": 5.817540645599365, "learning_rate": 9.289241622574957e-06, "loss": 42.2875, "step": 11325 }, { "epoch": 269.6686567164179, "grad_norm": 6.118572235107422, "learning_rate": 9.288359788359788e-06, "loss": 42.5534, "step": 11326 }, { "epoch": 269.6925373134328, "grad_norm": 6.080970287322998, "learning_rate": 9.287477954144621e-06, "loss": 43.6037, "step": 11327 }, { "epoch": 269.7164179104478, "grad_norm": 5.193153381347656, "learning_rate": 9.286596119929454e-06, "loss": 42.6308, "step": 11328 }, { "epoch": 269.7402985074627, "grad_norm": 5.416698932647705, "learning_rate": 9.285714285714288e-06, "loss": 44.5091, "step": 11329 }, { "epoch": 269.7641791044776, "grad_norm": 5.074041843414307, "learning_rate": 9.284832451499119e-06, "loss": 42.7925, "step": 11330 }, { "epoch": 269.78805970149256, "grad_norm": 5.407505989074707, "learning_rate": 9.28395061728395e-06, "loss": 44.0499, "step": 11331 }, { "epoch": 269.81194029850747, "grad_norm": 5.836013317108154, "learning_rate": 9.283068783068783e-06, "loss": 43.2748, "step": 11332 }, { "epoch": 269.8358208955224, "grad_norm": 5.183172225952148, "learning_rate": 9.282186948853616e-06, "loss": 43.8168, "step": 11333 }, { "epoch": 269.85970149253734, "grad_norm": 5.2737345695495605, "learning_rate": 9.28130511463845e-06, "loss": 42.8413, "step": 11334 }, { "epoch": 269.88358208955225, "grad_norm": 5.377220153808594, "learning_rate": 9.280423280423281e-06, "loss": 43.2146, "step": 11335 }, { "epoch": 269.90746268656716, "grad_norm": 5.465785503387451, "learning_rate": 9.279541446208114e-06, "loss": 42.9949, "step": 11336 }, { "epoch": 269.93134328358207, "grad_norm": 5.2123637199401855, "learning_rate": 9.278659611992947e-06, "loss": 43.3199, "step": 11337 }, { "epoch": 269.95522388059703, "grad_norm": 5.227701187133789, "learning_rate": 9.277777777777778e-06, "loss": 41.394, "step": 11338 }, { "epoch": 269.97910447761194, "grad_norm": 4.940923690795898, "learning_rate": 9.27689594356261e-06, "loss": 41.6295, "step": 11339 }, { "epoch": 270.0, "grad_norm": 5.49793004989624, "learning_rate": 9.276014109347443e-06, "loss": 37.226, "step": 11340 }, { "epoch": 270.0, "step": 11340, "total_flos": 5.6264462565033504e+17, "train_loss": 2.9621054977336256, "train_runtime": 25250.6185, "train_samples_per_second": 57.228, "train_steps_per_second": 0.449 }, { "epoch": 270.0238805970149, "grad_norm": 2.2582380771636963, "learning_rate": 1e-05, "loss": 39.9467, "step": 11341 }, { "epoch": 270.0477611940299, "grad_norm": 2.156954050064087, "learning_rate": 9.999178981937603e-06, "loss": 41.1306, "step": 11342 }, { "epoch": 270.0716417910448, "grad_norm": 1.877746820449829, "learning_rate": 9.998357963875206e-06, "loss": 40.4109, "step": 11343 }, { "epoch": 270.0955223880597, "grad_norm": 1.6870440244674683, "learning_rate": 9.997536945812809e-06, "loss": 40.0705, "step": 11344 }, { "epoch": 270.1194029850746, "grad_norm": 1.5905808210372925, "learning_rate": 9.996715927750411e-06, "loss": 38.9129, "step": 11345 }, { "epoch": 270.14328358208957, "grad_norm": NaN, "learning_rate": 9.995894909688014e-06, "loss": 35.5724, "step": 11346 }, { "epoch": 270.1671641791045, "grad_norm": 1.4721838235855103, "learning_rate": 9.995894909688014e-06, "loss": 39.237, "step": 11347 }, { "epoch": 270.1910447761194, "grad_norm": 1.4327396154403687, "learning_rate": 9.995073891625616e-06, "loss": 38.8482, "step": 11348 }, { "epoch": 270.21492537313435, "grad_norm": 1.3669203519821167, "learning_rate": 9.994252873563219e-06, "loss": 40.1917, "step": 11349 }, { "epoch": 270.23880597014926, "grad_norm": 1.4164584875106812, "learning_rate": 9.993431855500822e-06, "loss": 39.8277, "step": 11350 }, { "epoch": 270.26268656716417, "grad_norm": 1.3933510780334473, "learning_rate": 9.992610837438424e-06, "loss": 40.7077, "step": 11351 }, { "epoch": 270.28656716417913, "grad_norm": 1.486974835395813, "learning_rate": 9.991789819376027e-06, "loss": 40.3996, "step": 11352 }, { "epoch": 270.31044776119404, "grad_norm": 1.488309621810913, "learning_rate": 9.99096880131363e-06, "loss": 39.316, "step": 11353 }, { "epoch": 270.33432835820895, "grad_norm": 1.543847918510437, "learning_rate": 9.990147783251232e-06, "loss": 38.638, "step": 11354 }, { "epoch": 270.35820895522386, "grad_norm": 1.49534010887146, "learning_rate": 9.989326765188835e-06, "loss": 39.5232, "step": 11355 }, { "epoch": 270.3820895522388, "grad_norm": 1.4518961906433105, "learning_rate": 9.988505747126437e-06, "loss": 39.4265, "step": 11356 }, { "epoch": 270.40597014925373, "grad_norm": 1.3872349262237549, "learning_rate": 9.98768472906404e-06, "loss": 40.5645, "step": 11357 }, { "epoch": 270.42985074626864, "grad_norm": 1.4110816717147827, "learning_rate": 9.986863711001643e-06, "loss": 38.857, "step": 11358 }, { "epoch": 270.4537313432836, "grad_norm": 1.3822296857833862, "learning_rate": 9.986042692939245e-06, "loss": 40.2424, "step": 11359 }, { "epoch": 270.4776119402985, "grad_norm": 1.3543936014175415, "learning_rate": 9.985221674876848e-06, "loss": 40.601, "step": 11360 }, { "epoch": 270.5014925373134, "grad_norm": 1.3451188802719116, "learning_rate": 9.98440065681445e-06, "loss": 39.6254, "step": 11361 }, { "epoch": 270.52537313432833, "grad_norm": 1.3535270690917969, "learning_rate": 9.983579638752053e-06, "loss": 39.5326, "step": 11362 }, { "epoch": 270.5492537313433, "grad_norm": 1.3799840211868286, "learning_rate": 9.982758620689656e-06, "loss": 38.7527, "step": 11363 }, { "epoch": 270.5731343283582, "grad_norm": 1.3957335948944092, "learning_rate": 9.981937602627258e-06, "loss": 38.9209, "step": 11364 }, { "epoch": 270.5970149253731, "grad_norm": 1.4114607572555542, "learning_rate": 9.981116584564861e-06, "loss": 40.6002, "step": 11365 }, { "epoch": 270.6208955223881, "grad_norm": 1.3602135181427002, "learning_rate": 9.980295566502464e-06, "loss": 40.0615, "step": 11366 }, { "epoch": 270.644776119403, "grad_norm": 1.3935151100158691, "learning_rate": 9.979474548440066e-06, "loss": 39.2374, "step": 11367 }, { "epoch": 270.6686567164179, "grad_norm": 1.3441381454467773, "learning_rate": 9.978653530377669e-06, "loss": 40.6967, "step": 11368 }, { "epoch": 270.6925373134328, "grad_norm": 1.3946877717971802, "learning_rate": 9.977832512315271e-06, "loss": 39.0201, "step": 11369 }, { "epoch": 270.7164179104478, "grad_norm": 1.3401134014129639, "learning_rate": 9.977011494252874e-06, "loss": 39.9738, "step": 11370 }, { "epoch": 270.7402985074627, "grad_norm": 1.3460355997085571, "learning_rate": 9.976190476190477e-06, "loss": 39.8069, "step": 11371 }, { "epoch": 270.7641791044776, "grad_norm": 1.3310937881469727, "learning_rate": 9.97536945812808e-06, "loss": 39.8323, "step": 11372 }, { "epoch": 270.78805970149256, "grad_norm": 1.389211893081665, "learning_rate": 9.974548440065682e-06, "loss": 39.5147, "step": 11373 }, { "epoch": 270.81194029850747, "grad_norm": 1.3449784517288208, "learning_rate": 9.973727422003286e-06, "loss": 40.4358, "step": 11374 }, { "epoch": 270.8358208955224, "grad_norm": 1.3473438024520874, "learning_rate": 9.972906403940887e-06, "loss": 40.1456, "step": 11375 }, { "epoch": 270.85970149253734, "grad_norm": 1.3507215976715088, "learning_rate": 9.972085385878491e-06, "loss": 40.34, "step": 11376 }, { "epoch": 270.88358208955225, "grad_norm": 1.4201089143753052, "learning_rate": 9.971264367816092e-06, "loss": 39.8901, "step": 11377 }, { "epoch": 270.90746268656716, "grad_norm": 1.3668638467788696, "learning_rate": 9.970443349753695e-06, "loss": 40.08, "step": 11378 }, { "epoch": 270.93134328358207, "grad_norm": 1.3822969198226929, "learning_rate": 9.969622331691297e-06, "loss": 39.0003, "step": 11379 }, { "epoch": 270.95522388059703, "grad_norm": 1.3498021364212036, "learning_rate": 9.9688013136289e-06, "loss": 40.7779, "step": 11380 }, { "epoch": 270.97910447761194, "grad_norm": 1.324081301689148, "learning_rate": 9.967980295566503e-06, "loss": 40.0153, "step": 11381 }, { "epoch": 271.0, "grad_norm": 1.3712002038955688, "learning_rate": 9.967159277504105e-06, "loss": 35.1735, "step": 11382 }, { "epoch": 271.0238805970149, "grad_norm": 1.3464804887771606, "learning_rate": 9.96633825944171e-06, "loss": 40.0871, "step": 11383 }, { "epoch": 271.0477611940299, "grad_norm": 1.3546299934387207, "learning_rate": 9.96551724137931e-06, "loss": 38.9851, "step": 11384 }, { "epoch": 271.0716417910448, "grad_norm": 1.347378134727478, "learning_rate": 9.964696223316915e-06, "loss": 38.8195, "step": 11385 }, { "epoch": 271.0955223880597, "grad_norm": 1.3649744987487793, "learning_rate": 9.963875205254516e-06, "loss": 38.9017, "step": 11386 }, { "epoch": 271.1194029850746, "grad_norm": 1.3365916013717651, "learning_rate": 9.96305418719212e-06, "loss": 39.838, "step": 11387 }, { "epoch": 271.14328358208957, "grad_norm": 1.343192458152771, "learning_rate": 9.962233169129721e-06, "loss": 39.7197, "step": 11388 }, { "epoch": 271.1671641791045, "grad_norm": 1.3785459995269775, "learning_rate": 9.961412151067325e-06, "loss": 39.961, "step": 11389 }, { "epoch": 271.1910447761194, "grad_norm": 1.3873153924942017, "learning_rate": 9.960591133004926e-06, "loss": 40.9939, "step": 11390 }, { "epoch": 271.21492537313435, "grad_norm": 1.348963975906372, "learning_rate": 9.959770114942529e-06, "loss": 39.9175, "step": 11391 }, { "epoch": 271.23880597014926, "grad_norm": 1.371946096420288, "learning_rate": 9.958949096880131e-06, "loss": 39.8573, "step": 11392 }, { "epoch": 271.26268656716417, "grad_norm": 1.3823647499084473, "learning_rate": 9.958128078817734e-06, "loss": 39.6953, "step": 11393 }, { "epoch": 271.28656716417913, "grad_norm": 1.3596516847610474, "learning_rate": 9.957307060755338e-06, "loss": 40.6921, "step": 11394 }, { "epoch": 271.31044776119404, "grad_norm": 1.3577091693878174, "learning_rate": 9.95648604269294e-06, "loss": 39.187, "step": 11395 }, { "epoch": 271.33432835820895, "grad_norm": 1.3265928030014038, "learning_rate": 9.955665024630544e-06, "loss": 40.9507, "step": 11396 }, { "epoch": 271.35820895522386, "grad_norm": 1.3636842966079712, "learning_rate": 9.954844006568145e-06, "loss": 39.0207, "step": 11397 }, { "epoch": 271.3820895522388, "grad_norm": 1.36272132396698, "learning_rate": 9.954022988505749e-06, "loss": 38.5517, "step": 11398 }, { "epoch": 271.40597014925373, "grad_norm": 1.3675718307495117, "learning_rate": 9.95320197044335e-06, "loss": 40.4416, "step": 11399 }, { "epoch": 271.42985074626864, "grad_norm": 1.3872342109680176, "learning_rate": 9.952380952380954e-06, "loss": 39.3496, "step": 11400 }, { "epoch": 271.4537313432836, "grad_norm": 1.394783616065979, "learning_rate": 9.951559934318555e-06, "loss": 40.3345, "step": 11401 }, { "epoch": 271.4776119402985, "grad_norm": 1.3891410827636719, "learning_rate": 9.95073891625616e-06, "loss": 38.2147, "step": 11402 }, { "epoch": 271.5014925373134, "grad_norm": 1.3526653051376343, "learning_rate": 9.94991789819376e-06, "loss": 40.2283, "step": 11403 }, { "epoch": 271.52537313432833, "grad_norm": 1.3503080606460571, "learning_rate": 9.949096880131363e-06, "loss": 40.1368, "step": 11404 }, { "epoch": 271.5492537313433, "grad_norm": 1.3795528411865234, "learning_rate": 9.948275862068967e-06, "loss": 39.549, "step": 11405 }, { "epoch": 271.5731343283582, "grad_norm": 1.3139026165008545, "learning_rate": 9.947454844006568e-06, "loss": 39.0888, "step": 11406 }, { "epoch": 271.5970149253731, "grad_norm": 1.3451224565505981, "learning_rate": 9.946633825944172e-06, "loss": 41.1423, "step": 11407 }, { "epoch": 271.6208955223881, "grad_norm": 1.5092791318893433, "learning_rate": 9.945812807881773e-06, "loss": 40.5404, "step": 11408 }, { "epoch": 271.644776119403, "grad_norm": 1.4309519529342651, "learning_rate": 9.944991789819378e-06, "loss": 40.4663, "step": 11409 }, { "epoch": 271.6686567164179, "grad_norm": 1.3244554996490479, "learning_rate": 9.944170771756978e-06, "loss": 39.5538, "step": 11410 }, { "epoch": 271.6925373134328, "grad_norm": 1.3314380645751953, "learning_rate": 9.943349753694583e-06, "loss": 40.5222, "step": 11411 }, { "epoch": 271.7164179104478, "grad_norm": 1.32453453540802, "learning_rate": 9.942528735632184e-06, "loss": 40.3935, "step": 11412 }, { "epoch": 271.7402985074627, "grad_norm": 1.3552045822143555, "learning_rate": 9.941707717569788e-06, "loss": 40.4285, "step": 11413 }, { "epoch": 271.7641791044776, "grad_norm": 1.3180350065231323, "learning_rate": 9.94088669950739e-06, "loss": 38.9323, "step": 11414 }, { "epoch": 271.78805970149256, "grad_norm": 1.3588346242904663, "learning_rate": 9.940065681444993e-06, "loss": 40.895, "step": 11415 }, { "epoch": 271.81194029850747, "grad_norm": 1.346092939376831, "learning_rate": 9.939244663382596e-06, "loss": 39.0624, "step": 11416 }, { "epoch": 271.8358208955224, "grad_norm": 1.3351956605911255, "learning_rate": 9.938423645320198e-06, "loss": 40.7776, "step": 11417 }, { "epoch": 271.85970149253734, "grad_norm": 1.3789234161376953, "learning_rate": 9.937602627257801e-06, "loss": 38.07, "step": 11418 }, { "epoch": 271.88358208955225, "grad_norm": 1.3581385612487793, "learning_rate": 9.936781609195402e-06, "loss": 40.2401, "step": 11419 }, { "epoch": 271.90746268656716, "grad_norm": 1.3527662754058838, "learning_rate": 9.935960591133006e-06, "loss": 39.1883, "step": 11420 }, { "epoch": 271.93134328358207, "grad_norm": 1.325161099433899, "learning_rate": 9.935139573070607e-06, "loss": 40.5752, "step": 11421 }, { "epoch": 271.95522388059703, "grad_norm": 1.3543221950531006, "learning_rate": 9.934318555008212e-06, "loss": 40.2314, "step": 11422 }, { "epoch": 271.97910447761194, "grad_norm": 1.3477526903152466, "learning_rate": 9.933497536945812e-06, "loss": 40.1227, "step": 11423 }, { "epoch": 272.0, "grad_norm": 1.338706612586975, "learning_rate": 9.932676518883417e-06, "loss": 34.9266, "step": 11424 }, { "epoch": 272.0238805970149, "grad_norm": 1.3557696342468262, "learning_rate": 9.93185550082102e-06, "loss": 40.2023, "step": 11425 }, { "epoch": 272.0477611940299, "grad_norm": 1.354399561882019, "learning_rate": 9.931034482758622e-06, "loss": 39.4547, "step": 11426 }, { "epoch": 272.0716417910448, "grad_norm": 1.3481345176696777, "learning_rate": 9.930213464696225e-06, "loss": 39.6242, "step": 11427 }, { "epoch": 272.0955223880597, "grad_norm": 1.3583927154541016, "learning_rate": 9.929392446633827e-06, "loss": 39.6862, "step": 11428 }, { "epoch": 272.1194029850746, "grad_norm": 1.3198394775390625, "learning_rate": 9.92857142857143e-06, "loss": 39.6296, "step": 11429 }, { "epoch": 272.14328358208957, "grad_norm": 1.3469576835632324, "learning_rate": 9.927750410509032e-06, "loss": 40.2131, "step": 11430 }, { "epoch": 272.1671641791045, "grad_norm": 1.387058138847351, "learning_rate": 9.926929392446635e-06, "loss": 39.8859, "step": 11431 }, { "epoch": 272.1910447761194, "grad_norm": NaN, "learning_rate": 9.926108374384236e-06, "loss": 44.3251, "step": 11432 }, { "epoch": 272.21492537313435, "grad_norm": 1.3279821872711182, "learning_rate": 9.926108374384236e-06, "loss": 40.3774, "step": 11433 }, { "epoch": 272.23880597014926, "grad_norm": 1.3658136129379272, "learning_rate": 9.92528735632184e-06, "loss": 39.4612, "step": 11434 }, { "epoch": 272.26268656716417, "grad_norm": 1.3207398653030396, "learning_rate": 9.924466338259443e-06, "loss": 40.4838, "step": 11435 }, { "epoch": 272.28656716417913, "grad_norm": 1.3156050443649292, "learning_rate": 9.923645320197046e-06, "loss": 40.143, "step": 11436 }, { "epoch": 272.31044776119404, "grad_norm": 1.360360026359558, "learning_rate": 9.922824302134648e-06, "loss": 39.8376, "step": 11437 }, { "epoch": 272.33432835820895, "grad_norm": 1.3482239246368408, "learning_rate": 9.92200328407225e-06, "loss": 39.0036, "step": 11438 }, { "epoch": 272.35820895522386, "grad_norm": 1.361928105354309, "learning_rate": 9.921182266009853e-06, "loss": 41.1169, "step": 11439 }, { "epoch": 272.3820895522388, "grad_norm": 1.3539904356002808, "learning_rate": 9.920361247947456e-06, "loss": 39.8099, "step": 11440 }, { "epoch": 272.40597014925373, "grad_norm": 1.3652112483978271, "learning_rate": 9.919540229885059e-06, "loss": 39.0428, "step": 11441 }, { "epoch": 272.42985074626864, "grad_norm": 1.3437588214874268, "learning_rate": 9.918719211822661e-06, "loss": 39.8617, "step": 11442 }, { "epoch": 272.4537313432836, "grad_norm": 1.380807876586914, "learning_rate": 9.917898193760264e-06, "loss": 39.6081, "step": 11443 }, { "epoch": 272.4776119402985, "grad_norm": 1.3435747623443604, "learning_rate": 9.917077175697866e-06, "loss": 40.2943, "step": 11444 }, { "epoch": 272.5014925373134, "grad_norm": 1.3881514072418213, "learning_rate": 9.916256157635469e-06, "loss": 40.6677, "step": 11445 }, { "epoch": 272.52537313432833, "grad_norm": 1.354986310005188, "learning_rate": 9.915435139573072e-06, "loss": 39.6363, "step": 11446 }, { "epoch": 272.5492537313433, "grad_norm": 1.3531990051269531, "learning_rate": 9.914614121510674e-06, "loss": 39.1064, "step": 11447 }, { "epoch": 272.5731343283582, "grad_norm": 1.3716650009155273, "learning_rate": 9.913793103448277e-06, "loss": 39.9586, "step": 11448 }, { "epoch": 272.5970149253731, "grad_norm": 1.4199860095977783, "learning_rate": 9.91297208538588e-06, "loss": 39.4721, "step": 11449 }, { "epoch": 272.6208955223881, "grad_norm": 1.3671207427978516, "learning_rate": 9.912151067323482e-06, "loss": 40.5746, "step": 11450 }, { "epoch": 272.644776119403, "grad_norm": 1.3928481340408325, "learning_rate": 9.911330049261085e-06, "loss": 40.7968, "step": 11451 }, { "epoch": 272.6686567164179, "grad_norm": 1.3343513011932373, "learning_rate": 9.910509031198687e-06, "loss": 40.2671, "step": 11452 }, { "epoch": 272.6925373134328, "grad_norm": 1.3890724182128906, "learning_rate": 9.90968801313629e-06, "loss": 39.9341, "step": 11453 }, { "epoch": 272.7164179104478, "grad_norm": 1.3770949840545654, "learning_rate": 9.908866995073893e-06, "loss": 40.5385, "step": 11454 }, { "epoch": 272.7402985074627, "grad_norm": 1.3699489831924438, "learning_rate": 9.908045977011495e-06, "loss": 40.1904, "step": 11455 }, { "epoch": 272.7641791044776, "grad_norm": 1.3163769245147705, "learning_rate": 9.907224958949098e-06, "loss": 39.089, "step": 11456 }, { "epoch": 272.78805970149256, "grad_norm": 1.3478753566741943, "learning_rate": 9.9064039408867e-06, "loss": 40.2592, "step": 11457 }, { "epoch": 272.81194029850747, "grad_norm": 1.3343896865844727, "learning_rate": 9.905582922824303e-06, "loss": 39.3507, "step": 11458 }, { "epoch": 272.8358208955224, "grad_norm": 1.3388372659683228, "learning_rate": 9.904761904761906e-06, "loss": 40.2437, "step": 11459 }, { "epoch": 272.85970149253734, "grad_norm": 1.3794981241226196, "learning_rate": 9.903940886699508e-06, "loss": 39.3234, "step": 11460 }, { "epoch": 272.88358208955225, "grad_norm": 1.3890377283096313, "learning_rate": 9.90311986863711e-06, "loss": 38.7076, "step": 11461 }, { "epoch": 272.90746268656716, "grad_norm": 1.3496688604354858, "learning_rate": 9.902298850574713e-06, "loss": 39.6921, "step": 11462 }, { "epoch": 272.93134328358207, "grad_norm": 1.3611198663711548, "learning_rate": 9.901477832512316e-06, "loss": 38.5018, "step": 11463 }, { "epoch": 272.95522388059703, "grad_norm": 1.4181770086288452, "learning_rate": 9.900656814449919e-06, "loss": 39.5498, "step": 11464 }, { "epoch": 272.97910447761194, "grad_norm": 1.3540639877319336, "learning_rate": 9.899835796387521e-06, "loss": 39.488, "step": 11465 }, { "epoch": 273.0, "grad_norm": 1.3305574655532837, "learning_rate": 9.899014778325124e-06, "loss": 35.4592, "step": 11466 }, { "epoch": 273.0238805970149, "grad_norm": 1.3683315515518188, "learning_rate": 9.898193760262727e-06, "loss": 40.3062, "step": 11467 }, { "epoch": 273.0477611940299, "grad_norm": 1.3444764614105225, "learning_rate": 9.897372742200329e-06, "loss": 38.3484, "step": 11468 }, { "epoch": 273.0716417910448, "grad_norm": 1.347292423248291, "learning_rate": 9.896551724137932e-06, "loss": 39.2825, "step": 11469 }, { "epoch": 273.0955223880597, "grad_norm": NaN, "learning_rate": 9.895730706075534e-06, "loss": 68.0354, "step": 11470 }, { "epoch": 273.1194029850746, "grad_norm": 1.4353687763214111, "learning_rate": 9.895730706075534e-06, "loss": 39.2691, "step": 11471 }, { "epoch": 273.14328358208957, "grad_norm": 1.3362637758255005, "learning_rate": 9.894909688013137e-06, "loss": 40.6233, "step": 11472 }, { "epoch": 273.1671641791045, "grad_norm": 1.3131297826766968, "learning_rate": 9.89408866995074e-06, "loss": 40.3821, "step": 11473 }, { "epoch": 273.1910447761194, "grad_norm": 1.382434368133545, "learning_rate": 9.893267651888342e-06, "loss": 40.3595, "step": 11474 }, { "epoch": 273.21492537313435, "grad_norm": 1.342218041419983, "learning_rate": 9.892446633825945e-06, "loss": 39.5253, "step": 11475 }, { "epoch": 273.23880597014926, "grad_norm": 1.3150750398635864, "learning_rate": 9.891625615763547e-06, "loss": 41.1932, "step": 11476 }, { "epoch": 273.26268656716417, "grad_norm": 1.3550199270248413, "learning_rate": 9.89080459770115e-06, "loss": 39.7846, "step": 11477 }, { "epoch": 273.28656716417913, "grad_norm": 1.3065959215164185, "learning_rate": 9.889983579638753e-06, "loss": 40.1973, "step": 11478 }, { "epoch": 273.31044776119404, "grad_norm": 1.3835101127624512, "learning_rate": 9.889162561576355e-06, "loss": 40.5183, "step": 11479 }, { "epoch": 273.33432835820895, "grad_norm": 1.4211043119430542, "learning_rate": 9.888341543513958e-06, "loss": 38.54, "step": 11480 }, { "epoch": 273.35820895522386, "grad_norm": 1.3525831699371338, "learning_rate": 9.88752052545156e-06, "loss": 40.0609, "step": 11481 }, { "epoch": 273.3820895522388, "grad_norm": 1.3815395832061768, "learning_rate": 9.886699507389163e-06, "loss": 39.0538, "step": 11482 }, { "epoch": 273.40597014925373, "grad_norm": 1.3275748491287231, "learning_rate": 9.885878489326766e-06, "loss": 40.9561, "step": 11483 }, { "epoch": 273.42985074626864, "grad_norm": 1.3523539304733276, "learning_rate": 9.885057471264368e-06, "loss": 39.8244, "step": 11484 }, { "epoch": 273.4537313432836, "grad_norm": 1.3455837965011597, "learning_rate": 9.884236453201971e-06, "loss": 39.4088, "step": 11485 }, { "epoch": 273.4776119402985, "grad_norm": 1.3159815073013306, "learning_rate": 9.883415435139574e-06, "loss": 39.9533, "step": 11486 }, { "epoch": 273.5014925373134, "grad_norm": 1.3663238286972046, "learning_rate": 9.882594417077178e-06, "loss": 40.6952, "step": 11487 }, { "epoch": 273.52537313432833, "grad_norm": 1.3961787223815918, "learning_rate": 9.881773399014779e-06, "loss": 40.5881, "step": 11488 }, { "epoch": 273.5492537313433, "grad_norm": 1.3764269351959229, "learning_rate": 9.880952380952381e-06, "loss": 39.4769, "step": 11489 }, { "epoch": 273.5731343283582, "grad_norm": 1.3749858140945435, "learning_rate": 9.880131362889984e-06, "loss": 39.7185, "step": 11490 }, { "epoch": 273.5970149253731, "grad_norm": 1.3697513341903687, "learning_rate": 9.879310344827587e-06, "loss": 40.3587, "step": 11491 }, { "epoch": 273.6208955223881, "grad_norm": 1.3643203973770142, "learning_rate": 9.87848932676519e-06, "loss": 40.6902, "step": 11492 }, { "epoch": 273.644776119403, "grad_norm": 1.3774889707565308, "learning_rate": 9.877668308702792e-06, "loss": 39.5932, "step": 11493 }, { "epoch": 273.6686567164179, "grad_norm": 1.3360941410064697, "learning_rate": 9.876847290640394e-06, "loss": 39.4057, "step": 11494 }, { "epoch": 273.6925373134328, "grad_norm": 1.3023254871368408, "learning_rate": 9.876026272577997e-06, "loss": 40.1394, "step": 11495 }, { "epoch": 273.7164179104478, "grad_norm": 1.3619617223739624, "learning_rate": 9.8752052545156e-06, "loss": 39.337, "step": 11496 }, { "epoch": 273.7402985074627, "grad_norm": 1.3319300413131714, "learning_rate": 9.874384236453202e-06, "loss": 38.9605, "step": 11497 }, { "epoch": 273.7641791044776, "grad_norm": 1.3464267253875732, "learning_rate": 9.873563218390807e-06, "loss": 40.2333, "step": 11498 }, { "epoch": 273.78805970149256, "grad_norm": 1.3331403732299805, "learning_rate": 9.872742200328408e-06, "loss": 40.5256, "step": 11499 }, { "epoch": 273.81194029850747, "grad_norm": 1.3356510400772095, "learning_rate": 9.871921182266012e-06, "loss": 40.2901, "step": 11500 }, { "epoch": 273.8358208955224, "grad_norm": 1.3314740657806396, "learning_rate": 9.871100164203613e-06, "loss": 40.6376, "step": 11501 }, { "epoch": 273.85970149253734, "grad_norm": 1.3721628189086914, "learning_rate": 9.870279146141215e-06, "loss": 38.7016, "step": 11502 }, { "epoch": 273.88358208955225, "grad_norm": 1.5134670734405518, "learning_rate": 9.869458128078818e-06, "loss": 38.9832, "step": 11503 }, { "epoch": 273.90746268656716, "grad_norm": 1.334246277809143, "learning_rate": 9.86863711001642e-06, "loss": 39.6318, "step": 11504 }, { "epoch": 273.93134328358207, "grad_norm": 1.3774943351745605, "learning_rate": 9.867816091954023e-06, "loss": 39.7439, "step": 11505 }, { "epoch": 273.95522388059703, "grad_norm": 1.3764276504516602, "learning_rate": 9.866995073891626e-06, "loss": 39.2214, "step": 11506 }, { "epoch": 273.97910447761194, "grad_norm": 1.3664847612380981, "learning_rate": 9.86617405582923e-06, "loss": 38.3916, "step": 11507 }, { "epoch": 274.0, "grad_norm": 1.3682974576950073, "learning_rate": 9.865353037766831e-06, "loss": 35.7319, "step": 11508 }, { "epoch": 274.0238805970149, "grad_norm": 1.344473123550415, "learning_rate": 9.864532019704435e-06, "loss": 40.4695, "step": 11509 }, { "epoch": 274.0477611940299, "grad_norm": 1.462465763092041, "learning_rate": 9.863711001642036e-06, "loss": 40.2105, "step": 11510 }, { "epoch": 274.0716417910448, "grad_norm": 1.325078010559082, "learning_rate": 9.86288998357964e-06, "loss": 39.5781, "step": 11511 }, { "epoch": 274.0955223880597, "grad_norm": 1.3723514080047607, "learning_rate": 9.862068965517241e-06, "loss": 40.2789, "step": 11512 }, { "epoch": 274.1194029850746, "grad_norm": 1.3319478034973145, "learning_rate": 9.861247947454846e-06, "loss": 40.3897, "step": 11513 }, { "epoch": 274.14328358208957, "grad_norm": 1.3659120798110962, "learning_rate": 9.860426929392447e-06, "loss": 40.3287, "step": 11514 }, { "epoch": 274.1671641791045, "grad_norm": 1.3306403160095215, "learning_rate": 9.859605911330051e-06, "loss": 39.8212, "step": 11515 }, { "epoch": 274.1910447761194, "grad_norm": 1.381819248199463, "learning_rate": 9.858784893267652e-06, "loss": 40.0699, "step": 11516 }, { "epoch": 274.21492537313435, "grad_norm": 1.4327735900878906, "learning_rate": 9.857963875205255e-06, "loss": 39.686, "step": 11517 }, { "epoch": 274.23880597014926, "grad_norm": 1.338531732559204, "learning_rate": 9.857142857142859e-06, "loss": 39.9451, "step": 11518 }, { "epoch": 274.26268656716417, "grad_norm": 1.3409231901168823, "learning_rate": 9.85632183908046e-06, "loss": 40.4096, "step": 11519 }, { "epoch": 274.28656716417913, "grad_norm": 1.3235801458358765, "learning_rate": 9.855500821018064e-06, "loss": 39.4983, "step": 11520 }, { "epoch": 274.31044776119404, "grad_norm": 1.3218097686767578, "learning_rate": 9.854679802955665e-06, "loss": 39.6712, "step": 11521 }, { "epoch": 274.33432835820895, "grad_norm": 1.3857669830322266, "learning_rate": 9.85385878489327e-06, "loss": 38.8015, "step": 11522 }, { "epoch": 274.35820895522386, "grad_norm": 1.372125506401062, "learning_rate": 9.85303776683087e-06, "loss": 39.6353, "step": 11523 }, { "epoch": 274.3820895522388, "grad_norm": 1.319035530090332, "learning_rate": 9.852216748768475e-06, "loss": 39.8946, "step": 11524 }, { "epoch": 274.40597014925373, "grad_norm": 1.3371225595474243, "learning_rate": 9.851395730706075e-06, "loss": 39.7481, "step": 11525 }, { "epoch": 274.42985074626864, "grad_norm": 1.3654625415802002, "learning_rate": 9.85057471264368e-06, "loss": 40.5926, "step": 11526 }, { "epoch": 274.4537313432836, "grad_norm": 1.3628865480422974, "learning_rate": 9.84975369458128e-06, "loss": 39.9236, "step": 11527 }, { "epoch": 274.4776119402985, "grad_norm": 1.3776682615280151, "learning_rate": 9.848932676518885e-06, "loss": 39.7369, "step": 11528 }, { "epoch": 274.5014925373134, "grad_norm": 1.3233544826507568, "learning_rate": 9.848111658456488e-06, "loss": 40.3149, "step": 11529 }, { "epoch": 274.52537313432833, "grad_norm": 1.316408634185791, "learning_rate": 9.847290640394089e-06, "loss": 38.6128, "step": 11530 }, { "epoch": 274.5492537313433, "grad_norm": 1.36700439453125, "learning_rate": 9.846469622331693e-06, "loss": 39.4498, "step": 11531 }, { "epoch": 274.5731343283582, "grad_norm": 1.3765718936920166, "learning_rate": 9.845648604269294e-06, "loss": 40.3312, "step": 11532 }, { "epoch": 274.5970149253731, "grad_norm": 1.320173978805542, "learning_rate": 9.844827586206898e-06, "loss": 38.7702, "step": 11533 }, { "epoch": 274.6208955223881, "grad_norm": 1.362824559211731, "learning_rate": 9.844006568144499e-06, "loss": 40.046, "step": 11534 }, { "epoch": 274.644776119403, "grad_norm": 1.3326882123947144, "learning_rate": 9.843185550082103e-06, "loss": 39.0821, "step": 11535 }, { "epoch": 274.6686567164179, "grad_norm": 1.3776286840438843, "learning_rate": 9.842364532019704e-06, "loss": 40.2641, "step": 11536 }, { "epoch": 274.6925373134328, "grad_norm": 1.3461642265319824, "learning_rate": 9.841543513957308e-06, "loss": 39.9777, "step": 11537 }, { "epoch": 274.7164179104478, "grad_norm": 1.370586633682251, "learning_rate": 9.840722495894911e-06, "loss": 38.8936, "step": 11538 }, { "epoch": 274.7402985074627, "grad_norm": 1.392506718635559, "learning_rate": 9.839901477832514e-06, "loss": 40.2227, "step": 11539 }, { "epoch": 274.7641791044776, "grad_norm": 1.4234501123428345, "learning_rate": 9.839080459770116e-06, "loss": 39.209, "step": 11540 }, { "epoch": 274.78805970149256, "grad_norm": 1.3679349422454834, "learning_rate": 9.838259441707719e-06, "loss": 40.2638, "step": 11541 }, { "epoch": 274.81194029850747, "grad_norm": 1.347678780555725, "learning_rate": 9.837438423645322e-06, "loss": 40.3452, "step": 11542 }, { "epoch": 274.8358208955224, "grad_norm": 1.3528029918670654, "learning_rate": 9.836617405582924e-06, "loss": 38.5756, "step": 11543 }, { "epoch": 274.85970149253734, "grad_norm": 1.3705270290374756, "learning_rate": 9.835796387520527e-06, "loss": 39.6535, "step": 11544 }, { "epoch": 274.88358208955225, "grad_norm": 1.3650108575820923, "learning_rate": 9.834975369458128e-06, "loss": 38.8286, "step": 11545 }, { "epoch": 274.90746268656716, "grad_norm": 1.404683232307434, "learning_rate": 9.834154351395732e-06, "loss": 40.4585, "step": 11546 }, { "epoch": 274.93134328358207, "grad_norm": 1.3635413646697998, "learning_rate": 9.833333333333333e-06, "loss": 40.0257, "step": 11547 }, { "epoch": 274.95522388059703, "grad_norm": 1.3415180444717407, "learning_rate": 9.832512315270937e-06, "loss": 40.192, "step": 11548 }, { "epoch": 274.97910447761194, "grad_norm": 1.355381727218628, "learning_rate": 9.83169129720854e-06, "loss": 39.1033, "step": 11549 }, { "epoch": 275.0, "grad_norm": 1.3558181524276733, "learning_rate": 9.830870279146142e-06, "loss": 36.3284, "step": 11550 }, { "epoch": 275.0238805970149, "grad_norm": 1.3797030448913574, "learning_rate": 9.830049261083745e-06, "loss": 39.1154, "step": 11551 }, { "epoch": 275.0477611940299, "grad_norm": 1.3311920166015625, "learning_rate": 9.829228243021348e-06, "loss": 38.567, "step": 11552 }, { "epoch": 275.0716417910448, "grad_norm": 1.4319273233413696, "learning_rate": 9.82840722495895e-06, "loss": 39.9545, "step": 11553 }, { "epoch": 275.0955223880597, "grad_norm": 1.3858566284179688, "learning_rate": 9.827586206896553e-06, "loss": 38.7988, "step": 11554 }, { "epoch": 275.1194029850746, "grad_norm": 1.3661202192306519, "learning_rate": 9.826765188834156e-06, "loss": 39.5316, "step": 11555 }, { "epoch": 275.14328358208957, "grad_norm": 1.37949538230896, "learning_rate": 9.825944170771758e-06, "loss": 40.5463, "step": 11556 }, { "epoch": 275.1671641791045, "grad_norm": 1.3278319835662842, "learning_rate": 9.82512315270936e-06, "loss": 40.6695, "step": 11557 }, { "epoch": 275.1910447761194, "grad_norm": 1.3847211599349976, "learning_rate": 9.824302134646963e-06, "loss": 40.5394, "step": 11558 }, { "epoch": 275.21492537313435, "grad_norm": 1.4148375988006592, "learning_rate": 9.823481116584566e-06, "loss": 39.3012, "step": 11559 }, { "epoch": 275.23880597014926, "grad_norm": 1.3519713878631592, "learning_rate": 9.822660098522169e-06, "loss": 39.1399, "step": 11560 }, { "epoch": 275.26268656716417, "grad_norm": 1.3264535665512085, "learning_rate": 9.821839080459771e-06, "loss": 39.7737, "step": 11561 }, { "epoch": 275.28656716417913, "grad_norm": 1.3501996994018555, "learning_rate": 9.821018062397374e-06, "loss": 39.8691, "step": 11562 }, { "epoch": 275.31044776119404, "grad_norm": 1.3557547330856323, "learning_rate": 9.820197044334976e-06, "loss": 38.5651, "step": 11563 }, { "epoch": 275.33432835820895, "grad_norm": NaN, "learning_rate": 9.819376026272579e-06, "loss": 45.7521, "step": 11564 }, { "epoch": 275.35820895522386, "grad_norm": 1.3535321950912476, "learning_rate": 9.819376026272579e-06, "loss": 40.9501, "step": 11565 }, { "epoch": 275.3820895522388, "grad_norm": 1.3384616374969482, "learning_rate": 9.818555008210182e-06, "loss": 38.1257, "step": 11566 }, { "epoch": 275.40597014925373, "grad_norm": 1.317982792854309, "learning_rate": 9.817733990147784e-06, "loss": 39.6894, "step": 11567 }, { "epoch": 275.42985074626864, "grad_norm": 1.3631418943405151, "learning_rate": 9.816912972085387e-06, "loss": 38.2729, "step": 11568 }, { "epoch": 275.4537313432836, "grad_norm": 1.3180822134017944, "learning_rate": 9.81609195402299e-06, "loss": 39.5682, "step": 11569 }, { "epoch": 275.4776119402985, "grad_norm": 1.41645085811615, "learning_rate": 9.815270935960592e-06, "loss": 38.743, "step": 11570 }, { "epoch": 275.5014925373134, "grad_norm": 1.349867343902588, "learning_rate": 9.814449917898195e-06, "loss": 39.5229, "step": 11571 }, { "epoch": 275.52537313432833, "grad_norm": 1.340968132019043, "learning_rate": 9.813628899835797e-06, "loss": 41.1356, "step": 11572 }, { "epoch": 275.5492537313433, "grad_norm": 1.315514326095581, "learning_rate": 9.8128078817734e-06, "loss": 39.973, "step": 11573 }, { "epoch": 275.5731343283582, "grad_norm": 1.3461905717849731, "learning_rate": 9.811986863711003e-06, "loss": 39.3419, "step": 11574 }, { "epoch": 275.5970149253731, "grad_norm": 1.3017078638076782, "learning_rate": 9.811165845648605e-06, "loss": 39.5278, "step": 11575 }, { "epoch": 275.6208955223881, "grad_norm": 1.3510876893997192, "learning_rate": 9.810344827586208e-06, "loss": 40.0768, "step": 11576 }, { "epoch": 275.644776119403, "grad_norm": 1.3621482849121094, "learning_rate": 9.80952380952381e-06, "loss": 40.3365, "step": 11577 }, { "epoch": 275.6686567164179, "grad_norm": 1.3430763483047485, "learning_rate": 9.808702791461413e-06, "loss": 38.6227, "step": 11578 }, { "epoch": 275.6925373134328, "grad_norm": 1.3810365200042725, "learning_rate": 9.807881773399016e-06, "loss": 40.8713, "step": 11579 }, { "epoch": 275.7164179104478, "grad_norm": 1.3274611234664917, "learning_rate": 9.807060755336618e-06, "loss": 39.7823, "step": 11580 }, { "epoch": 275.7402985074627, "grad_norm": 1.3653665781021118, "learning_rate": 9.80623973727422e-06, "loss": 40.0611, "step": 11581 }, { "epoch": 275.7641791044776, "grad_norm": 1.3306093215942383, "learning_rate": 9.805418719211823e-06, "loss": 40.6952, "step": 11582 }, { "epoch": 275.78805970149256, "grad_norm": 1.32741379737854, "learning_rate": 9.804597701149426e-06, "loss": 39.9457, "step": 11583 }, { "epoch": 275.81194029850747, "grad_norm": 1.3246517181396484, "learning_rate": 9.803776683087029e-06, "loss": 39.7621, "step": 11584 }, { "epoch": 275.8358208955224, "grad_norm": 1.3413844108581543, "learning_rate": 9.802955665024631e-06, "loss": 40.5518, "step": 11585 }, { "epoch": 275.85970149253734, "grad_norm": 1.333431601524353, "learning_rate": 9.802134646962234e-06, "loss": 41.0542, "step": 11586 }, { "epoch": 275.88358208955225, "grad_norm": 1.33649480342865, "learning_rate": 9.801313628899837e-06, "loss": 40.9428, "step": 11587 }, { "epoch": 275.90746268656716, "grad_norm": 1.4069945812225342, "learning_rate": 9.800492610837439e-06, "loss": 39.9321, "step": 11588 }, { "epoch": 275.93134328358207, "grad_norm": 1.3309872150421143, "learning_rate": 9.799671592775042e-06, "loss": 40.729, "step": 11589 }, { "epoch": 275.95522388059703, "grad_norm": 1.3662846088409424, "learning_rate": 9.798850574712644e-06, "loss": 39.9154, "step": 11590 }, { "epoch": 275.97910447761194, "grad_norm": 1.3414400815963745, "learning_rate": 9.798029556650247e-06, "loss": 38.8845, "step": 11591 }, { "epoch": 276.0, "grad_norm": 1.3233407735824585, "learning_rate": 9.79720853858785e-06, "loss": 35.1131, "step": 11592 }, { "epoch": 276.0238805970149, "grad_norm": 1.3331599235534668, "learning_rate": 9.796387520525452e-06, "loss": 39.369, "step": 11593 }, { "epoch": 276.0477611940299, "grad_norm": 1.3787893056869507, "learning_rate": 9.795566502463055e-06, "loss": 38.449, "step": 11594 }, { "epoch": 276.0716417910448, "grad_norm": 1.3106399774551392, "learning_rate": 9.794745484400657e-06, "loss": 40.4221, "step": 11595 }, { "epoch": 276.0955223880597, "grad_norm": 1.354848027229309, "learning_rate": 9.79392446633826e-06, "loss": 37.9631, "step": 11596 }, { "epoch": 276.1194029850746, "grad_norm": 1.356934666633606, "learning_rate": 9.793103448275863e-06, "loss": 40.0908, "step": 11597 }, { "epoch": 276.14328358208957, "grad_norm": 1.2960082292556763, "learning_rate": 9.792282430213465e-06, "loss": 39.7205, "step": 11598 }, { "epoch": 276.1671641791045, "grad_norm": 1.3476006984710693, "learning_rate": 9.791461412151068e-06, "loss": 39.4874, "step": 11599 }, { "epoch": 276.1910447761194, "grad_norm": 1.3129602670669556, "learning_rate": 9.79064039408867e-06, "loss": 40.7432, "step": 11600 }, { "epoch": 276.21492537313435, "grad_norm": 1.360498070716858, "learning_rate": 9.789819376026273e-06, "loss": 40.4017, "step": 11601 }, { "epoch": 276.23880597014926, "grad_norm": 1.339138388633728, "learning_rate": 9.788998357963876e-06, "loss": 40.9112, "step": 11602 }, { "epoch": 276.26268656716417, "grad_norm": 1.360205888748169, "learning_rate": 9.788177339901478e-06, "loss": 39.0968, "step": 11603 }, { "epoch": 276.28656716417913, "grad_norm": 1.3664600849151611, "learning_rate": 9.787356321839081e-06, "loss": 39.1289, "step": 11604 }, { "epoch": 276.31044776119404, "grad_norm": 1.3917111158370972, "learning_rate": 9.786535303776684e-06, "loss": 39.5174, "step": 11605 }, { "epoch": 276.33432835820895, "grad_norm": 1.3233826160430908, "learning_rate": 9.785714285714286e-06, "loss": 40.9184, "step": 11606 }, { "epoch": 276.35820895522386, "grad_norm": 1.4078781604766846, "learning_rate": 9.784893267651889e-06, "loss": 39.2551, "step": 11607 }, { "epoch": 276.3820895522388, "grad_norm": 1.3450231552124023, "learning_rate": 9.784072249589491e-06, "loss": 41.0977, "step": 11608 }, { "epoch": 276.40597014925373, "grad_norm": 1.3607983589172363, "learning_rate": 9.783251231527094e-06, "loss": 39.6859, "step": 11609 }, { "epoch": 276.42985074626864, "grad_norm": 1.353265404701233, "learning_rate": 9.782430213464698e-06, "loss": 39.3888, "step": 11610 }, { "epoch": 276.4537313432836, "grad_norm": 1.3483481407165527, "learning_rate": 9.7816091954023e-06, "loss": 40.331, "step": 11611 }, { "epoch": 276.4776119402985, "grad_norm": 1.336894154548645, "learning_rate": 9.780788177339904e-06, "loss": 38.5016, "step": 11612 }, { "epoch": 276.5014925373134, "grad_norm": 1.3806915283203125, "learning_rate": 9.779967159277504e-06, "loss": 40.4124, "step": 11613 }, { "epoch": 276.52537313432833, "grad_norm": 1.3453514575958252, "learning_rate": 9.779146141215107e-06, "loss": 39.8298, "step": 11614 }, { "epoch": 276.5492537313433, "grad_norm": 1.3483303785324097, "learning_rate": 9.77832512315271e-06, "loss": 40.756, "step": 11615 }, { "epoch": 276.5731343283582, "grad_norm": 1.331068754196167, "learning_rate": 9.777504105090312e-06, "loss": 40.647, "step": 11616 }, { "epoch": 276.5970149253731, "grad_norm": 1.3093875646591187, "learning_rate": 9.776683087027915e-06, "loss": 40.9602, "step": 11617 }, { "epoch": 276.6208955223881, "grad_norm": 1.5198326110839844, "learning_rate": 9.775862068965518e-06, "loss": 38.0091, "step": 11618 }, { "epoch": 276.644776119403, "grad_norm": 1.2934813499450684, "learning_rate": 9.77504105090312e-06, "loss": 40.5637, "step": 11619 }, { "epoch": 276.6686567164179, "grad_norm": 1.3661613464355469, "learning_rate": 9.774220032840723e-06, "loss": 39.2915, "step": 11620 }, { "epoch": 276.6925373134328, "grad_norm": 1.360903263092041, "learning_rate": 9.773399014778327e-06, "loss": 38.7833, "step": 11621 }, { "epoch": 276.7164179104478, "grad_norm": 1.372452974319458, "learning_rate": 9.772577996715928e-06, "loss": 41.0226, "step": 11622 }, { "epoch": 276.7402985074627, "grad_norm": 1.3259432315826416, "learning_rate": 9.771756978653532e-06, "loss": 40.4512, "step": 11623 }, { "epoch": 276.7641791044776, "grad_norm": 1.3579132556915283, "learning_rate": 9.770935960591133e-06, "loss": 38.3991, "step": 11624 }, { "epoch": 276.78805970149256, "grad_norm": 1.407126545906067, "learning_rate": 9.770114942528738e-06, "loss": 40.0239, "step": 11625 }, { "epoch": 276.81194029850747, "grad_norm": 1.3101906776428223, "learning_rate": 9.769293924466338e-06, "loss": 39.2311, "step": 11626 }, { "epoch": 276.8358208955224, "grad_norm": 1.339065432548523, "learning_rate": 9.768472906403941e-06, "loss": 41.0064, "step": 11627 }, { "epoch": 276.85970149253734, "grad_norm": 1.3791290521621704, "learning_rate": 9.767651888341544e-06, "loss": 39.0704, "step": 11628 }, { "epoch": 276.88358208955225, "grad_norm": 1.3812305927276611, "learning_rate": 9.766830870279146e-06, "loss": 40.0349, "step": 11629 }, { "epoch": 276.90746268656716, "grad_norm": 1.3833733797073364, "learning_rate": 9.766009852216749e-06, "loss": 40.3702, "step": 11630 }, { "epoch": 276.93134328358207, "grad_norm": 1.3461819887161255, "learning_rate": 9.765188834154351e-06, "loss": 40.303, "step": 11631 }, { "epoch": 276.95522388059703, "grad_norm": 1.3289510011672974, "learning_rate": 9.764367816091956e-06, "loss": 38.3763, "step": 11632 }, { "epoch": 276.97910447761194, "grad_norm": 1.3302462100982666, "learning_rate": 9.763546798029557e-06, "loss": 40.1835, "step": 11633 }, { "epoch": 277.0, "grad_norm": 1.3437044620513916, "learning_rate": 9.762725779967161e-06, "loss": 34.297, "step": 11634 }, { "epoch": 277.0238805970149, "grad_norm": 1.3625767230987549, "learning_rate": 9.761904761904762e-06, "loss": 40.01, "step": 11635 }, { "epoch": 277.0477611940299, "grad_norm": 1.3469363451004028, "learning_rate": 9.761083743842366e-06, "loss": 40.1554, "step": 11636 }, { "epoch": 277.0716417910448, "grad_norm": 1.3263814449310303, "learning_rate": 9.760262725779967e-06, "loss": 39.216, "step": 11637 }, { "epoch": 277.0955223880597, "grad_norm": 1.4002785682678223, "learning_rate": 9.759441707717571e-06, "loss": 38.7878, "step": 11638 }, { "epoch": 277.1194029850746, "grad_norm": 1.3299949169158936, "learning_rate": 9.758620689655172e-06, "loss": 39.366, "step": 11639 }, { "epoch": 277.14328358208957, "grad_norm": 1.3796664476394653, "learning_rate": 9.757799671592777e-06, "loss": 40.0343, "step": 11640 }, { "epoch": 277.1671641791045, "grad_norm": 1.338396430015564, "learning_rate": 9.75697865353038e-06, "loss": 40.4448, "step": 11641 }, { "epoch": 277.1910447761194, "grad_norm": 1.7591688632965088, "learning_rate": 9.75615763546798e-06, "loss": 38.5019, "step": 11642 }, { "epoch": 277.21492537313435, "grad_norm": 1.3313260078430176, "learning_rate": 9.755336617405585e-06, "loss": 38.9699, "step": 11643 }, { "epoch": 277.23880597014926, "grad_norm": 1.3336726427078247, "learning_rate": 9.754515599343185e-06, "loss": 39.6001, "step": 11644 }, { "epoch": 277.26268656716417, "grad_norm": 1.3575516939163208, "learning_rate": 9.75369458128079e-06, "loss": 40.6891, "step": 11645 }, { "epoch": 277.28656716417913, "grad_norm": 1.385605812072754, "learning_rate": 9.75287356321839e-06, "loss": 40.1691, "step": 11646 }, { "epoch": 277.31044776119404, "grad_norm": 1.3508843183517456, "learning_rate": 9.752052545155995e-06, "loss": 38.9363, "step": 11647 }, { "epoch": 277.33432835820895, "grad_norm": 1.3967753648757935, "learning_rate": 9.751231527093596e-06, "loss": 39.0644, "step": 11648 }, { "epoch": 277.35820895522386, "grad_norm": 1.378440499305725, "learning_rate": 9.7504105090312e-06, "loss": 39.9505, "step": 11649 }, { "epoch": 277.3820895522388, "grad_norm": 1.3923918008804321, "learning_rate": 9.749589490968801e-06, "loss": 40.459, "step": 11650 }, { "epoch": 277.40597014925373, "grad_norm": 1.3399168252944946, "learning_rate": 9.748768472906405e-06, "loss": 39.8737, "step": 11651 }, { "epoch": 277.42985074626864, "grad_norm": 1.383942723274231, "learning_rate": 9.747947454844008e-06, "loss": 40.4263, "step": 11652 }, { "epoch": 277.4537313432836, "grad_norm": 1.350550651550293, "learning_rate": 9.74712643678161e-06, "loss": 39.5517, "step": 11653 }, { "epoch": 277.4776119402985, "grad_norm": 1.3841460943222046, "learning_rate": 9.746305418719213e-06, "loss": 40.2678, "step": 11654 }, { "epoch": 277.5014925373134, "grad_norm": 1.3291102647781372, "learning_rate": 9.745484400656814e-06, "loss": 39.3504, "step": 11655 }, { "epoch": 277.52537313432833, "grad_norm": 1.3733913898468018, "learning_rate": 9.744663382594419e-06, "loss": 39.5769, "step": 11656 }, { "epoch": 277.5492537313433, "grad_norm": 1.3393101692199707, "learning_rate": 9.74384236453202e-06, "loss": 40.8507, "step": 11657 }, { "epoch": 277.5731343283582, "grad_norm": 1.3489649295806885, "learning_rate": 9.743021346469624e-06, "loss": 39.2453, "step": 11658 }, { "epoch": 277.5970149253731, "grad_norm": 1.3450591564178467, "learning_rate": 9.742200328407225e-06, "loss": 38.9515, "step": 11659 }, { "epoch": 277.6208955223881, "grad_norm": 1.3487718105316162, "learning_rate": 9.741379310344829e-06, "loss": 40.3647, "step": 11660 }, { "epoch": 277.644776119403, "grad_norm": 1.3576897382736206, "learning_rate": 9.740558292282432e-06, "loss": 39.6214, "step": 11661 }, { "epoch": 277.6686567164179, "grad_norm": 1.3763697147369385, "learning_rate": 9.739737274220034e-06, "loss": 39.7085, "step": 11662 }, { "epoch": 277.6925373134328, "grad_norm": 1.4239908456802368, "learning_rate": 9.738916256157637e-06, "loss": 38.7127, "step": 11663 }, { "epoch": 277.7164179104478, "grad_norm": 1.3776895999908447, "learning_rate": 9.73809523809524e-06, "loss": 40.683, "step": 11664 }, { "epoch": 277.7402985074627, "grad_norm": 1.3789119720458984, "learning_rate": 9.737274220032842e-06, "loss": 39.3626, "step": 11665 }, { "epoch": 277.7641791044776, "grad_norm": 1.3817105293273926, "learning_rate": 9.736453201970445e-06, "loss": 39.5767, "step": 11666 }, { "epoch": 277.78805970149256, "grad_norm": 1.3462010622024536, "learning_rate": 9.735632183908047e-06, "loss": 39.84, "step": 11667 }, { "epoch": 277.81194029850747, "grad_norm": 1.38581120967865, "learning_rate": 9.734811165845648e-06, "loss": 40.4681, "step": 11668 }, { "epoch": 277.8358208955224, "grad_norm": 1.3573493957519531, "learning_rate": 9.733990147783252e-06, "loss": 41.1285, "step": 11669 }, { "epoch": 277.85970149253734, "grad_norm": 1.34071683883667, "learning_rate": 9.733169129720853e-06, "loss": 40.3347, "step": 11670 }, { "epoch": 277.88358208955225, "grad_norm": 1.3129396438598633, "learning_rate": 9.732348111658458e-06, "loss": 39.6487, "step": 11671 }, { "epoch": 277.90746268656716, "grad_norm": 1.3167649507522583, "learning_rate": 9.73152709359606e-06, "loss": 39.7368, "step": 11672 }, { "epoch": 277.93134328358207, "grad_norm": 1.3468724489212036, "learning_rate": 9.730706075533663e-06, "loss": 40.484, "step": 11673 }, { "epoch": 277.95522388059703, "grad_norm": 1.3059656620025635, "learning_rate": 9.729885057471266e-06, "loss": 39.9374, "step": 11674 }, { "epoch": 277.97910447761194, "grad_norm": 1.309106469154358, "learning_rate": 9.729064039408868e-06, "loss": 40.1204, "step": 11675 }, { "epoch": 278.0, "grad_norm": 1.3557480573654175, "learning_rate": 9.72824302134647e-06, "loss": 33.956, "step": 11676 }, { "epoch": 278.0238805970149, "grad_norm": 1.4195126295089722, "learning_rate": 9.727422003284073e-06, "loss": 40.4538, "step": 11677 }, { "epoch": 278.0477611940299, "grad_norm": 1.3410896062850952, "learning_rate": 9.726600985221676e-06, "loss": 40.0618, "step": 11678 }, { "epoch": 278.0716417910448, "grad_norm": 1.3140747547149658, "learning_rate": 9.725779967159279e-06, "loss": 40.9335, "step": 11679 }, { "epoch": 278.0955223880597, "grad_norm": 1.3329393863677979, "learning_rate": 9.724958949096881e-06, "loss": 40.4365, "step": 11680 }, { "epoch": 278.1194029850746, "grad_norm": 1.3449944257736206, "learning_rate": 9.724137931034484e-06, "loss": 41.2457, "step": 11681 }, { "epoch": 278.14328358208957, "grad_norm": 1.356674313545227, "learning_rate": 9.723316912972086e-06, "loss": 37.6034, "step": 11682 }, { "epoch": 278.1671641791045, "grad_norm": 1.3560938835144043, "learning_rate": 9.722495894909689e-06, "loss": 40.1485, "step": 11683 }, { "epoch": 278.1910447761194, "grad_norm": 1.367944359779358, "learning_rate": 9.721674876847292e-06, "loss": 39.27, "step": 11684 }, { "epoch": 278.21492537313435, "grad_norm": 1.3749209642410278, "learning_rate": 9.720853858784894e-06, "loss": 40.6461, "step": 11685 }, { "epoch": 278.23880597014926, "grad_norm": 1.3278100490570068, "learning_rate": 9.720032840722497e-06, "loss": 39.9005, "step": 11686 }, { "epoch": 278.26268656716417, "grad_norm": 1.3942480087280273, "learning_rate": 9.7192118226601e-06, "loss": 39.4353, "step": 11687 }, { "epoch": 278.28656716417913, "grad_norm": NaN, "learning_rate": 9.718390804597702e-06, "loss": 34.6306, "step": 11688 }, { "epoch": 278.31044776119404, "grad_norm": 1.2947641611099243, "learning_rate": 9.718390804597702e-06, "loss": 39.7259, "step": 11689 }, { "epoch": 278.33432835820895, "grad_norm": NaN, "learning_rate": 9.717569786535305e-06, "loss": 55.7191, "step": 11690 }, { "epoch": 278.35820895522386, "grad_norm": 1.3165181875228882, "learning_rate": 9.717569786535305e-06, "loss": 40.2017, "step": 11691 }, { "epoch": 278.3820895522388, "grad_norm": 1.3223551511764526, "learning_rate": 9.716748768472907e-06, "loss": 40.4751, "step": 11692 }, { "epoch": 278.40597014925373, "grad_norm": 1.3297650814056396, "learning_rate": 9.71592775041051e-06, "loss": 40.1078, "step": 11693 }, { "epoch": 278.42985074626864, "grad_norm": 1.404539942741394, "learning_rate": 9.715106732348113e-06, "loss": 38.9561, "step": 11694 }, { "epoch": 278.4537313432836, "grad_norm": 1.3677030801773071, "learning_rate": 9.714285714285715e-06, "loss": 38.7873, "step": 11695 }, { "epoch": 278.4776119402985, "grad_norm": 1.3696281909942627, "learning_rate": 9.713464696223318e-06, "loss": 40.0667, "step": 11696 }, { "epoch": 278.5014925373134, "grad_norm": 1.3446904420852661, "learning_rate": 9.71264367816092e-06, "loss": 40.2341, "step": 11697 }, { "epoch": 278.52537313432833, "grad_norm": 1.3593041896820068, "learning_rate": 9.711822660098523e-06, "loss": 39.3956, "step": 11698 }, { "epoch": 278.5492537313433, "grad_norm": 1.3542275428771973, "learning_rate": 9.711001642036126e-06, "loss": 38.0366, "step": 11699 }, { "epoch": 278.5731343283582, "grad_norm": 1.3503575325012207, "learning_rate": 9.710180623973728e-06, "loss": 40.6686, "step": 11700 }, { "epoch": 278.5970149253731, "grad_norm": 1.3333566188812256, "learning_rate": 9.709359605911331e-06, "loss": 39.5821, "step": 11701 }, { "epoch": 278.6208955223881, "grad_norm": 1.3420039415359497, "learning_rate": 9.708538587848933e-06, "loss": 39.3282, "step": 11702 }, { "epoch": 278.644776119403, "grad_norm": 1.316603422164917, "learning_rate": 9.707717569786536e-06, "loss": 40.5579, "step": 11703 }, { "epoch": 278.6686567164179, "grad_norm": 1.367704153060913, "learning_rate": 9.706896551724139e-06, "loss": 40.5864, "step": 11704 }, { "epoch": 278.6925373134328, "grad_norm": 1.3511896133422852, "learning_rate": 9.706075533661741e-06, "loss": 39.3801, "step": 11705 }, { "epoch": 278.7164179104478, "grad_norm": 1.3597991466522217, "learning_rate": 9.705254515599344e-06, "loss": 40.1995, "step": 11706 }, { "epoch": 278.7402985074627, "grad_norm": 1.3694653511047363, "learning_rate": 9.704433497536947e-06, "loss": 37.8542, "step": 11707 }, { "epoch": 278.7641791044776, "grad_norm": 1.3622801303863525, "learning_rate": 9.703612479474549e-06, "loss": 39.3383, "step": 11708 }, { "epoch": 278.78805970149256, "grad_norm": 1.3569239377975464, "learning_rate": 9.702791461412152e-06, "loss": 40.5579, "step": 11709 }, { "epoch": 278.81194029850747, "grad_norm": 1.38229501247406, "learning_rate": 9.701970443349754e-06, "loss": 39.8366, "step": 11710 }, { "epoch": 278.8358208955224, "grad_norm": 1.335414171218872, "learning_rate": 9.701149425287357e-06, "loss": 39.8925, "step": 11711 }, { "epoch": 278.85970149253734, "grad_norm": 1.4135106801986694, "learning_rate": 9.70032840722496e-06, "loss": 39.8226, "step": 11712 }, { "epoch": 278.88358208955225, "grad_norm": 1.3445255756378174, "learning_rate": 9.699507389162562e-06, "loss": 39.7422, "step": 11713 }, { "epoch": 278.90746268656716, "grad_norm": 1.373277187347412, "learning_rate": 9.698686371100165e-06, "loss": 38.7098, "step": 11714 }, { "epoch": 278.93134328358207, "grad_norm": 1.317012071609497, "learning_rate": 9.697865353037767e-06, "loss": 39.5745, "step": 11715 }, { "epoch": 278.95522388059703, "grad_norm": 1.3623042106628418, "learning_rate": 9.69704433497537e-06, "loss": 39.7018, "step": 11716 }, { "epoch": 278.97910447761194, "grad_norm": 1.385007619857788, "learning_rate": 9.696223316912973e-06, "loss": 40.0628, "step": 11717 }, { "epoch": 279.0, "grad_norm": 1.3600530624389648, "learning_rate": 9.695402298850575e-06, "loss": 34.3755, "step": 11718 }, { "epoch": 279.0238805970149, "grad_norm": 1.3523074388504028, "learning_rate": 9.694581280788178e-06, "loss": 39.5053, "step": 11719 }, { "epoch": 279.0477611940299, "grad_norm": 1.343103051185608, "learning_rate": 9.69376026272578e-06, "loss": 39.4008, "step": 11720 }, { "epoch": 279.0716417910448, "grad_norm": 1.3498467206954956, "learning_rate": 9.692939244663383e-06, "loss": 40.1401, "step": 11721 }, { "epoch": 279.0955223880597, "grad_norm": 1.3027307987213135, "learning_rate": 9.692118226600986e-06, "loss": 39.188, "step": 11722 }, { "epoch": 279.1194029850746, "grad_norm": 1.4043124914169312, "learning_rate": 9.691297208538588e-06, "loss": 40.14, "step": 11723 }, { "epoch": 279.14328358208957, "grad_norm": 1.3594958782196045, "learning_rate": 9.690476190476191e-06, "loss": 38.2481, "step": 11724 }, { "epoch": 279.1671641791045, "grad_norm": 1.367806077003479, "learning_rate": 9.689655172413794e-06, "loss": 38.4196, "step": 11725 }, { "epoch": 279.1910447761194, "grad_norm": 1.3454105854034424, "learning_rate": 9.688834154351396e-06, "loss": 40.3694, "step": 11726 }, { "epoch": 279.21492537313435, "grad_norm": 1.3665401935577393, "learning_rate": 9.688013136288999e-06, "loss": 39.1504, "step": 11727 }, { "epoch": 279.23880597014926, "grad_norm": 1.4170808792114258, "learning_rate": 9.687192118226601e-06, "loss": 39.3193, "step": 11728 }, { "epoch": 279.26268656716417, "grad_norm": 1.3672001361846924, "learning_rate": 9.686371100164204e-06, "loss": 41.3121, "step": 11729 }, { "epoch": 279.28656716417913, "grad_norm": 1.3567603826522827, "learning_rate": 9.685550082101807e-06, "loss": 39.5761, "step": 11730 }, { "epoch": 279.31044776119404, "grad_norm": 1.3821710348129272, "learning_rate": 9.68472906403941e-06, "loss": 40.2029, "step": 11731 }, { "epoch": 279.33432835820895, "grad_norm": 1.39670991897583, "learning_rate": 9.683908045977012e-06, "loss": 38.528, "step": 11732 }, { "epoch": 279.35820895522386, "grad_norm": 1.2990940809249878, "learning_rate": 9.683087027914614e-06, "loss": 37.5465, "step": 11733 }, { "epoch": 279.3820895522388, "grad_norm": 1.3748326301574707, "learning_rate": 9.682266009852219e-06, "loss": 39.92, "step": 11734 }, { "epoch": 279.40597014925373, "grad_norm": 1.3493742942810059, "learning_rate": 9.68144499178982e-06, "loss": 39.4079, "step": 11735 }, { "epoch": 279.42985074626864, "grad_norm": 1.322428584098816, "learning_rate": 9.680623973727424e-06, "loss": 40.1684, "step": 11736 }, { "epoch": 279.4537313432836, "grad_norm": 1.3710432052612305, "learning_rate": 9.679802955665025e-06, "loss": 38.8359, "step": 11737 }, { "epoch": 279.4776119402985, "grad_norm": 1.3573514223098755, "learning_rate": 9.67898193760263e-06, "loss": 40.1631, "step": 11738 }, { "epoch": 279.5014925373134, "grad_norm": 1.364327073097229, "learning_rate": 9.67816091954023e-06, "loss": 39.9625, "step": 11739 }, { "epoch": 279.52537313432833, "grad_norm": 1.3695443868637085, "learning_rate": 9.677339901477833e-06, "loss": 39.276, "step": 11740 }, { "epoch": 279.5492537313433, "grad_norm": 1.3574298620224, "learning_rate": 9.676518883415435e-06, "loss": 39.914, "step": 11741 }, { "epoch": 279.5731343283582, "grad_norm": 1.3801554441452026, "learning_rate": 9.675697865353038e-06, "loss": 40.4483, "step": 11742 }, { "epoch": 279.5970149253731, "grad_norm": 1.34360671043396, "learning_rate": 9.67487684729064e-06, "loss": 39.6304, "step": 11743 }, { "epoch": 279.6208955223881, "grad_norm": 1.328654408454895, "learning_rate": 9.674055829228243e-06, "loss": 39.9369, "step": 11744 }, { "epoch": 279.644776119403, "grad_norm": 1.4023579359054565, "learning_rate": 9.673234811165848e-06, "loss": 40.578, "step": 11745 }, { "epoch": 279.6686567164179, "grad_norm": 1.369316816329956, "learning_rate": 9.672413793103448e-06, "loss": 40.0573, "step": 11746 }, { "epoch": 279.6925373134328, "grad_norm": 1.3656753301620483, "learning_rate": 9.671592775041053e-06, "loss": 40.0835, "step": 11747 }, { "epoch": 279.7164179104478, "grad_norm": 1.374408483505249, "learning_rate": 9.670771756978654e-06, "loss": 39.7513, "step": 11748 }, { "epoch": 279.7402985074627, "grad_norm": 1.3652024269104004, "learning_rate": 9.669950738916258e-06, "loss": 40.7837, "step": 11749 }, { "epoch": 279.7641791044776, "grad_norm": 1.3292500972747803, "learning_rate": 9.669129720853859e-06, "loss": 41.2348, "step": 11750 }, { "epoch": 279.78805970149256, "grad_norm": 1.3645533323287964, "learning_rate": 9.668308702791463e-06, "loss": 40.3109, "step": 11751 }, { "epoch": 279.81194029850747, "grad_norm": 1.3830903768539429, "learning_rate": 9.667487684729064e-06, "loss": 39.5796, "step": 11752 }, { "epoch": 279.8358208955224, "grad_norm": 1.2952982187271118, "learning_rate": 9.666666666666667e-06, "loss": 40.2291, "step": 11753 }, { "epoch": 279.85970149253734, "grad_norm": 1.3664523363113403, "learning_rate": 9.66584564860427e-06, "loss": 39.955, "step": 11754 }, { "epoch": 279.88358208955225, "grad_norm": 1.3321058750152588, "learning_rate": 9.665024630541872e-06, "loss": 39.5515, "step": 11755 }, { "epoch": 279.90746268656716, "grad_norm": 1.3413417339324951, "learning_rate": 9.664203612479476e-06, "loss": 39.8464, "step": 11756 }, { "epoch": 279.93134328358207, "grad_norm": 1.4030283689498901, "learning_rate": 9.663382594417077e-06, "loss": 39.8173, "step": 11757 }, { "epoch": 279.95522388059703, "grad_norm": 1.3264789581298828, "learning_rate": 9.662561576354681e-06, "loss": 41.3448, "step": 11758 }, { "epoch": 279.97910447761194, "grad_norm": 1.3557759523391724, "learning_rate": 9.661740558292282e-06, "loss": 40.0458, "step": 11759 }, { "epoch": 280.0, "grad_norm": 1.343653678894043, "learning_rate": 9.660919540229887e-06, "loss": 34.4738, "step": 11760 }, { "epoch": 280.0238805970149, "grad_norm": 1.3720611333847046, "learning_rate": 9.660098522167488e-06, "loss": 40.7389, "step": 11761 }, { "epoch": 280.0477611940299, "grad_norm": 1.3643609285354614, "learning_rate": 9.659277504105092e-06, "loss": 40.2394, "step": 11762 }, { "epoch": 280.0716417910448, "grad_norm": 1.328127384185791, "learning_rate": 9.658456486042693e-06, "loss": 39.8801, "step": 11763 }, { "epoch": 280.0955223880597, "grad_norm": 1.3566992282867432, "learning_rate": 9.657635467980297e-06, "loss": 40.7611, "step": 11764 }, { "epoch": 280.1194029850746, "grad_norm": 1.3575495481491089, "learning_rate": 9.6568144499179e-06, "loss": 39.3506, "step": 11765 }, { "epoch": 280.14328358208957, "grad_norm": 1.3816113471984863, "learning_rate": 9.6559934318555e-06, "loss": 40.1278, "step": 11766 }, { "epoch": 280.1671641791045, "grad_norm": 1.3884199857711792, "learning_rate": 9.655172413793105e-06, "loss": 40.2182, "step": 11767 }, { "epoch": 280.1910447761194, "grad_norm": 1.3913698196411133, "learning_rate": 9.654351395730706e-06, "loss": 40.05, "step": 11768 }, { "epoch": 280.21492537313435, "grad_norm": 1.379381537437439, "learning_rate": 9.65353037766831e-06, "loss": 39.0546, "step": 11769 }, { "epoch": 280.23880597014926, "grad_norm": 1.3298817873001099, "learning_rate": 9.652709359605911e-06, "loss": 39.4882, "step": 11770 }, { "epoch": 280.26268656716417, "grad_norm": 1.3298147916793823, "learning_rate": 9.651888341543515e-06, "loss": 39.3515, "step": 11771 }, { "epoch": 280.28656716417913, "grad_norm": 1.3605128526687622, "learning_rate": 9.651067323481116e-06, "loss": 40.8241, "step": 11772 }, { "epoch": 280.31044776119404, "grad_norm": 1.321576714515686, "learning_rate": 9.65024630541872e-06, "loss": 39.7421, "step": 11773 }, { "epoch": 280.33432835820895, "grad_norm": 1.3021749258041382, "learning_rate": 9.649425287356322e-06, "loss": 39.3649, "step": 11774 }, { "epoch": 280.35820895522386, "grad_norm": 1.3297158479690552, "learning_rate": 9.648604269293926e-06, "loss": 39.3021, "step": 11775 }, { "epoch": 280.3820895522388, "grad_norm": 1.3627159595489502, "learning_rate": 9.647783251231529e-06, "loss": 39.0007, "step": 11776 }, { "epoch": 280.40597014925373, "grad_norm": 1.3631556034088135, "learning_rate": 9.646962233169131e-06, "loss": 39.5366, "step": 11777 }, { "epoch": 280.42985074626864, "grad_norm": 1.3101710081100464, "learning_rate": 9.646141215106734e-06, "loss": 40.2786, "step": 11778 }, { "epoch": 280.4537313432836, "grad_norm": 1.3566820621490479, "learning_rate": 9.645320197044336e-06, "loss": 40.1488, "step": 11779 }, { "epoch": 280.4776119402985, "grad_norm": 1.329225778579712, "learning_rate": 9.644499178981939e-06, "loss": 39.9253, "step": 11780 }, { "epoch": 280.5014925373134, "grad_norm": 1.3892849683761597, "learning_rate": 9.64367816091954e-06, "loss": 38.5882, "step": 11781 }, { "epoch": 280.52537313432833, "grad_norm": 1.3692129850387573, "learning_rate": 9.642857142857144e-06, "loss": 39.3804, "step": 11782 }, { "epoch": 280.5492537313433, "grad_norm": 1.343116044998169, "learning_rate": 9.642036124794745e-06, "loss": 39.4893, "step": 11783 }, { "epoch": 280.5731343283582, "grad_norm": 1.3804268836975098, "learning_rate": 9.64121510673235e-06, "loss": 40.0689, "step": 11784 }, { "epoch": 280.5970149253731, "grad_norm": 1.3471994400024414, "learning_rate": 9.640394088669952e-06, "loss": 39.7808, "step": 11785 }, { "epoch": 280.6208955223881, "grad_norm": 1.3751758337020874, "learning_rate": 9.639573070607555e-06, "loss": 38.2746, "step": 11786 }, { "epoch": 280.644776119403, "grad_norm": 1.382051944732666, "learning_rate": 9.638752052545157e-06, "loss": 40.906, "step": 11787 }, { "epoch": 280.6686567164179, "grad_norm": 1.3337938785552979, "learning_rate": 9.63793103448276e-06, "loss": 40.1857, "step": 11788 }, { "epoch": 280.6925373134328, "grad_norm": 1.3310809135437012, "learning_rate": 9.637110016420362e-06, "loss": 41.342, "step": 11789 }, { "epoch": 280.7164179104478, "grad_norm": 1.345390796661377, "learning_rate": 9.636288998357965e-06, "loss": 39.7888, "step": 11790 }, { "epoch": 280.7402985074627, "grad_norm": 1.351646065711975, "learning_rate": 9.635467980295568e-06, "loss": 39.0408, "step": 11791 }, { "epoch": 280.7641791044776, "grad_norm": 1.3307909965515137, "learning_rate": 9.63464696223317e-06, "loss": 39.6272, "step": 11792 }, { "epoch": 280.78805970149256, "grad_norm": 1.3255401849746704, "learning_rate": 9.633825944170773e-06, "loss": 40.3905, "step": 11793 }, { "epoch": 280.81194029850747, "grad_norm": 1.327307939529419, "learning_rate": 9.633004926108374e-06, "loss": 39.8626, "step": 11794 }, { "epoch": 280.8358208955224, "grad_norm": 1.3939112424850464, "learning_rate": 9.632183908045978e-06, "loss": 39.5933, "step": 11795 }, { "epoch": 280.85970149253734, "grad_norm": 1.3109122514724731, "learning_rate": 9.63136288998358e-06, "loss": 39.8738, "step": 11796 }, { "epoch": 280.88358208955225, "grad_norm": 1.3322111368179321, "learning_rate": 9.630541871921183e-06, "loss": 39.1215, "step": 11797 }, { "epoch": 280.90746268656716, "grad_norm": 1.3130526542663574, "learning_rate": 9.629720853858786e-06, "loss": 39.8124, "step": 11798 }, { "epoch": 280.93134328358207, "grad_norm": 1.337220311164856, "learning_rate": 9.628899835796389e-06, "loss": 40.5813, "step": 11799 }, { "epoch": 280.95522388059703, "grad_norm": 1.577943205833435, "learning_rate": 9.628078817733991e-06, "loss": 39.7071, "step": 11800 }, { "epoch": 280.97910447761194, "grad_norm": 1.3655011653900146, "learning_rate": 9.627257799671594e-06, "loss": 39.1063, "step": 11801 }, { "epoch": 281.0, "grad_norm": 1.3910720348358154, "learning_rate": 9.626436781609196e-06, "loss": 33.6716, "step": 11802 }, { "epoch": 281.0238805970149, "grad_norm": 1.3038498163223267, "learning_rate": 9.625615763546799e-06, "loss": 40.4412, "step": 11803 }, { "epoch": 281.0477611940299, "grad_norm": 1.3397432565689087, "learning_rate": 9.624794745484402e-06, "loss": 40.4539, "step": 11804 }, { "epoch": 281.0716417910448, "grad_norm": 1.3492815494537354, "learning_rate": 9.623973727422004e-06, "loss": 40.1209, "step": 11805 }, { "epoch": 281.0955223880597, "grad_norm": 1.348578929901123, "learning_rate": 9.623152709359607e-06, "loss": 38.333, "step": 11806 }, { "epoch": 281.1194029850746, "grad_norm": 1.3512033224105835, "learning_rate": 9.62233169129721e-06, "loss": 39.0776, "step": 11807 }, { "epoch": 281.14328358208957, "grad_norm": 1.3896512985229492, "learning_rate": 9.621510673234812e-06, "loss": 39.7392, "step": 11808 }, { "epoch": 281.1671641791045, "grad_norm": 1.334478497505188, "learning_rate": 9.620689655172415e-06, "loss": 40.7877, "step": 11809 }, { "epoch": 281.1910447761194, "grad_norm": 1.3311280012130737, "learning_rate": 9.619868637110017e-06, "loss": 39.2882, "step": 11810 }, { "epoch": 281.21492537313435, "grad_norm": 1.355263113975525, "learning_rate": 9.61904761904762e-06, "loss": 41.7534, "step": 11811 }, { "epoch": 281.23880597014926, "grad_norm": 1.4211317300796509, "learning_rate": 9.618226600985223e-06, "loss": 39.3712, "step": 11812 }, { "epoch": 281.26268656716417, "grad_norm": 1.352872371673584, "learning_rate": 9.617405582922825e-06, "loss": 39.2463, "step": 11813 }, { "epoch": 281.28656716417913, "grad_norm": 1.3403711318969727, "learning_rate": 9.616584564860428e-06, "loss": 40.653, "step": 11814 }, { "epoch": 281.31044776119404, "grad_norm": 1.2997252941131592, "learning_rate": 9.61576354679803e-06, "loss": 40.3346, "step": 11815 }, { "epoch": 281.33432835820895, "grad_norm": NaN, "learning_rate": 9.614942528735633e-06, "loss": 33.9981, "step": 11816 }, { "epoch": 281.35820895522386, "grad_norm": 1.4084361791610718, "learning_rate": 9.614942528735633e-06, "loss": 41.313, "step": 11817 }, { "epoch": 281.3820895522388, "grad_norm": 1.3483589887619019, "learning_rate": 9.614121510673236e-06, "loss": 40.4862, "step": 11818 }, { "epoch": 281.40597014925373, "grad_norm": 1.3940383195877075, "learning_rate": 9.613300492610838e-06, "loss": 39.4095, "step": 11819 }, { "epoch": 281.42985074626864, "grad_norm": 1.329891562461853, "learning_rate": 9.612479474548441e-06, "loss": 39.357, "step": 11820 }, { "epoch": 281.4537313432836, "grad_norm": 1.36066472530365, "learning_rate": 9.611658456486043e-06, "loss": 39.1256, "step": 11821 }, { "epoch": 281.4776119402985, "grad_norm": 1.3627562522888184, "learning_rate": 9.610837438423646e-06, "loss": 40.0713, "step": 11822 }, { "epoch": 281.5014925373134, "grad_norm": 1.3368167877197266, "learning_rate": 9.610016420361249e-06, "loss": 39.7038, "step": 11823 }, { "epoch": 281.52537313432833, "grad_norm": 1.3594447374343872, "learning_rate": 9.609195402298851e-06, "loss": 40.8709, "step": 11824 }, { "epoch": 281.5492537313433, "grad_norm": 1.3808660507202148, "learning_rate": 9.608374384236454e-06, "loss": 39.6555, "step": 11825 }, { "epoch": 281.5731343283582, "grad_norm": 1.3527019023895264, "learning_rate": 9.607553366174057e-06, "loss": 39.9215, "step": 11826 }, { "epoch": 281.5970149253731, "grad_norm": 1.3433126211166382, "learning_rate": 9.60673234811166e-06, "loss": 40.4791, "step": 11827 }, { "epoch": 281.6208955223881, "grad_norm": 1.4003667831420898, "learning_rate": 9.605911330049262e-06, "loss": 38.866, "step": 11828 }, { "epoch": 281.644776119403, "grad_norm": 1.3374242782592773, "learning_rate": 9.605090311986864e-06, "loss": 39.8778, "step": 11829 }, { "epoch": 281.6686567164179, "grad_norm": 1.4075448513031006, "learning_rate": 9.604269293924467e-06, "loss": 38.7555, "step": 11830 }, { "epoch": 281.6925373134328, "grad_norm": 1.3414239883422852, "learning_rate": 9.60344827586207e-06, "loss": 38.7269, "step": 11831 }, { "epoch": 281.7164179104478, "grad_norm": 1.3326551914215088, "learning_rate": 9.602627257799672e-06, "loss": 40.7774, "step": 11832 }, { "epoch": 281.7402985074627, "grad_norm": 1.3588840961456299, "learning_rate": 9.601806239737275e-06, "loss": 40.262, "step": 11833 }, { "epoch": 281.7641791044776, "grad_norm": 1.346191644668579, "learning_rate": 9.600985221674877e-06, "loss": 40.0516, "step": 11834 }, { "epoch": 281.78805970149256, "grad_norm": 1.3806320428848267, "learning_rate": 9.60016420361248e-06, "loss": 39.225, "step": 11835 }, { "epoch": 281.81194029850747, "grad_norm": 1.3348801136016846, "learning_rate": 9.599343185550083e-06, "loss": 40.4851, "step": 11836 }, { "epoch": 281.8358208955224, "grad_norm": 1.4145880937576294, "learning_rate": 9.598522167487685e-06, "loss": 39.0674, "step": 11837 }, { "epoch": 281.85970149253734, "grad_norm": 1.3552706241607666, "learning_rate": 9.597701149425288e-06, "loss": 39.9885, "step": 11838 }, { "epoch": 281.88358208955225, "grad_norm": 1.4069424867630005, "learning_rate": 9.59688013136289e-06, "loss": 39.3331, "step": 11839 }, { "epoch": 281.90746268656716, "grad_norm": 1.3222402334213257, "learning_rate": 9.596059113300493e-06, "loss": 39.4501, "step": 11840 }, { "epoch": 281.93134328358207, "grad_norm": 1.3706156015396118, "learning_rate": 9.595238095238096e-06, "loss": 38.647, "step": 11841 }, { "epoch": 281.95522388059703, "grad_norm": 1.3273464441299438, "learning_rate": 9.594417077175698e-06, "loss": 39.056, "step": 11842 }, { "epoch": 281.97910447761194, "grad_norm": 1.3767000436782837, "learning_rate": 9.593596059113301e-06, "loss": 39.8, "step": 11843 }, { "epoch": 282.0, "grad_norm": 1.3136667013168335, "learning_rate": 9.592775041050904e-06, "loss": 34.3953, "step": 11844 }, { "epoch": 282.0238805970149, "grad_norm": 1.3465033769607544, "learning_rate": 9.591954022988506e-06, "loss": 39.9137, "step": 11845 }, { "epoch": 282.0477611940299, "grad_norm": 1.3379756212234497, "learning_rate": 9.591133004926109e-06, "loss": 39.2801, "step": 11846 }, { "epoch": 282.0716417910448, "grad_norm": NaN, "learning_rate": 9.590311986863711e-06, "loss": 69.2768, "step": 11847 }, { "epoch": 282.0955223880597, "grad_norm": 1.3555760383605957, "learning_rate": 9.590311986863711e-06, "loss": 38.9841, "step": 11848 }, { "epoch": 282.1194029850746, "grad_norm": 1.3611550331115723, "learning_rate": 9.589490968801316e-06, "loss": 39.1936, "step": 11849 }, { "epoch": 282.14328358208957, "grad_norm": 1.3621294498443604, "learning_rate": 9.588669950738917e-06, "loss": 39.6546, "step": 11850 }, { "epoch": 282.1671641791045, "grad_norm": 1.3605669736862183, "learning_rate": 9.58784893267652e-06, "loss": 40.2574, "step": 11851 }, { "epoch": 282.1910447761194, "grad_norm": 1.329823613166809, "learning_rate": 9.587027914614122e-06, "loss": 40.5696, "step": 11852 }, { "epoch": 282.21492537313435, "grad_norm": 1.3053267002105713, "learning_rate": 9.586206896551724e-06, "loss": 40.1896, "step": 11853 }, { "epoch": 282.23880597014926, "grad_norm": 1.3640660047531128, "learning_rate": 9.585385878489327e-06, "loss": 39.232, "step": 11854 }, { "epoch": 282.26268656716417, "grad_norm": 1.307161569595337, "learning_rate": 9.58456486042693e-06, "loss": 39.3547, "step": 11855 }, { "epoch": 282.28656716417913, "grad_norm": 1.3663139343261719, "learning_rate": 9.583743842364532e-06, "loss": 39.6703, "step": 11856 }, { "epoch": 282.31044776119404, "grad_norm": 1.378938913345337, "learning_rate": 9.582922824302135e-06, "loss": 41.1813, "step": 11857 }, { "epoch": 282.33432835820895, "grad_norm": 1.3852180242538452, "learning_rate": 9.582101806239738e-06, "loss": 39.4605, "step": 11858 }, { "epoch": 282.35820895522386, "grad_norm": 1.3536908626556396, "learning_rate": 9.58128078817734e-06, "loss": 39.7071, "step": 11859 }, { "epoch": 282.3820895522388, "grad_norm": 1.3937203884124756, "learning_rate": 9.580459770114944e-06, "loss": 41.0286, "step": 11860 }, { "epoch": 282.40597014925373, "grad_norm": 1.2877973318099976, "learning_rate": 9.579638752052545e-06, "loss": 38.9527, "step": 11861 }, { "epoch": 282.42985074626864, "grad_norm": 1.304263710975647, "learning_rate": 9.57881773399015e-06, "loss": 39.9856, "step": 11862 }, { "epoch": 282.4537313432836, "grad_norm": NaN, "learning_rate": 9.57799671592775e-06, "loss": 35.041, "step": 11863 }, { "epoch": 282.4776119402985, "grad_norm": 1.3463139533996582, "learning_rate": 9.57799671592775e-06, "loss": 39.93, "step": 11864 }, { "epoch": 282.5014925373134, "grad_norm": 1.3442052602767944, "learning_rate": 9.577175697865353e-06, "loss": 38.175, "step": 11865 }, { "epoch": 282.52537313432833, "grad_norm": 1.3571784496307373, "learning_rate": 9.576354679802956e-06, "loss": 39.6169, "step": 11866 }, { "epoch": 282.5492537313433, "grad_norm": 1.3411413431167603, "learning_rate": 9.575533661740558e-06, "loss": 39.341, "step": 11867 }, { "epoch": 282.5731343283582, "grad_norm": 1.3290903568267822, "learning_rate": 9.574712643678161e-06, "loss": 38.8325, "step": 11868 }, { "epoch": 282.5970149253731, "grad_norm": 1.3402847051620483, "learning_rate": 9.573891625615764e-06, "loss": 39.6931, "step": 11869 }, { "epoch": 282.6208955223881, "grad_norm": 1.3513362407684326, "learning_rate": 9.573070607553368e-06, "loss": 39.863, "step": 11870 }, { "epoch": 282.644776119403, "grad_norm": 1.389910340309143, "learning_rate": 9.572249589490969e-06, "loss": 40.436, "step": 11871 }, { "epoch": 282.6686567164179, "grad_norm": 1.3640393018722534, "learning_rate": 9.571428571428573e-06, "loss": 40.1791, "step": 11872 }, { "epoch": 282.6925373134328, "grad_norm": 1.338409662246704, "learning_rate": 9.570607553366174e-06, "loss": 39.0555, "step": 11873 }, { "epoch": 282.7164179104478, "grad_norm": 1.3443025350570679, "learning_rate": 9.569786535303778e-06, "loss": 40.4081, "step": 11874 }, { "epoch": 282.7402985074627, "grad_norm": 1.3714196681976318, "learning_rate": 9.56896551724138e-06, "loss": 40.9347, "step": 11875 }, { "epoch": 282.7641791044776, "grad_norm": 1.3201544284820557, "learning_rate": 9.568144499178984e-06, "loss": 40.4139, "step": 11876 }, { "epoch": 282.78805970149256, "grad_norm": 1.3567554950714111, "learning_rate": 9.567323481116585e-06, "loss": 40.7567, "step": 11877 }, { "epoch": 282.81194029850747, "grad_norm": 1.3780186176300049, "learning_rate": 9.566502463054189e-06, "loss": 39.1304, "step": 11878 }, { "epoch": 282.8358208955224, "grad_norm": 1.3305038213729858, "learning_rate": 9.56568144499179e-06, "loss": 38.9808, "step": 11879 }, { "epoch": 282.85970149253734, "grad_norm": 1.396088719367981, "learning_rate": 9.564860426929392e-06, "loss": 39.9869, "step": 11880 }, { "epoch": 282.88358208955225, "grad_norm": 1.3291435241699219, "learning_rate": 9.564039408866997e-06, "loss": 39.5252, "step": 11881 }, { "epoch": 282.90746268656716, "grad_norm": 1.358914852142334, "learning_rate": 9.563218390804598e-06, "loss": 40.981, "step": 11882 }, { "epoch": 282.93134328358207, "grad_norm": 1.4290634393692017, "learning_rate": 9.562397372742202e-06, "loss": 38.7869, "step": 11883 }, { "epoch": 282.95522388059703, "grad_norm": NaN, "learning_rate": 9.561576354679803e-06, "loss": 45.3124, "step": 11884 }, { "epoch": 282.97910447761194, "grad_norm": 1.2943735122680664, "learning_rate": 9.561576354679803e-06, "loss": 40.4777, "step": 11885 }, { "epoch": 283.0, "grad_norm": 1.3545540571212769, "learning_rate": 9.560755336617407e-06, "loss": 34.3591, "step": 11886 }, { "epoch": 283.0238805970149, "grad_norm": 1.3214783668518066, "learning_rate": 9.559934318555008e-06, "loss": 39.6863, "step": 11887 }, { "epoch": 283.0477611940299, "grad_norm": 1.3652448654174805, "learning_rate": 9.559113300492612e-06, "loss": 39.3451, "step": 11888 }, { "epoch": 283.0716417910448, "grad_norm": 1.3633819818496704, "learning_rate": 9.558292282430213e-06, "loss": 39.7779, "step": 11889 }, { "epoch": 283.0955223880597, "grad_norm": 1.355879783630371, "learning_rate": 9.557471264367818e-06, "loss": 39.7355, "step": 11890 }, { "epoch": 283.1194029850746, "grad_norm": 1.3735915422439575, "learning_rate": 9.55665024630542e-06, "loss": 40.5468, "step": 11891 }, { "epoch": 283.14328358208957, "grad_norm": 1.3793094158172607, "learning_rate": 9.555829228243023e-06, "loss": 40.846, "step": 11892 }, { "epoch": 283.1671641791045, "grad_norm": 1.3825353384017944, "learning_rate": 9.555008210180625e-06, "loss": 39.5127, "step": 11893 }, { "epoch": 283.1910447761194, "grad_norm": 1.3445032835006714, "learning_rate": 9.554187192118226e-06, "loss": 39.7402, "step": 11894 }, { "epoch": 283.21492537313435, "grad_norm": 1.3300297260284424, "learning_rate": 9.55336617405583e-06, "loss": 39.351, "step": 11895 }, { "epoch": 283.23880597014926, "grad_norm": 1.3213928937911987, "learning_rate": 9.552545155993432e-06, "loss": 39.7011, "step": 11896 }, { "epoch": 283.26268656716417, "grad_norm": 1.337455153465271, "learning_rate": 9.551724137931036e-06, "loss": 40.3707, "step": 11897 }, { "epoch": 283.28656716417913, "grad_norm": 1.317324161529541, "learning_rate": 9.550903119868637e-06, "loss": 40.1612, "step": 11898 }, { "epoch": 283.31044776119404, "grad_norm": 1.3434256315231323, "learning_rate": 9.550082101806241e-06, "loss": 39.3482, "step": 11899 }, { "epoch": 283.33432835820895, "grad_norm": 1.2933021783828735, "learning_rate": 9.549261083743842e-06, "loss": 40.3544, "step": 11900 }, { "epoch": 283.35820895522386, "grad_norm": 1.3081268072128296, "learning_rate": 9.548440065681446e-06, "loss": 39.8239, "step": 11901 }, { "epoch": 283.3820895522388, "grad_norm": 1.3516889810562134, "learning_rate": 9.547619047619049e-06, "loss": 38.7712, "step": 11902 }, { "epoch": 283.40597014925373, "grad_norm": 1.308171272277832, "learning_rate": 9.546798029556652e-06, "loss": 40.4489, "step": 11903 }, { "epoch": 283.42985074626864, "grad_norm": 1.3518333435058594, "learning_rate": 9.545977011494254e-06, "loss": 38.4873, "step": 11904 }, { "epoch": 283.4537313432836, "grad_norm": 1.3313915729522705, "learning_rate": 9.545155993431857e-06, "loss": 40.4301, "step": 11905 }, { "epoch": 283.4776119402985, "grad_norm": 1.360315203666687, "learning_rate": 9.54433497536946e-06, "loss": 39.8452, "step": 11906 }, { "epoch": 283.5014925373134, "grad_norm": 1.341408610343933, "learning_rate": 9.543513957307062e-06, "loss": 40.1803, "step": 11907 }, { "epoch": 283.52537313432833, "grad_norm": 1.3586150407791138, "learning_rate": 9.542692939244665e-06, "loss": 40.5622, "step": 11908 }, { "epoch": 283.5492537313433, "grad_norm": 1.3538596630096436, "learning_rate": 9.541871921182266e-06, "loss": 39.2206, "step": 11909 }, { "epoch": 283.5731343283582, "grad_norm": 1.3425670862197876, "learning_rate": 9.54105090311987e-06, "loss": 40.5059, "step": 11910 }, { "epoch": 283.5970149253731, "grad_norm": 1.3247332572937012, "learning_rate": 9.54022988505747e-06, "loss": 38.3481, "step": 11911 }, { "epoch": 283.6208955223881, "grad_norm": 1.493769645690918, "learning_rate": 9.539408866995075e-06, "loss": 40.585, "step": 11912 }, { "epoch": 283.644776119403, "grad_norm": NaN, "learning_rate": 9.538587848932678e-06, "loss": 46.5762, "step": 11913 }, { "epoch": 283.6686567164179, "grad_norm": 1.3103183507919312, "learning_rate": 9.538587848932678e-06, "loss": 40.1121, "step": 11914 }, { "epoch": 283.6925373134328, "grad_norm": 1.3623749017715454, "learning_rate": 9.53776683087028e-06, "loss": 39.6951, "step": 11915 }, { "epoch": 283.7164179104478, "grad_norm": 1.3505587577819824, "learning_rate": 9.536945812807883e-06, "loss": 40.6442, "step": 11916 }, { "epoch": 283.7402985074627, "grad_norm": 1.3500257730484009, "learning_rate": 9.536124794745486e-06, "loss": 39.1807, "step": 11917 }, { "epoch": 283.7641791044776, "grad_norm": 1.3905893564224243, "learning_rate": 9.535303776683088e-06, "loss": 38.9479, "step": 11918 }, { "epoch": 283.78805970149256, "grad_norm": 1.367517352104187, "learning_rate": 9.53448275862069e-06, "loss": 39.0293, "step": 11919 }, { "epoch": 283.81194029850747, "grad_norm": 1.3210357427597046, "learning_rate": 9.533661740558293e-06, "loss": 39.4259, "step": 11920 }, { "epoch": 283.8358208955224, "grad_norm": 1.3142633438110352, "learning_rate": 9.532840722495896e-06, "loss": 40.5491, "step": 11921 }, { "epoch": 283.85970149253734, "grad_norm": 1.3067231178283691, "learning_rate": 9.532019704433499e-06, "loss": 40.5314, "step": 11922 }, { "epoch": 283.88358208955225, "grad_norm": 1.3167855739593506, "learning_rate": 9.531198686371101e-06, "loss": 40.4086, "step": 11923 }, { "epoch": 283.90746268656716, "grad_norm": 1.321997046470642, "learning_rate": 9.530377668308704e-06, "loss": 39.5793, "step": 11924 }, { "epoch": 283.93134328358207, "grad_norm": 1.3430708646774292, "learning_rate": 9.529556650246306e-06, "loss": 40.9633, "step": 11925 }, { "epoch": 283.95522388059703, "grad_norm": 1.3015934228897095, "learning_rate": 9.528735632183909e-06, "loss": 38.2675, "step": 11926 }, { "epoch": 283.97910447761194, "grad_norm": 1.3627605438232422, "learning_rate": 9.527914614121512e-06, "loss": 39.9407, "step": 11927 }, { "epoch": 284.0, "grad_norm": 1.3759921789169312, "learning_rate": 9.527093596059114e-06, "loss": 34.8649, "step": 11928 }, { "epoch": 284.0238805970149, "grad_norm": 1.3582794666290283, "learning_rate": 9.526272577996717e-06, "loss": 40.2747, "step": 11929 }, { "epoch": 284.0477611940299, "grad_norm": 1.307494044303894, "learning_rate": 9.52545155993432e-06, "loss": 39.4686, "step": 11930 }, { "epoch": 284.0716417910448, "grad_norm": 1.312878966331482, "learning_rate": 9.524630541871922e-06, "loss": 40.3985, "step": 11931 }, { "epoch": 284.0955223880597, "grad_norm": 1.3382452726364136, "learning_rate": 9.523809523809525e-06, "loss": 39.9966, "step": 11932 }, { "epoch": 284.1194029850746, "grad_norm": 1.3519777059555054, "learning_rate": 9.522988505747127e-06, "loss": 40.7874, "step": 11933 }, { "epoch": 284.14328358208957, "grad_norm": 1.3059759140014648, "learning_rate": 9.52216748768473e-06, "loss": 38.4909, "step": 11934 }, { "epoch": 284.1671641791045, "grad_norm": 1.3455674648284912, "learning_rate": 9.521346469622333e-06, "loss": 39.1882, "step": 11935 }, { "epoch": 284.1910447761194, "grad_norm": 1.3589744567871094, "learning_rate": 9.520525451559935e-06, "loss": 39.2948, "step": 11936 }, { "epoch": 284.21492537313435, "grad_norm": 1.35715913772583, "learning_rate": 9.519704433497538e-06, "loss": 39.7151, "step": 11937 }, { "epoch": 284.23880597014926, "grad_norm": 1.3418114185333252, "learning_rate": 9.51888341543514e-06, "loss": 40.0125, "step": 11938 }, { "epoch": 284.26268656716417, "grad_norm": 1.3140277862548828, "learning_rate": 9.518062397372743e-06, "loss": 39.7694, "step": 11939 }, { "epoch": 284.28656716417913, "grad_norm": 1.3323945999145508, "learning_rate": 9.517241379310346e-06, "loss": 39.2686, "step": 11940 }, { "epoch": 284.31044776119404, "grad_norm": 1.3288826942443848, "learning_rate": 9.516420361247948e-06, "loss": 38.8648, "step": 11941 }, { "epoch": 284.33432835820895, "grad_norm": 1.3686535358428955, "learning_rate": 9.515599343185551e-06, "loss": 40.7751, "step": 11942 }, { "epoch": 284.35820895522386, "grad_norm": 1.3436570167541504, "learning_rate": 9.514778325123153e-06, "loss": 39.8278, "step": 11943 }, { "epoch": 284.3820895522388, "grad_norm": 1.3437795639038086, "learning_rate": 9.513957307060756e-06, "loss": 40.2607, "step": 11944 }, { "epoch": 284.40597014925373, "grad_norm": 1.3410104513168335, "learning_rate": 9.513136288998359e-06, "loss": 40.3866, "step": 11945 }, { "epoch": 284.42985074626864, "grad_norm": 1.3147099018096924, "learning_rate": 9.512315270935961e-06, "loss": 39.7324, "step": 11946 }, { "epoch": 284.4537313432836, "grad_norm": 1.3391120433807373, "learning_rate": 9.511494252873564e-06, "loss": 40.0456, "step": 11947 }, { "epoch": 284.4776119402985, "grad_norm": 1.3288546800613403, "learning_rate": 9.510673234811167e-06, "loss": 39.8694, "step": 11948 }, { "epoch": 284.5014925373134, "grad_norm": 1.4134904146194458, "learning_rate": 9.50985221674877e-06, "loss": 39.82, "step": 11949 }, { "epoch": 284.52537313432833, "grad_norm": 1.3806240558624268, "learning_rate": 9.509031198686372e-06, "loss": 40.2971, "step": 11950 }, { "epoch": 284.5492537313433, "grad_norm": 1.3287502527236938, "learning_rate": 9.508210180623974e-06, "loss": 40.3293, "step": 11951 }, { "epoch": 284.5731343283582, "grad_norm": 1.320640206336975, "learning_rate": 9.507389162561577e-06, "loss": 39.1729, "step": 11952 }, { "epoch": 284.5970149253731, "grad_norm": 1.2898826599121094, "learning_rate": 9.50656814449918e-06, "loss": 39.777, "step": 11953 }, { "epoch": 284.6208955223881, "grad_norm": 1.3118760585784912, "learning_rate": 9.505747126436782e-06, "loss": 39.5815, "step": 11954 }, { "epoch": 284.644776119403, "grad_norm": 1.5062029361724854, "learning_rate": 9.504926108374385e-06, "loss": 39.2295, "step": 11955 }, { "epoch": 284.6686567164179, "grad_norm": 1.342293381690979, "learning_rate": 9.504105090311987e-06, "loss": 40.4088, "step": 11956 }, { "epoch": 284.6925373134328, "grad_norm": 1.3186544179916382, "learning_rate": 9.50328407224959e-06, "loss": 38.4205, "step": 11957 }, { "epoch": 284.7164179104478, "grad_norm": 1.3191159963607788, "learning_rate": 9.502463054187193e-06, "loss": 39.4358, "step": 11958 }, { "epoch": 284.7402985074627, "grad_norm": 1.3679149150848389, "learning_rate": 9.501642036124795e-06, "loss": 39.7786, "step": 11959 }, { "epoch": 284.7641791044776, "grad_norm": 1.3559149503707886, "learning_rate": 9.500821018062398e-06, "loss": 39.815, "step": 11960 }, { "epoch": 284.78805970149256, "grad_norm": 1.3416454792022705, "learning_rate": 9.5e-06, "loss": 40.1882, "step": 11961 }, { "epoch": 284.81194029850747, "grad_norm": 1.3638110160827637, "learning_rate": 9.499178981937603e-06, "loss": 38.5806, "step": 11962 }, { "epoch": 284.8358208955224, "grad_norm": 1.3675178289413452, "learning_rate": 9.498357963875206e-06, "loss": 39.4168, "step": 11963 }, { "epoch": 284.85970149253734, "grad_norm": 1.3780865669250488, "learning_rate": 9.497536945812808e-06, "loss": 40.1945, "step": 11964 }, { "epoch": 284.88358208955225, "grad_norm": 1.35861337184906, "learning_rate": 9.496715927750411e-06, "loss": 38.7086, "step": 11965 }, { "epoch": 284.90746268656716, "grad_norm": 1.3294793367385864, "learning_rate": 9.495894909688014e-06, "loss": 39.2988, "step": 11966 }, { "epoch": 284.93134328358207, "grad_norm": 1.342605710029602, "learning_rate": 9.495073891625616e-06, "loss": 39.9808, "step": 11967 }, { "epoch": 284.95522388059703, "grad_norm": 1.3244298696517944, "learning_rate": 9.494252873563219e-06, "loss": 39.9199, "step": 11968 }, { "epoch": 284.97910447761194, "grad_norm": 1.3561193943023682, "learning_rate": 9.493431855500821e-06, "loss": 41.5488, "step": 11969 }, { "epoch": 285.0, "grad_norm": 1.3420791625976562, "learning_rate": 9.492610837438424e-06, "loss": 35.0245, "step": 11970 }, { "epoch": 285.0238805970149, "grad_norm": 1.325765609741211, "learning_rate": 9.491789819376027e-06, "loss": 39.8946, "step": 11971 }, { "epoch": 285.0477611940299, "grad_norm": 1.3523355722427368, "learning_rate": 9.49096880131363e-06, "loss": 38.7463, "step": 11972 }, { "epoch": 285.0716417910448, "grad_norm": 1.3092608451843262, "learning_rate": 9.490147783251232e-06, "loss": 39.8912, "step": 11973 }, { "epoch": 285.0955223880597, "grad_norm": 1.3294633626937866, "learning_rate": 9.489326765188836e-06, "loss": 40.1957, "step": 11974 }, { "epoch": 285.1194029850746, "grad_norm": 1.3833338022232056, "learning_rate": 9.488505747126437e-06, "loss": 41.0949, "step": 11975 }, { "epoch": 285.14328358208957, "grad_norm": 1.3215701580047607, "learning_rate": 9.487684729064041e-06, "loss": 39.3961, "step": 11976 }, { "epoch": 285.1671641791045, "grad_norm": 1.3544162511825562, "learning_rate": 9.486863711001642e-06, "loss": 38.4648, "step": 11977 }, { "epoch": 285.1910447761194, "grad_norm": 1.3482720851898193, "learning_rate": 9.486042692939245e-06, "loss": 40.9547, "step": 11978 }, { "epoch": 285.21492537313435, "grad_norm": 1.3439419269561768, "learning_rate": 9.485221674876848e-06, "loss": 40.3735, "step": 11979 }, { "epoch": 285.23880597014926, "grad_norm": 1.3262200355529785, "learning_rate": 9.48440065681445e-06, "loss": 40.0938, "step": 11980 }, { "epoch": 285.26268656716417, "grad_norm": 1.3577295541763306, "learning_rate": 9.483579638752053e-06, "loss": 39.6172, "step": 11981 }, { "epoch": 285.28656716417913, "grad_norm": 1.3551108837127686, "learning_rate": 9.482758620689655e-06, "loss": 40.4057, "step": 11982 }, { "epoch": 285.31044776119404, "grad_norm": 1.3524619340896606, "learning_rate": 9.481937602627258e-06, "loss": 39.8293, "step": 11983 }, { "epoch": 285.33432835820895, "grad_norm": 1.3342063426971436, "learning_rate": 9.48111658456486e-06, "loss": 39.5822, "step": 11984 }, { "epoch": 285.35820895522386, "grad_norm": 1.3398579359054565, "learning_rate": 9.480295566502465e-06, "loss": 39.5923, "step": 11985 }, { "epoch": 285.3820895522388, "grad_norm": 1.326357364654541, "learning_rate": 9.479474548440066e-06, "loss": 39.632, "step": 11986 }, { "epoch": 285.40597014925373, "grad_norm": 1.3539799451828003, "learning_rate": 9.47865353037767e-06, "loss": 39.9701, "step": 11987 }, { "epoch": 285.42985074626864, "grad_norm": 1.344017744064331, "learning_rate": 9.477832512315271e-06, "loss": 40.227, "step": 11988 }, { "epoch": 285.4537313432836, "grad_norm": 1.5489630699157715, "learning_rate": 9.477011494252875e-06, "loss": 40.0906, "step": 11989 }, { "epoch": 285.4776119402985, "grad_norm": 1.357753872871399, "learning_rate": 9.476190476190476e-06, "loss": 40.1217, "step": 11990 }, { "epoch": 285.5014925373134, "grad_norm": 1.324532389640808, "learning_rate": 9.475369458128079e-06, "loss": 40.1295, "step": 11991 }, { "epoch": 285.52537313432833, "grad_norm": 1.332438349723816, "learning_rate": 9.474548440065682e-06, "loss": 39.1041, "step": 11992 }, { "epoch": 285.5492537313433, "grad_norm": 1.4030191898345947, "learning_rate": 9.473727422003284e-06, "loss": 38.6854, "step": 11993 }, { "epoch": 285.5731343283582, "grad_norm": 1.3927640914916992, "learning_rate": 9.472906403940888e-06, "loss": 41.4538, "step": 11994 }, { "epoch": 285.5970149253731, "grad_norm": 1.3396673202514648, "learning_rate": 9.47208538587849e-06, "loss": 39.1006, "step": 11995 }, { "epoch": 285.6208955223881, "grad_norm": 1.3158622980117798, "learning_rate": 9.471264367816094e-06, "loss": 40.0337, "step": 11996 }, { "epoch": 285.644776119403, "grad_norm": 1.3092865943908691, "learning_rate": 9.470443349753695e-06, "loss": 39.6654, "step": 11997 }, { "epoch": 285.6686567164179, "grad_norm": 1.3266792297363281, "learning_rate": 9.469622331691299e-06, "loss": 39.4552, "step": 11998 }, { "epoch": 285.6925373134328, "grad_norm": 1.3640193939208984, "learning_rate": 9.4688013136289e-06, "loss": 39.0904, "step": 11999 }, { "epoch": 285.7164179104478, "grad_norm": 1.3546675443649292, "learning_rate": 9.467980295566504e-06, "loss": 40.2603, "step": 12000 }, { "epoch": 285.7402985074627, "grad_norm": 1.3463852405548096, "learning_rate": 9.467159277504105e-06, "loss": 39.9828, "step": 12001 }, { "epoch": 285.7641791044776, "grad_norm": 1.3430596590042114, "learning_rate": 9.46633825944171e-06, "loss": 39.4298, "step": 12002 }, { "epoch": 285.78805970149256, "grad_norm": 1.33284592628479, "learning_rate": 9.46551724137931e-06, "loss": 39.5975, "step": 12003 }, { "epoch": 285.81194029850747, "grad_norm": 1.3791635036468506, "learning_rate": 9.464696223316915e-06, "loss": 39.343, "step": 12004 }, { "epoch": 285.8358208955224, "grad_norm": 1.3806837797164917, "learning_rate": 9.463875205254517e-06, "loss": 38.9782, "step": 12005 }, { "epoch": 285.85970149253734, "grad_norm": 1.3434722423553467, "learning_rate": 9.463054187192118e-06, "loss": 40.304, "step": 12006 }, { "epoch": 285.88358208955225, "grad_norm": 1.3201708793640137, "learning_rate": 9.462233169129722e-06, "loss": 39.239, "step": 12007 }, { "epoch": 285.90746268656716, "grad_norm": 1.4660825729370117, "learning_rate": 9.461412151067323e-06, "loss": 39.92, "step": 12008 }, { "epoch": 285.93134328358207, "grad_norm": 1.3747000694274902, "learning_rate": 9.460591133004928e-06, "loss": 40.0984, "step": 12009 }, { "epoch": 285.95522388059703, "grad_norm": 1.3625351190567017, "learning_rate": 9.459770114942529e-06, "loss": 39.9479, "step": 12010 }, { "epoch": 285.97910447761194, "grad_norm": 1.361249327659607, "learning_rate": 9.458949096880133e-06, "loss": 39.0959, "step": 12011 }, { "epoch": 286.0, "grad_norm": 1.3813365697860718, "learning_rate": 9.458128078817734e-06, "loss": 33.7844, "step": 12012 }, { "epoch": 286.0238805970149, "grad_norm": 1.3590850830078125, "learning_rate": 9.457307060755338e-06, "loss": 40.3856, "step": 12013 }, { "epoch": 286.0477611940299, "grad_norm": 1.3383759260177612, "learning_rate": 9.45648604269294e-06, "loss": 40.2222, "step": 12014 }, { "epoch": 286.0716417910448, "grad_norm": 1.3400304317474365, "learning_rate": 9.455665024630543e-06, "loss": 39.0623, "step": 12015 }, { "epoch": 286.0955223880597, "grad_norm": 1.3649057149887085, "learning_rate": 9.454844006568146e-06, "loss": 38.7533, "step": 12016 }, { "epoch": 286.1194029850746, "grad_norm": 1.332308292388916, "learning_rate": 9.454022988505749e-06, "loss": 40.0165, "step": 12017 }, { "epoch": 286.14328358208957, "grad_norm": 1.3599814176559448, "learning_rate": 9.453201970443351e-06, "loss": 39.7133, "step": 12018 }, { "epoch": 286.1671641791045, "grad_norm": 1.3265804052352905, "learning_rate": 9.452380952380952e-06, "loss": 39.549, "step": 12019 }, { "epoch": 286.1910447761194, "grad_norm": 1.3522619009017944, "learning_rate": 9.451559934318556e-06, "loss": 40.3569, "step": 12020 }, { "epoch": 286.21492537313435, "grad_norm": 1.3753632307052612, "learning_rate": 9.450738916256157e-06, "loss": 40.1249, "step": 12021 }, { "epoch": 286.23880597014926, "grad_norm": 1.355027675628662, "learning_rate": 9.449917898193762e-06, "loss": 39.3429, "step": 12022 }, { "epoch": 286.26268656716417, "grad_norm": 1.3462893962860107, "learning_rate": 9.449096880131363e-06, "loss": 38.2519, "step": 12023 }, { "epoch": 286.28656716417913, "grad_norm": 1.3531821966171265, "learning_rate": 9.448275862068967e-06, "loss": 41.4809, "step": 12024 }, { "epoch": 286.31044776119404, "grad_norm": 1.3123825788497925, "learning_rate": 9.44745484400657e-06, "loss": 39.2694, "step": 12025 }, { "epoch": 286.33432835820895, "grad_norm": 1.3352797031402588, "learning_rate": 9.446633825944172e-06, "loss": 39.9163, "step": 12026 }, { "epoch": 286.35820895522386, "grad_norm": 1.3326116800308228, "learning_rate": 9.445812807881775e-06, "loss": 40.5402, "step": 12027 }, { "epoch": 286.3820895522388, "grad_norm": 1.3235381841659546, "learning_rate": 9.444991789819377e-06, "loss": 40.2323, "step": 12028 }, { "epoch": 286.40597014925373, "grad_norm": 1.38007652759552, "learning_rate": 9.44417077175698e-06, "loss": 39.8504, "step": 12029 }, { "epoch": 286.42985074626864, "grad_norm": 1.352645754814148, "learning_rate": 9.443349753694583e-06, "loss": 39.7557, "step": 12030 }, { "epoch": 286.4537313432836, "grad_norm": 1.3105220794677734, "learning_rate": 9.442528735632185e-06, "loss": 40.3491, "step": 12031 }, { "epoch": 286.4776119402985, "grad_norm": 1.2983927726745605, "learning_rate": 9.441707717569786e-06, "loss": 39.7236, "step": 12032 }, { "epoch": 286.5014925373134, "grad_norm": 1.396142840385437, "learning_rate": 9.44088669950739e-06, "loss": 38.147, "step": 12033 }, { "epoch": 286.52537313432833, "grad_norm": 1.3321326971054077, "learning_rate": 9.440065681444991e-06, "loss": 40.4164, "step": 12034 }, { "epoch": 286.5492537313433, "grad_norm": 1.3477524518966675, "learning_rate": 9.439244663382596e-06, "loss": 40.6168, "step": 12035 }, { "epoch": 286.5731343283582, "grad_norm": 1.3746423721313477, "learning_rate": 9.438423645320198e-06, "loss": 38.7871, "step": 12036 }, { "epoch": 286.5970149253731, "grad_norm": 1.3241422176361084, "learning_rate": 9.4376026272578e-06, "loss": 40.4838, "step": 12037 }, { "epoch": 286.6208955223881, "grad_norm": 1.3301538228988647, "learning_rate": 9.436781609195403e-06, "loss": 39.4907, "step": 12038 }, { "epoch": 286.644776119403, "grad_norm": 1.3196642398834229, "learning_rate": 9.435960591133006e-06, "loss": 39.6565, "step": 12039 }, { "epoch": 286.6686567164179, "grad_norm": 1.3390698432922363, "learning_rate": 9.435139573070609e-06, "loss": 40.2376, "step": 12040 }, { "epoch": 286.6925373134328, "grad_norm": 1.414223551750183, "learning_rate": 9.434318555008211e-06, "loss": 39.4786, "step": 12041 }, { "epoch": 286.7164179104478, "grad_norm": 1.3839342594146729, "learning_rate": 9.433497536945814e-06, "loss": 39.2884, "step": 12042 }, { "epoch": 286.7402985074627, "grad_norm": 1.3253860473632812, "learning_rate": 9.432676518883416e-06, "loss": 39.9886, "step": 12043 }, { "epoch": 286.7641791044776, "grad_norm": 1.359832525253296, "learning_rate": 9.431855500821019e-06, "loss": 41.7355, "step": 12044 }, { "epoch": 286.78805970149256, "grad_norm": 1.3610612154006958, "learning_rate": 9.431034482758622e-06, "loss": 40.2887, "step": 12045 }, { "epoch": 286.81194029850747, "grad_norm": 1.3641639947891235, "learning_rate": 9.430213464696224e-06, "loss": 37.7496, "step": 12046 }, { "epoch": 286.8358208955224, "grad_norm": 1.3601717948913574, "learning_rate": 9.429392446633827e-06, "loss": 40.2271, "step": 12047 }, { "epoch": 286.85970149253734, "grad_norm": 1.3584723472595215, "learning_rate": 9.42857142857143e-06, "loss": 39.6582, "step": 12048 }, { "epoch": 286.88358208955225, "grad_norm": 1.3685805797576904, "learning_rate": 9.427750410509032e-06, "loss": 39.4926, "step": 12049 }, { "epoch": 286.90746268656716, "grad_norm": 1.330118179321289, "learning_rate": 9.426929392446635e-06, "loss": 39.4859, "step": 12050 }, { "epoch": 286.93134328358207, "grad_norm": 1.3809008598327637, "learning_rate": 9.426108374384237e-06, "loss": 38.1363, "step": 12051 }, { "epoch": 286.95522388059703, "grad_norm": 1.4444806575775146, "learning_rate": 9.42528735632184e-06, "loss": 39.7658, "step": 12052 }, { "epoch": 286.97910447761194, "grad_norm": 1.383535623550415, "learning_rate": 9.424466338259443e-06, "loss": 39.3471, "step": 12053 }, { "epoch": 287.0, "grad_norm": 1.3740386962890625, "learning_rate": 9.423645320197045e-06, "loss": 35.069, "step": 12054 }, { "epoch": 287.0238805970149, "grad_norm": 1.3458162546157837, "learning_rate": 9.422824302134648e-06, "loss": 39.58, "step": 12055 }, { "epoch": 287.0477611940299, "grad_norm": 1.3797115087509155, "learning_rate": 9.42200328407225e-06, "loss": 39.6328, "step": 12056 }, { "epoch": 287.0716417910448, "grad_norm": 1.3459216356277466, "learning_rate": 9.421182266009853e-06, "loss": 40.1532, "step": 12057 }, { "epoch": 287.0955223880597, "grad_norm": 1.34609854221344, "learning_rate": 9.420361247947456e-06, "loss": 38.1373, "step": 12058 }, { "epoch": 287.1194029850746, "grad_norm": 1.347035527229309, "learning_rate": 9.419540229885058e-06, "loss": 39.6094, "step": 12059 }, { "epoch": 287.14328358208957, "grad_norm": 1.3476005792617798, "learning_rate": 9.418719211822661e-06, "loss": 39.8967, "step": 12060 }, { "epoch": 287.1671641791045, "grad_norm": 1.3420101404190063, "learning_rate": 9.417898193760264e-06, "loss": 40.5226, "step": 12061 }, { "epoch": 287.1910447761194, "grad_norm": 1.3359452486038208, "learning_rate": 9.417077175697866e-06, "loss": 39.8474, "step": 12062 }, { "epoch": 287.21492537313435, "grad_norm": 1.3590953350067139, "learning_rate": 9.416256157635469e-06, "loss": 40.1142, "step": 12063 }, { "epoch": 287.23880597014926, "grad_norm": 1.3580728769302368, "learning_rate": 9.415435139573071e-06, "loss": 39.1332, "step": 12064 }, { "epoch": 287.26268656716417, "grad_norm": 1.3304688930511475, "learning_rate": 9.414614121510674e-06, "loss": 39.5133, "step": 12065 }, { "epoch": 287.28656716417913, "grad_norm": 1.3703256845474243, "learning_rate": 9.413793103448277e-06, "loss": 40.3422, "step": 12066 }, { "epoch": 287.31044776119404, "grad_norm": 1.3297455310821533, "learning_rate": 9.41297208538588e-06, "loss": 39.7186, "step": 12067 }, { "epoch": 287.33432835820895, "grad_norm": 1.3614317178726196, "learning_rate": 9.412151067323482e-06, "loss": 39.0718, "step": 12068 }, { "epoch": 287.35820895522386, "grad_norm": 1.4350707530975342, "learning_rate": 9.411330049261084e-06, "loss": 40.2305, "step": 12069 }, { "epoch": 287.3820895522388, "grad_norm": 1.4209163188934326, "learning_rate": 9.410509031198687e-06, "loss": 39.2491, "step": 12070 }, { "epoch": 287.40597014925373, "grad_norm": 1.3288136720657349, "learning_rate": 9.40968801313629e-06, "loss": 40.2331, "step": 12071 }, { "epoch": 287.42985074626864, "grad_norm": 1.3289991617202759, "learning_rate": 9.408866995073892e-06, "loss": 39.9979, "step": 12072 }, { "epoch": 287.4537313432836, "grad_norm": 1.3223354816436768, "learning_rate": 9.408045977011495e-06, "loss": 38.7791, "step": 12073 }, { "epoch": 287.4776119402985, "grad_norm": 1.3896870613098145, "learning_rate": 9.407224958949097e-06, "loss": 39.8999, "step": 12074 }, { "epoch": 287.5014925373134, "grad_norm": 1.3930989503860474, "learning_rate": 9.4064039408867e-06, "loss": 38.8857, "step": 12075 }, { "epoch": 287.52537313432833, "grad_norm": 1.3310915231704712, "learning_rate": 9.405582922824303e-06, "loss": 40.4718, "step": 12076 }, { "epoch": 287.5492537313433, "grad_norm": 1.3599997758865356, "learning_rate": 9.404761904761905e-06, "loss": 40.528, "step": 12077 }, { "epoch": 287.5731343283582, "grad_norm": 1.3756223917007446, "learning_rate": 9.403940886699508e-06, "loss": 39.9717, "step": 12078 }, { "epoch": 287.5970149253731, "grad_norm": 1.3373993635177612, "learning_rate": 9.40311986863711e-06, "loss": 40.0362, "step": 12079 }, { "epoch": 287.6208955223881, "grad_norm": 1.3467637300491333, "learning_rate": 9.402298850574713e-06, "loss": 40.6357, "step": 12080 }, { "epoch": 287.644776119403, "grad_norm": 1.3440970182418823, "learning_rate": 9.401477832512316e-06, "loss": 40.7936, "step": 12081 }, { "epoch": 287.6686567164179, "grad_norm": 1.3733090162277222, "learning_rate": 9.400656814449918e-06, "loss": 39.835, "step": 12082 }, { "epoch": 287.6925373134328, "grad_norm": 1.3424888849258423, "learning_rate": 9.399835796387521e-06, "loss": 39.8239, "step": 12083 }, { "epoch": 287.7164179104478, "grad_norm": 1.3710572719573975, "learning_rate": 9.399014778325124e-06, "loss": 39.0114, "step": 12084 }, { "epoch": 287.7402985074627, "grad_norm": 1.345268964767456, "learning_rate": 9.398193760262726e-06, "loss": 39.0652, "step": 12085 }, { "epoch": 287.7641791044776, "grad_norm": 1.359329104423523, "learning_rate": 9.397372742200329e-06, "loss": 40.3537, "step": 12086 }, { "epoch": 287.78805970149256, "grad_norm": 1.3831673860549927, "learning_rate": 9.396551724137931e-06, "loss": 39.5398, "step": 12087 }, { "epoch": 287.81194029850747, "grad_norm": 1.3822691440582275, "learning_rate": 9.395730706075534e-06, "loss": 39.2652, "step": 12088 }, { "epoch": 287.8358208955224, "grad_norm": 1.3973900079727173, "learning_rate": 9.394909688013137e-06, "loss": 39.5414, "step": 12089 }, { "epoch": 287.85970149253734, "grad_norm": 1.3571640253067017, "learning_rate": 9.39408866995074e-06, "loss": 40.3729, "step": 12090 }, { "epoch": 287.88358208955225, "grad_norm": 1.3366930484771729, "learning_rate": 9.393267651888342e-06, "loss": 38.4591, "step": 12091 }, { "epoch": 287.90746268656716, "grad_norm": 1.362608551979065, "learning_rate": 9.392446633825945e-06, "loss": 38.5737, "step": 12092 }, { "epoch": 287.93134328358207, "grad_norm": 1.404809832572937, "learning_rate": 9.391625615763547e-06, "loss": 40.3336, "step": 12093 }, { "epoch": 287.95522388059703, "grad_norm": 1.3897837400436401, "learning_rate": 9.39080459770115e-06, "loss": 40.6722, "step": 12094 }, { "epoch": 287.97910447761194, "grad_norm": 1.407723069190979, "learning_rate": 9.389983579638752e-06, "loss": 39.2952, "step": 12095 }, { "epoch": 288.0, "grad_norm": 1.3592262268066406, "learning_rate": 9.389162561576357e-06, "loss": 35.5168, "step": 12096 }, { "epoch": 288.0238805970149, "grad_norm": 1.4204654693603516, "learning_rate": 9.388341543513958e-06, "loss": 39.9802, "step": 12097 }, { "epoch": 288.0477611940299, "grad_norm": 1.357765555381775, "learning_rate": 9.387520525451562e-06, "loss": 39.9175, "step": 12098 }, { "epoch": 288.0716417910448, "grad_norm": 1.325820803642273, "learning_rate": 9.386699507389163e-06, "loss": 40.3686, "step": 12099 }, { "epoch": 288.0955223880597, "grad_norm": 1.3697806596755981, "learning_rate": 9.385878489326767e-06, "loss": 39.062, "step": 12100 }, { "epoch": 288.1194029850746, "grad_norm": 1.3903807401657104, "learning_rate": 9.385057471264368e-06, "loss": 39.7437, "step": 12101 }, { "epoch": 288.14328358208957, "grad_norm": 1.3544834852218628, "learning_rate": 9.38423645320197e-06, "loss": 39.98, "step": 12102 }, { "epoch": 288.1671641791045, "grad_norm": 1.3568726778030396, "learning_rate": 9.383415435139573e-06, "loss": 40.8354, "step": 12103 }, { "epoch": 288.1910447761194, "grad_norm": 1.3842867612838745, "learning_rate": 9.382594417077176e-06, "loss": 38.8091, "step": 12104 }, { "epoch": 288.21492537313435, "grad_norm": 1.3769088983535767, "learning_rate": 9.381773399014778e-06, "loss": 40.1718, "step": 12105 }, { "epoch": 288.23880597014926, "grad_norm": 1.3711633682250977, "learning_rate": 9.380952380952381e-06, "loss": 40.7246, "step": 12106 }, { "epoch": 288.26268656716417, "grad_norm": 1.3942248821258545, "learning_rate": 9.380131362889985e-06, "loss": 38.9379, "step": 12107 }, { "epoch": 288.28656716417913, "grad_norm": 1.79082190990448, "learning_rate": 9.379310344827586e-06, "loss": 40.3823, "step": 12108 }, { "epoch": 288.31044776119404, "grad_norm": 1.4232256412506104, "learning_rate": 9.37848932676519e-06, "loss": 40.0074, "step": 12109 }, { "epoch": 288.33432835820895, "grad_norm": 1.385221004486084, "learning_rate": 9.377668308702792e-06, "loss": 39.9915, "step": 12110 }, { "epoch": 288.35820895522386, "grad_norm": 1.330129861831665, "learning_rate": 9.376847290640396e-06, "loss": 39.8017, "step": 12111 }, { "epoch": 288.3820895522388, "grad_norm": 1.3856208324432373, "learning_rate": 9.376026272577997e-06, "loss": 39.9434, "step": 12112 }, { "epoch": 288.40597014925373, "grad_norm": 1.368296504020691, "learning_rate": 9.375205254515601e-06, "loss": 40.7453, "step": 12113 }, { "epoch": 288.42985074626864, "grad_norm": 1.3850822448730469, "learning_rate": 9.374384236453202e-06, "loss": 39.4254, "step": 12114 }, { "epoch": 288.4537313432836, "grad_norm": 1.3246488571166992, "learning_rate": 9.373563218390805e-06, "loss": 40.1301, "step": 12115 }, { "epoch": 288.4776119402985, "grad_norm": 1.3333349227905273, "learning_rate": 9.372742200328409e-06, "loss": 39.8603, "step": 12116 }, { "epoch": 288.5014925373134, "grad_norm": 1.3391931056976318, "learning_rate": 9.37192118226601e-06, "loss": 40.5734, "step": 12117 }, { "epoch": 288.52537313432833, "grad_norm": 1.3819667100906372, "learning_rate": 9.371100164203614e-06, "loss": 39.0863, "step": 12118 }, { "epoch": 288.5492537313433, "grad_norm": 1.3435198068618774, "learning_rate": 9.370279146141215e-06, "loss": 38.2158, "step": 12119 }, { "epoch": 288.5731343283582, "grad_norm": 1.373004674911499, "learning_rate": 9.36945812807882e-06, "loss": 39.9053, "step": 12120 }, { "epoch": 288.5970149253731, "grad_norm": 1.3708373308181763, "learning_rate": 9.36863711001642e-06, "loss": 38.8053, "step": 12121 }, { "epoch": 288.6208955223881, "grad_norm": 1.3539625406265259, "learning_rate": 9.367816091954025e-06, "loss": 39.4629, "step": 12122 }, { "epoch": 288.644776119403, "grad_norm": 1.3799657821655273, "learning_rate": 9.366995073891626e-06, "loss": 40.0463, "step": 12123 }, { "epoch": 288.6686567164179, "grad_norm": 1.3700947761535645, "learning_rate": 9.36617405582923e-06, "loss": 39.84, "step": 12124 }, { "epoch": 288.6925373134328, "grad_norm": 1.3710906505584717, "learning_rate": 9.36535303776683e-06, "loss": 40.6968, "step": 12125 }, { "epoch": 288.7164179104478, "grad_norm": 1.3552796840667725, "learning_rate": 9.364532019704435e-06, "loss": 39.521, "step": 12126 }, { "epoch": 288.7402985074627, "grad_norm": 1.3565925359725952, "learning_rate": 9.363711001642038e-06, "loss": 39.4545, "step": 12127 }, { "epoch": 288.7641791044776, "grad_norm": 1.3954670429229736, "learning_rate": 9.362889983579639e-06, "loss": 38.4475, "step": 12128 }, { "epoch": 288.78805970149256, "grad_norm": 1.389497995376587, "learning_rate": 9.362068965517243e-06, "loss": 40.1049, "step": 12129 }, { "epoch": 288.81194029850747, "grad_norm": 1.3239139318466187, "learning_rate": 9.361247947454844e-06, "loss": 40.175, "step": 12130 }, { "epoch": 288.8358208955224, "grad_norm": 1.3536763191223145, "learning_rate": 9.360426929392448e-06, "loss": 38.8056, "step": 12131 }, { "epoch": 288.85970149253734, "grad_norm": 1.3398267030715942, "learning_rate": 9.359605911330049e-06, "loss": 40.3835, "step": 12132 }, { "epoch": 288.88358208955225, "grad_norm": 1.3339135646820068, "learning_rate": 9.358784893267653e-06, "loss": 39.9772, "step": 12133 }, { "epoch": 288.90746268656716, "grad_norm": 1.3771159648895264, "learning_rate": 9.357963875205254e-06, "loss": 40.2363, "step": 12134 }, { "epoch": 288.93134328358207, "grad_norm": 1.380162239074707, "learning_rate": 9.357142857142859e-06, "loss": 39.3156, "step": 12135 }, { "epoch": 288.95522388059703, "grad_norm": 1.3060262203216553, "learning_rate": 9.35632183908046e-06, "loss": 39.4289, "step": 12136 }, { "epoch": 288.97910447761194, "grad_norm": 1.3248320817947388, "learning_rate": 9.355500821018064e-06, "loss": 38.7721, "step": 12137 }, { "epoch": 289.0, "grad_norm": 1.3786442279815674, "learning_rate": 9.354679802955666e-06, "loss": 34.1223, "step": 12138 }, { "epoch": 289.0238805970149, "grad_norm": 1.388615608215332, "learning_rate": 9.353858784893269e-06, "loss": 40.1124, "step": 12139 }, { "epoch": 289.0477611940299, "grad_norm": 1.3671848773956299, "learning_rate": 9.353037766830872e-06, "loss": 38.9666, "step": 12140 }, { "epoch": 289.0716417910448, "grad_norm": 1.3798962831497192, "learning_rate": 9.352216748768474e-06, "loss": 39.4771, "step": 12141 }, { "epoch": 289.0955223880597, "grad_norm": 1.4101697206497192, "learning_rate": 9.351395730706077e-06, "loss": 40.1218, "step": 12142 }, { "epoch": 289.1194029850746, "grad_norm": 1.3331687450408936, "learning_rate": 9.350574712643678e-06, "loss": 39.3175, "step": 12143 }, { "epoch": 289.14328358208957, "grad_norm": 1.3946161270141602, "learning_rate": 9.349753694581282e-06, "loss": 39.966, "step": 12144 }, { "epoch": 289.1671641791045, "grad_norm": 1.3444677591323853, "learning_rate": 9.348932676518883e-06, "loss": 39.3894, "step": 12145 }, { "epoch": 289.1910447761194, "grad_norm": 1.36371910572052, "learning_rate": 9.348111658456487e-06, "loss": 39.2107, "step": 12146 }, { "epoch": 289.21492537313435, "grad_norm": 1.4051777124404907, "learning_rate": 9.34729064039409e-06, "loss": 40.5467, "step": 12147 }, { "epoch": 289.23880597014926, "grad_norm": 1.384892463684082, "learning_rate": 9.346469622331693e-06, "loss": 39.1824, "step": 12148 }, { "epoch": 289.26268656716417, "grad_norm": 1.3494964838027954, "learning_rate": 9.345648604269295e-06, "loss": 40.8784, "step": 12149 }, { "epoch": 289.28656716417913, "grad_norm": 1.3326315879821777, "learning_rate": 9.344827586206898e-06, "loss": 40.0093, "step": 12150 }, { "epoch": 289.31044776119404, "grad_norm": 1.3965115547180176, "learning_rate": 9.3440065681445e-06, "loss": 39.3581, "step": 12151 }, { "epoch": 289.33432835820895, "grad_norm": 1.4182722568511963, "learning_rate": 9.343185550082103e-06, "loss": 38.8696, "step": 12152 }, { "epoch": 289.35820895522386, "grad_norm": 1.399808645248413, "learning_rate": 9.342364532019706e-06, "loss": 39.6902, "step": 12153 }, { "epoch": 289.3820895522388, "grad_norm": 1.3702127933502197, "learning_rate": 9.341543513957308e-06, "loss": 38.7257, "step": 12154 }, { "epoch": 289.40597014925373, "grad_norm": 1.3922392129898071, "learning_rate": 9.34072249589491e-06, "loss": 38.9035, "step": 12155 }, { "epoch": 289.42985074626864, "grad_norm": 1.3710225820541382, "learning_rate": 9.339901477832512e-06, "loss": 39.9873, "step": 12156 }, { "epoch": 289.4537313432836, "grad_norm": 1.4242440462112427, "learning_rate": 9.339080459770116e-06, "loss": 39.0114, "step": 12157 }, { "epoch": 289.4776119402985, "grad_norm": 1.4024817943572998, "learning_rate": 9.338259441707719e-06, "loss": 38.454, "step": 12158 }, { "epoch": 289.5014925373134, "grad_norm": 1.3712811470031738, "learning_rate": 9.337438423645321e-06, "loss": 39.5811, "step": 12159 }, { "epoch": 289.52537313432833, "grad_norm": 1.3915514945983887, "learning_rate": 9.336617405582924e-06, "loss": 40.7422, "step": 12160 }, { "epoch": 289.5492537313433, "grad_norm": 1.3839598894119263, "learning_rate": 9.335796387520526e-06, "loss": 40.6867, "step": 12161 }, { "epoch": 289.5731343283582, "grad_norm": 1.3878278732299805, "learning_rate": 9.334975369458129e-06, "loss": 39.1991, "step": 12162 }, { "epoch": 289.5970149253731, "grad_norm": 1.390815258026123, "learning_rate": 9.334154351395732e-06, "loss": 40.3274, "step": 12163 }, { "epoch": 289.6208955223881, "grad_norm": 1.5924125909805298, "learning_rate": 9.333333333333334e-06, "loss": 39.2823, "step": 12164 }, { "epoch": 289.644776119403, "grad_norm": 1.3608462810516357, "learning_rate": 9.332512315270937e-06, "loss": 40.4873, "step": 12165 }, { "epoch": 289.6686567164179, "grad_norm": NaN, "learning_rate": 9.33169129720854e-06, "loss": 54.1066, "step": 12166 }, { "epoch": 289.6925373134328, "grad_norm": 1.3943407535552979, "learning_rate": 9.33169129720854e-06, "loss": 40.0158, "step": 12167 }, { "epoch": 289.7164179104478, "grad_norm": 1.4122951030731201, "learning_rate": 9.330870279146142e-06, "loss": 40.4341, "step": 12168 }, { "epoch": 289.7402985074627, "grad_norm": 1.3910112380981445, "learning_rate": 9.330049261083745e-06, "loss": 39.9707, "step": 12169 }, { "epoch": 289.7641791044776, "grad_norm": 1.3792535066604614, "learning_rate": 9.329228243021347e-06, "loss": 40.3031, "step": 12170 }, { "epoch": 289.78805970149256, "grad_norm": 1.4171795845031738, "learning_rate": 9.32840722495895e-06, "loss": 40.015, "step": 12171 }, { "epoch": 289.81194029850747, "grad_norm": 1.3531039953231812, "learning_rate": 9.327586206896553e-06, "loss": 40.6819, "step": 12172 }, { "epoch": 289.8358208955224, "grad_norm": 1.3999453783035278, "learning_rate": 9.326765188834155e-06, "loss": 39.1966, "step": 12173 }, { "epoch": 289.85970149253734, "grad_norm": 1.6180684566497803, "learning_rate": 9.325944170771758e-06, "loss": 39.0058, "step": 12174 }, { "epoch": 289.88358208955225, "grad_norm": 1.3501311540603638, "learning_rate": 9.32512315270936e-06, "loss": 40.8581, "step": 12175 }, { "epoch": 289.90746268656716, "grad_norm": 1.3916213512420654, "learning_rate": 9.324302134646963e-06, "loss": 39.2074, "step": 12176 }, { "epoch": 289.93134328358207, "grad_norm": 1.3789334297180176, "learning_rate": 9.323481116584566e-06, "loss": 39.6283, "step": 12177 }, { "epoch": 289.95522388059703, "grad_norm": 1.3730876445770264, "learning_rate": 9.322660098522168e-06, "loss": 39.7883, "step": 12178 }, { "epoch": 289.97910447761194, "grad_norm": 1.3392094373703003, "learning_rate": 9.321839080459771e-06, "loss": 39.7968, "step": 12179 }, { "epoch": 290.0, "grad_norm": 1.3926756381988525, "learning_rate": 9.321018062397374e-06, "loss": 35.8075, "step": 12180 }, { "epoch": 290.0, "step": 12180, "total_flos": 6.047156584824383e+17, "train_loss": 2.743577060793421, "train_runtime": 25250.3561, "train_samples_per_second": 61.468, "train_steps_per_second": 0.482 }, { "epoch": 290.0238805970149, "grad_norm": 2.187229633331299, "learning_rate": 1e-05, "loss": 39.6158, "step": 12181 }, { "epoch": 290.0477611940299, "grad_norm": 2.125854730606079, "learning_rate": 9.999231950844855e-06, "loss": 40.2585, "step": 12182 }, { "epoch": 290.0716417910448, "grad_norm": 2.0182690620422363, "learning_rate": 9.99846390168971e-06, "loss": 39.4181, "step": 12183 }, { "epoch": 290.0955223880597, "grad_norm": 1.7249590158462524, "learning_rate": 9.997695852534564e-06, "loss": 40.6437, "step": 12184 }, { "epoch": 290.1194029850746, "grad_norm": 1.5269007682800293, "learning_rate": 9.996927803379417e-06, "loss": 40.5434, "step": 12185 }, { "epoch": 290.14328358208957, "grad_norm": 1.5596530437469482, "learning_rate": 9.996159754224271e-06, "loss": 38.7229, "step": 12186 }, { "epoch": 290.1671641791045, "grad_norm": 1.4000715017318726, "learning_rate": 9.995391705069125e-06, "loss": 40.2536, "step": 12187 }, { "epoch": 290.1910447761194, "grad_norm": 1.3920328617095947, "learning_rate": 9.99462365591398e-06, "loss": 39.2354, "step": 12188 }, { "epoch": 290.21492537313435, "grad_norm": 1.462067723274231, "learning_rate": 9.993855606758833e-06, "loss": 40.3453, "step": 12189 }, { "epoch": 290.23880597014926, "grad_norm": 1.4347699880599976, "learning_rate": 9.993087557603689e-06, "loss": 38.4607, "step": 12190 }, { "epoch": 290.26268656716417, "grad_norm": 1.4966435432434082, "learning_rate": 9.992319508448541e-06, "loss": 38.4966, "step": 12191 }, { "epoch": 290.28656716417913, "grad_norm": 1.4947675466537476, "learning_rate": 9.991551459293396e-06, "loss": 40.3305, "step": 12192 }, { "epoch": 290.31044776119404, "grad_norm": 1.46028733253479, "learning_rate": 9.99078341013825e-06, "loss": 40.7457, "step": 12193 }, { "epoch": 290.33432835820895, "grad_norm": 1.4292991161346436, "learning_rate": 9.990015360983104e-06, "loss": 39.7818, "step": 12194 }, { "epoch": 290.35820895522386, "grad_norm": 1.514564037322998, "learning_rate": 9.989247311827957e-06, "loss": 39.3232, "step": 12195 }, { "epoch": 290.3820895522388, "grad_norm": 1.3935164213180542, "learning_rate": 9.988479262672812e-06, "loss": 40.2875, "step": 12196 }, { "epoch": 290.40597014925373, "grad_norm": 1.3667718172073364, "learning_rate": 9.987711213517666e-06, "loss": 40.2339, "step": 12197 }, { "epoch": 290.42985074626864, "grad_norm": 1.3805837631225586, "learning_rate": 9.98694316436252e-06, "loss": 40.0503, "step": 12198 }, { "epoch": 290.4537313432836, "grad_norm": 1.3399449586868286, "learning_rate": 9.986175115207373e-06, "loss": 40.0582, "step": 12199 }, { "epoch": 290.4776119402985, "grad_norm": 1.3478096723556519, "learning_rate": 9.985407066052229e-06, "loss": 38.8075, "step": 12200 }, { "epoch": 290.5014925373134, "grad_norm": 1.3784856796264648, "learning_rate": 9.984639016897082e-06, "loss": 38.8901, "step": 12201 }, { "epoch": 290.52537313432833, "grad_norm": 1.4024072885513306, "learning_rate": 9.983870967741936e-06, "loss": 39.6142, "step": 12202 }, { "epoch": 290.5492537313433, "grad_norm": 1.3380460739135742, "learning_rate": 9.98310291858679e-06, "loss": 38.7442, "step": 12203 }, { "epoch": 290.5731343283582, "grad_norm": 1.3753461837768555, "learning_rate": 9.982334869431645e-06, "loss": 39.5337, "step": 12204 }, { "epoch": 290.5970149253731, "grad_norm": 1.3877496719360352, "learning_rate": 9.981566820276498e-06, "loss": 41.2327, "step": 12205 }, { "epoch": 290.6208955223881, "grad_norm": 1.3479077816009521, "learning_rate": 9.980798771121352e-06, "loss": 40.2475, "step": 12206 }, { "epoch": 290.644776119403, "grad_norm": 1.4183753728866577, "learning_rate": 9.980030721966206e-06, "loss": 38.8086, "step": 12207 }, { "epoch": 290.6686567164179, "grad_norm": 1.3632620573043823, "learning_rate": 9.97926267281106e-06, "loss": 40.4756, "step": 12208 }, { "epoch": 290.6925373134328, "grad_norm": 1.3214328289031982, "learning_rate": 9.978494623655915e-06, "loss": 40.5179, "step": 12209 }, { "epoch": 290.7164179104478, "grad_norm": 1.3546708822250366, "learning_rate": 9.97772657450077e-06, "loss": 39.9463, "step": 12210 }, { "epoch": 290.7402985074627, "grad_norm": 1.3408929109573364, "learning_rate": 9.976958525345622e-06, "loss": 38.019, "step": 12211 }, { "epoch": 290.7641791044776, "grad_norm": 1.4309005737304688, "learning_rate": 9.976190476190477e-06, "loss": 40.6944, "step": 12212 }, { "epoch": 290.78805970149256, "grad_norm": 1.3636168241500854, "learning_rate": 9.975422427035331e-06, "loss": 40.59, "step": 12213 }, { "epoch": 290.81194029850747, "grad_norm": 1.360452651977539, "learning_rate": 9.974654377880185e-06, "loss": 40.4378, "step": 12214 }, { "epoch": 290.8358208955224, "grad_norm": 1.344726800918579, "learning_rate": 9.973886328725038e-06, "loss": 40.4532, "step": 12215 }, { "epoch": 290.85970149253734, "grad_norm": 1.4301180839538574, "learning_rate": 9.973118279569894e-06, "loss": 40.2277, "step": 12216 }, { "epoch": 290.88358208955225, "grad_norm": 1.4405748844146729, "learning_rate": 9.972350230414747e-06, "loss": 40.5534, "step": 12217 }, { "epoch": 290.90746268656716, "grad_norm": 1.3778495788574219, "learning_rate": 9.971582181259601e-06, "loss": 40.653, "step": 12218 }, { "epoch": 290.93134328358207, "grad_norm": 1.396307110786438, "learning_rate": 9.970814132104456e-06, "loss": 38.9595, "step": 12219 }, { "epoch": 290.95522388059703, "grad_norm": 1.3496581315994263, "learning_rate": 9.97004608294931e-06, "loss": 40.3811, "step": 12220 }, { "epoch": 290.97910447761194, "grad_norm": 1.4102702140808105, "learning_rate": 9.969278033794164e-06, "loss": 40.0592, "step": 12221 }, { "epoch": 291.0, "grad_norm": 1.323774814605713, "learning_rate": 9.968509984639017e-06, "loss": 34.6397, "step": 12222 }, { "epoch": 291.0238805970149, "grad_norm": 1.3738030195236206, "learning_rate": 9.967741935483871e-06, "loss": 39.6393, "step": 12223 }, { "epoch": 291.0477611940299, "grad_norm": 1.351022481918335, "learning_rate": 9.966973886328726e-06, "loss": 40.2416, "step": 12224 }, { "epoch": 291.0716417910448, "grad_norm": 1.3363291025161743, "learning_rate": 9.96620583717358e-06, "loss": 40.4824, "step": 12225 }, { "epoch": 291.0955223880597, "grad_norm": 1.352083683013916, "learning_rate": 9.965437788018435e-06, "loss": 39.4171, "step": 12226 }, { "epoch": 291.1194029850746, "grad_norm": 1.3153940439224243, "learning_rate": 9.964669738863289e-06, "loss": 40.12, "step": 12227 }, { "epoch": 291.14328358208957, "grad_norm": 1.3269046545028687, "learning_rate": 9.963901689708142e-06, "loss": 39.4338, "step": 12228 }, { "epoch": 291.1671641791045, "grad_norm": 1.3592276573181152, "learning_rate": 9.963133640552996e-06, "loss": 39.8768, "step": 12229 }, { "epoch": 291.1910447761194, "grad_norm": 1.3785326480865479, "learning_rate": 9.96236559139785e-06, "loss": 39.1169, "step": 12230 }, { "epoch": 291.21492537313435, "grad_norm": 1.344862461090088, "learning_rate": 9.961597542242705e-06, "loss": 39.2464, "step": 12231 }, { "epoch": 291.23880597014926, "grad_norm": 1.3600770235061646, "learning_rate": 9.960829493087558e-06, "loss": 39.8707, "step": 12232 }, { "epoch": 291.26268656716417, "grad_norm": 1.4016928672790527, "learning_rate": 9.960061443932414e-06, "loss": 40.242, "step": 12233 }, { "epoch": 291.28656716417913, "grad_norm": 1.317816972732544, "learning_rate": 9.959293394777266e-06, "loss": 38.6184, "step": 12234 }, { "epoch": 291.31044776119404, "grad_norm": 1.3038744926452637, "learning_rate": 9.95852534562212e-06, "loss": 41.1386, "step": 12235 }, { "epoch": 291.33432835820895, "grad_norm": 1.3284037113189697, "learning_rate": 9.957757296466975e-06, "loss": 39.6775, "step": 12236 }, { "epoch": 291.35820895522386, "grad_norm": 1.335965871810913, "learning_rate": 9.95698924731183e-06, "loss": 40.432, "step": 12237 }, { "epoch": 291.3820895522388, "grad_norm": 1.333279013633728, "learning_rate": 9.956221198156682e-06, "loss": 39.9944, "step": 12238 }, { "epoch": 291.40597014925373, "grad_norm": 1.3777012825012207, "learning_rate": 9.955453149001537e-06, "loss": 39.3127, "step": 12239 }, { "epoch": 291.42985074626864, "grad_norm": 1.424473524093628, "learning_rate": 9.954685099846391e-06, "loss": 40.2671, "step": 12240 }, { "epoch": 291.4537313432836, "grad_norm": 1.4042421579360962, "learning_rate": 9.953917050691245e-06, "loss": 38.4254, "step": 12241 }, { "epoch": 291.4776119402985, "grad_norm": 1.384373664855957, "learning_rate": 9.953149001536098e-06, "loss": 40.4964, "step": 12242 }, { "epoch": 291.5014925373134, "grad_norm": 1.3952655792236328, "learning_rate": 9.952380952380954e-06, "loss": 40.0472, "step": 12243 }, { "epoch": 291.52537313432833, "grad_norm": 1.3109982013702393, "learning_rate": 9.951612903225807e-06, "loss": 39.8129, "step": 12244 }, { "epoch": 291.5492537313433, "grad_norm": 1.3055343627929688, "learning_rate": 9.950844854070661e-06, "loss": 39.2807, "step": 12245 }, { "epoch": 291.5731343283582, "grad_norm": 1.3499794006347656, "learning_rate": 9.950076804915516e-06, "loss": 39.3244, "step": 12246 }, { "epoch": 291.5970149253731, "grad_norm": 1.4079376459121704, "learning_rate": 9.94930875576037e-06, "loss": 39.2825, "step": 12247 }, { "epoch": 291.6208955223881, "grad_norm": 1.3798563480377197, "learning_rate": 9.948540706605223e-06, "loss": 39.7619, "step": 12248 }, { "epoch": 291.644776119403, "grad_norm": 1.3394330739974976, "learning_rate": 9.947772657450077e-06, "loss": 40.0787, "step": 12249 }, { "epoch": 291.6686567164179, "grad_norm": 1.449321985244751, "learning_rate": 9.947004608294931e-06, "loss": 40.1502, "step": 12250 }, { "epoch": 291.6925373134328, "grad_norm": 1.363323450088501, "learning_rate": 9.946236559139786e-06, "loss": 39.9351, "step": 12251 }, { "epoch": 291.7164179104478, "grad_norm": 1.3477826118469238, "learning_rate": 9.94546850998464e-06, "loss": 40.059, "step": 12252 }, { "epoch": 291.7402985074627, "grad_norm": 1.3488996028900146, "learning_rate": 9.944700460829495e-06, "loss": 40.5704, "step": 12253 }, { "epoch": 291.7641791044776, "grad_norm": 1.3353321552276611, "learning_rate": 9.943932411674347e-06, "loss": 39.9376, "step": 12254 }, { "epoch": 291.78805970149256, "grad_norm": 1.315295934677124, "learning_rate": 9.943164362519202e-06, "loss": 39.408, "step": 12255 }, { "epoch": 291.81194029850747, "grad_norm": 1.3749802112579346, "learning_rate": 9.942396313364056e-06, "loss": 39.5065, "step": 12256 }, { "epoch": 291.8358208955224, "grad_norm": 1.3404279947280884, "learning_rate": 9.94162826420891e-06, "loss": 39.4905, "step": 12257 }, { "epoch": 291.85970149253734, "grad_norm": 1.383751630783081, "learning_rate": 9.940860215053765e-06, "loss": 40.9799, "step": 12258 }, { "epoch": 291.88358208955225, "grad_norm": 1.3198999166488647, "learning_rate": 9.940092165898617e-06, "loss": 40.857, "step": 12259 }, { "epoch": 291.90746268656716, "grad_norm": 1.4303897619247437, "learning_rate": 9.939324116743474e-06, "loss": 40.3084, "step": 12260 }, { "epoch": 291.93134328358207, "grad_norm": 1.359878659248352, "learning_rate": 9.938556067588326e-06, "loss": 40.3561, "step": 12261 }, { "epoch": 291.95522388059703, "grad_norm": 1.3528491258621216, "learning_rate": 9.93778801843318e-06, "loss": 39.0939, "step": 12262 }, { "epoch": 291.97910447761194, "grad_norm": 1.393554925918579, "learning_rate": 9.937019969278035e-06, "loss": 39.1836, "step": 12263 }, { "epoch": 292.0, "grad_norm": 1.4480029344558716, "learning_rate": 9.93625192012289e-06, "loss": 34.9114, "step": 12264 }, { "epoch": 292.0238805970149, "grad_norm": 1.370217204093933, "learning_rate": 9.935483870967742e-06, "loss": 40.8482, "step": 12265 }, { "epoch": 292.0477611940299, "grad_norm": 1.3414603471755981, "learning_rate": 9.934715821812596e-06, "loss": 40.6067, "step": 12266 }, { "epoch": 292.0716417910448, "grad_norm": 1.3751657009124756, "learning_rate": 9.93394777265745e-06, "loss": 38.8394, "step": 12267 }, { "epoch": 292.0955223880597, "grad_norm": 1.4750065803527832, "learning_rate": 9.933179723502305e-06, "loss": 39.5157, "step": 12268 }, { "epoch": 292.1194029850746, "grad_norm": 1.3580468893051147, "learning_rate": 9.93241167434716e-06, "loss": 40.3099, "step": 12269 }, { "epoch": 292.14328358208957, "grad_norm": 1.396543264389038, "learning_rate": 9.931643625192014e-06, "loss": 40.6873, "step": 12270 }, { "epoch": 292.1671641791045, "grad_norm": 1.31378972530365, "learning_rate": 9.930875576036867e-06, "loss": 39.7371, "step": 12271 }, { "epoch": 292.1910447761194, "grad_norm": 1.377289056777954, "learning_rate": 9.930107526881721e-06, "loss": 40.2841, "step": 12272 }, { "epoch": 292.21492537313435, "grad_norm": 1.3177748918533325, "learning_rate": 9.929339477726575e-06, "loss": 39.3389, "step": 12273 }, { "epoch": 292.23880597014926, "grad_norm": 1.3776971101760864, "learning_rate": 9.92857142857143e-06, "loss": 39.892, "step": 12274 }, { "epoch": 292.26268656716417, "grad_norm": 1.3762120008468628, "learning_rate": 9.927803379416283e-06, "loss": 40.4783, "step": 12275 }, { "epoch": 292.28656716417913, "grad_norm": 1.3475521802902222, "learning_rate": 9.927035330261137e-06, "loss": 39.2856, "step": 12276 }, { "epoch": 292.31044776119404, "grad_norm": 1.345407247543335, "learning_rate": 9.926267281105991e-06, "loss": 40.5259, "step": 12277 }, { "epoch": 292.33432835820895, "grad_norm": 1.3649095296859741, "learning_rate": 9.925499231950846e-06, "loss": 39.4986, "step": 12278 }, { "epoch": 292.35820895522386, "grad_norm": 1.378341794013977, "learning_rate": 9.9247311827957e-06, "loss": 39.7791, "step": 12279 }, { "epoch": 292.3820895522388, "grad_norm": 1.3245428800582886, "learning_rate": 9.923963133640554e-06, "loss": 39.0579, "step": 12280 }, { "epoch": 292.40597014925373, "grad_norm": 1.3379567861557007, "learning_rate": 9.923195084485407e-06, "loss": 39.509, "step": 12281 }, { "epoch": 292.42985074626864, "grad_norm": 1.3211290836334229, "learning_rate": 9.922427035330262e-06, "loss": 40.2841, "step": 12282 }, { "epoch": 292.4537313432836, "grad_norm": 1.3774676322937012, "learning_rate": 9.921658986175116e-06, "loss": 39.8022, "step": 12283 }, { "epoch": 292.4776119402985, "grad_norm": 1.363139033317566, "learning_rate": 9.92089093701997e-06, "loss": 40.1412, "step": 12284 }, { "epoch": 292.5014925373134, "grad_norm": 1.3443852663040161, "learning_rate": 9.920122887864823e-06, "loss": 40.1017, "step": 12285 }, { "epoch": 292.52537313432833, "grad_norm": 1.3331952095031738, "learning_rate": 9.919354838709679e-06, "loss": 39.833, "step": 12286 }, { "epoch": 292.5492537313433, "grad_norm": 1.3992940187454224, "learning_rate": 9.918586789554532e-06, "loss": 39.5839, "step": 12287 }, { "epoch": 292.5731343283582, "grad_norm": 1.3336906433105469, "learning_rate": 9.917818740399386e-06, "loss": 40.1739, "step": 12288 }, { "epoch": 292.5970149253731, "grad_norm": 1.355184555053711, "learning_rate": 9.91705069124424e-06, "loss": 39.5491, "step": 12289 }, { "epoch": 292.6208955223881, "grad_norm": 1.3898571729660034, "learning_rate": 9.916282642089095e-06, "loss": 39.9478, "step": 12290 }, { "epoch": 292.644776119403, "grad_norm": 1.380679726600647, "learning_rate": 9.91551459293395e-06, "loss": 40.3048, "step": 12291 }, { "epoch": 292.6686567164179, "grad_norm": 1.35354745388031, "learning_rate": 9.914746543778802e-06, "loss": 38.1638, "step": 12292 }, { "epoch": 292.6925373134328, "grad_norm": 1.3399064540863037, "learning_rate": 9.913978494623658e-06, "loss": 39.3924, "step": 12293 }, { "epoch": 292.7164179104478, "grad_norm": 1.3798272609710693, "learning_rate": 9.91321044546851e-06, "loss": 39.9045, "step": 12294 }, { "epoch": 292.7402985074627, "grad_norm": 1.3887202739715576, "learning_rate": 9.912442396313365e-06, "loss": 40.2242, "step": 12295 }, { "epoch": 292.7641791044776, "grad_norm": 1.3300719261169434, "learning_rate": 9.91167434715822e-06, "loss": 40.2678, "step": 12296 }, { "epoch": 292.78805970149256, "grad_norm": 1.329648733139038, "learning_rate": 9.910906298003074e-06, "loss": 41.007, "step": 12297 }, { "epoch": 292.81194029850747, "grad_norm": 1.3512160778045654, "learning_rate": 9.910138248847927e-06, "loss": 39.0251, "step": 12298 }, { "epoch": 292.8358208955224, "grad_norm": 1.3126676082611084, "learning_rate": 9.909370199692781e-06, "loss": 41.6291, "step": 12299 }, { "epoch": 292.85970149253734, "grad_norm": 1.347164511680603, "learning_rate": 9.908602150537635e-06, "loss": 39.844, "step": 12300 }, { "epoch": 292.88358208955225, "grad_norm": 1.3604413270950317, "learning_rate": 9.90783410138249e-06, "loss": 39.7656, "step": 12301 }, { "epoch": 292.90746268656716, "grad_norm": 1.354586124420166, "learning_rate": 9.907066052227342e-06, "loss": 39.2175, "step": 12302 }, { "epoch": 292.93134328358207, "grad_norm": 1.3673714399337769, "learning_rate": 9.906298003072199e-06, "loss": 39.4876, "step": 12303 }, { "epoch": 292.95522388059703, "grad_norm": 1.3120675086975098, "learning_rate": 9.905529953917051e-06, "loss": 39.4556, "step": 12304 }, { "epoch": 292.97910447761194, "grad_norm": 1.3514020442962646, "learning_rate": 9.904761904761906e-06, "loss": 39.0623, "step": 12305 }, { "epoch": 293.0, "grad_norm": 1.3507524728775024, "learning_rate": 9.90399385560676e-06, "loss": 33.7345, "step": 12306 }, { "epoch": 293.0238805970149, "grad_norm": 1.348960041999817, "learning_rate": 9.903225806451614e-06, "loss": 39.0942, "step": 12307 }, { "epoch": 293.0477611940299, "grad_norm": 1.3466382026672363, "learning_rate": 9.902457757296467e-06, "loss": 40.3468, "step": 12308 }, { "epoch": 293.0716417910448, "grad_norm": NaN, "learning_rate": 9.901689708141321e-06, "loss": 35.9798, "step": 12309 }, { "epoch": 293.0955223880597, "grad_norm": 1.3499253988265991, "learning_rate": 9.901689708141321e-06, "loss": 39.7384, "step": 12310 }, { "epoch": 293.1194029850746, "grad_norm": 1.3508185148239136, "learning_rate": 9.900921658986176e-06, "loss": 39.3318, "step": 12311 }, { "epoch": 293.14328358208957, "grad_norm": 1.3511416912078857, "learning_rate": 9.90015360983103e-06, "loss": 41.0464, "step": 12312 }, { "epoch": 293.1671641791045, "grad_norm": 1.3250442743301392, "learning_rate": 9.899385560675883e-06, "loss": 40.0788, "step": 12313 }, { "epoch": 293.1910447761194, "grad_norm": 1.3546373844146729, "learning_rate": 9.898617511520739e-06, "loss": 39.7941, "step": 12314 }, { "epoch": 293.21492537313435, "grad_norm": 1.3829472064971924, "learning_rate": 9.897849462365592e-06, "loss": 39.2881, "step": 12315 }, { "epoch": 293.23880597014926, "grad_norm": 1.3366106748580933, "learning_rate": 9.897081413210446e-06, "loss": 39.7317, "step": 12316 }, { "epoch": 293.26268656716417, "grad_norm": 1.377528429031372, "learning_rate": 9.8963133640553e-06, "loss": 39.4248, "step": 12317 }, { "epoch": 293.28656716417913, "grad_norm": 1.3175816535949707, "learning_rate": 9.895545314900155e-06, "loss": 39.9607, "step": 12318 }, { "epoch": 293.31044776119404, "grad_norm": 1.3283554315567017, "learning_rate": 9.894777265745008e-06, "loss": 37.4407, "step": 12319 }, { "epoch": 293.33432835820895, "grad_norm": 1.3043280839920044, "learning_rate": 9.894009216589862e-06, "loss": 38.885, "step": 12320 }, { "epoch": 293.35820895522386, "grad_norm": 1.3711789846420288, "learning_rate": 9.893241167434716e-06, "loss": 39.3568, "step": 12321 }, { "epoch": 293.3820895522388, "grad_norm": 1.329928994178772, "learning_rate": 9.89247311827957e-06, "loss": 41.2592, "step": 12322 }, { "epoch": 293.40597014925373, "grad_norm": 1.4058866500854492, "learning_rate": 9.891705069124425e-06, "loss": 40.4492, "step": 12323 }, { "epoch": 293.42985074626864, "grad_norm": 1.4119224548339844, "learning_rate": 9.89093701996928e-06, "loss": 40.7067, "step": 12324 }, { "epoch": 293.4537313432836, "grad_norm": 1.3351854085922241, "learning_rate": 9.890168970814132e-06, "loss": 39.1904, "step": 12325 }, { "epoch": 293.4776119402985, "grad_norm": 1.3370182514190674, "learning_rate": 9.889400921658987e-06, "loss": 38.5764, "step": 12326 }, { "epoch": 293.5014925373134, "grad_norm": 1.3411718606948853, "learning_rate": 9.888632872503841e-06, "loss": 39.7541, "step": 12327 }, { "epoch": 293.52537313432833, "grad_norm": 1.3348056077957153, "learning_rate": 9.887864823348695e-06, "loss": 39.8756, "step": 12328 }, { "epoch": 293.5492537313433, "grad_norm": 1.3462568521499634, "learning_rate": 9.88709677419355e-06, "loss": 40.531, "step": 12329 }, { "epoch": 293.5731343283582, "grad_norm": 1.3246426582336426, "learning_rate": 9.886328725038404e-06, "loss": 39.9879, "step": 12330 }, { "epoch": 293.5970149253731, "grad_norm": 1.3670644760131836, "learning_rate": 9.885560675883258e-06, "loss": 39.5868, "step": 12331 }, { "epoch": 293.6208955223881, "grad_norm": 1.3316282033920288, "learning_rate": 9.884792626728111e-06, "loss": 38.8736, "step": 12332 }, { "epoch": 293.644776119403, "grad_norm": 1.3488140106201172, "learning_rate": 9.884024577572966e-06, "loss": 41.2744, "step": 12333 }, { "epoch": 293.6686567164179, "grad_norm": 1.3216819763183594, "learning_rate": 9.88325652841782e-06, "loss": 40.1679, "step": 12334 }, { "epoch": 293.6925373134328, "grad_norm": 1.4236552715301514, "learning_rate": 9.882488479262674e-06, "loss": 41.1272, "step": 12335 }, { "epoch": 293.7164179104478, "grad_norm": 1.4192159175872803, "learning_rate": 9.881720430107527e-06, "loss": 40.1785, "step": 12336 }, { "epoch": 293.7402985074627, "grad_norm": 1.3829222917556763, "learning_rate": 9.880952380952381e-06, "loss": 39.9183, "step": 12337 }, { "epoch": 293.7641791044776, "grad_norm": 1.408171534538269, "learning_rate": 9.880184331797236e-06, "loss": 39.6263, "step": 12338 }, { "epoch": 293.78805970149256, "grad_norm": NaN, "learning_rate": 9.87941628264209e-06, "loss": 68.5249, "step": 12339 }, { "epoch": 293.81194029850747, "grad_norm": 1.3765614032745361, "learning_rate": 9.87941628264209e-06, "loss": 39.841, "step": 12340 }, { "epoch": 293.8358208955224, "grad_norm": 1.3446934223175049, "learning_rate": 9.878648233486945e-06, "loss": 39.4282, "step": 12341 }, { "epoch": 293.85970149253734, "grad_norm": 1.3979926109313965, "learning_rate": 9.877880184331799e-06, "loss": 39.7232, "step": 12342 }, { "epoch": 293.88358208955225, "grad_norm": 1.3641797304153442, "learning_rate": 9.877112135176652e-06, "loss": 40.5165, "step": 12343 }, { "epoch": 293.90746268656716, "grad_norm": 1.3722354173660278, "learning_rate": 9.876344086021506e-06, "loss": 40.2726, "step": 12344 }, { "epoch": 293.93134328358207, "grad_norm": 1.3426923751831055, "learning_rate": 9.87557603686636e-06, "loss": 38.9672, "step": 12345 }, { "epoch": 293.95522388059703, "grad_norm": 1.3693019151687622, "learning_rate": 9.874807987711215e-06, "loss": 39.8454, "step": 12346 }, { "epoch": 293.97910447761194, "grad_norm": 1.3184973001480103, "learning_rate": 9.874039938556067e-06, "loss": 39.5763, "step": 12347 }, { "epoch": 294.0, "grad_norm": 1.3707966804504395, "learning_rate": 9.873271889400924e-06, "loss": 34.3092, "step": 12348 }, { "epoch": 294.0238805970149, "grad_norm": 1.448770523071289, "learning_rate": 9.872503840245776e-06, "loss": 39.5729, "step": 12349 }, { "epoch": 294.0477611940299, "grad_norm": 1.3332648277282715, "learning_rate": 9.87173579109063e-06, "loss": 40.1094, "step": 12350 }, { "epoch": 294.0716417910448, "grad_norm": 1.3298394680023193, "learning_rate": 9.870967741935485e-06, "loss": 39.8512, "step": 12351 }, { "epoch": 294.0955223880597, "grad_norm": 1.3499394655227661, "learning_rate": 9.87019969278034e-06, "loss": 40.0238, "step": 12352 }, { "epoch": 294.1194029850746, "grad_norm": 1.3356682062149048, "learning_rate": 9.869431643625192e-06, "loss": 40.0521, "step": 12353 }, { "epoch": 294.14328358208957, "grad_norm": 1.3421322107315063, "learning_rate": 9.868663594470046e-06, "loss": 39.0035, "step": 12354 }, { "epoch": 294.1671641791045, "grad_norm": 1.3489779233932495, "learning_rate": 9.8678955453149e-06, "loss": 39.5905, "step": 12355 }, { "epoch": 294.1910447761194, "grad_norm": 1.43755304813385, "learning_rate": 9.867127496159755e-06, "loss": 40.7001, "step": 12356 }, { "epoch": 294.21492537313435, "grad_norm": 1.290647268295288, "learning_rate": 9.866359447004608e-06, "loss": 41.5931, "step": 12357 }, { "epoch": 294.23880597014926, "grad_norm": 1.3299174308776855, "learning_rate": 9.865591397849464e-06, "loss": 39.366, "step": 12358 }, { "epoch": 294.26268656716417, "grad_norm": 1.373241901397705, "learning_rate": 9.864823348694317e-06, "loss": 38.7072, "step": 12359 }, { "epoch": 294.28656716417913, "grad_norm": 1.3651525974273682, "learning_rate": 9.864055299539171e-06, "loss": 39.1147, "step": 12360 }, { "epoch": 294.31044776119404, "grad_norm": 1.3363678455352783, "learning_rate": 9.863287250384025e-06, "loss": 39.9895, "step": 12361 }, { "epoch": 294.33432835820895, "grad_norm": 1.3632419109344482, "learning_rate": 9.86251920122888e-06, "loss": 40.3474, "step": 12362 }, { "epoch": 294.35820895522386, "grad_norm": 1.3491897583007812, "learning_rate": 9.861751152073733e-06, "loss": 39.5245, "step": 12363 }, { "epoch": 294.3820895522388, "grad_norm": 1.408182978630066, "learning_rate": 9.860983102918587e-06, "loss": 39.2552, "step": 12364 }, { "epoch": 294.40597014925373, "grad_norm": 1.3639758825302124, "learning_rate": 9.860215053763441e-06, "loss": 39.8525, "step": 12365 }, { "epoch": 294.42985074626864, "grad_norm": 1.3362997770309448, "learning_rate": 9.859447004608296e-06, "loss": 39.8525, "step": 12366 }, { "epoch": 294.4537313432836, "grad_norm": 1.354140281677246, "learning_rate": 9.85867895545315e-06, "loss": 39.2927, "step": 12367 }, { "epoch": 294.4776119402985, "grad_norm": 1.359771490097046, "learning_rate": 9.857910906298004e-06, "loss": 41.6432, "step": 12368 }, { "epoch": 294.5014925373134, "grad_norm": 1.3348644971847534, "learning_rate": 9.857142857142859e-06, "loss": 40.5762, "step": 12369 }, { "epoch": 294.52537313432833, "grad_norm": 1.3616501092910767, "learning_rate": 9.856374807987712e-06, "loss": 39.1923, "step": 12370 }, { "epoch": 294.5492537313433, "grad_norm": 1.3721320629119873, "learning_rate": 9.855606758832566e-06, "loss": 40.6647, "step": 12371 }, { "epoch": 294.5731343283582, "grad_norm": 1.364132285118103, "learning_rate": 9.85483870967742e-06, "loss": 40.5391, "step": 12372 }, { "epoch": 294.5970149253731, "grad_norm": 1.3246452808380127, "learning_rate": 9.854070660522275e-06, "loss": 40.3809, "step": 12373 }, { "epoch": 294.6208955223881, "grad_norm": 1.368088722229004, "learning_rate": 9.853302611367127e-06, "loss": 38.2561, "step": 12374 }, { "epoch": 294.644776119403, "grad_norm": 1.353561282157898, "learning_rate": 9.852534562211983e-06, "loss": 39.6824, "step": 12375 }, { "epoch": 294.6686567164179, "grad_norm": 1.3677457571029663, "learning_rate": 9.851766513056836e-06, "loss": 39.8913, "step": 12376 }, { "epoch": 294.6925373134328, "grad_norm": 1.3866013288497925, "learning_rate": 9.85099846390169e-06, "loss": 39.1976, "step": 12377 }, { "epoch": 294.7164179104478, "grad_norm": 1.3189245462417603, "learning_rate": 9.850230414746545e-06, "loss": 40.0962, "step": 12378 }, { "epoch": 294.7402985074627, "grad_norm": 1.3367594480514526, "learning_rate": 9.8494623655914e-06, "loss": 37.8186, "step": 12379 }, { "epoch": 294.7641791044776, "grad_norm": 1.41925048828125, "learning_rate": 9.848694316436252e-06, "loss": 39.9713, "step": 12380 }, { "epoch": 294.78805970149256, "grad_norm": 1.3583707809448242, "learning_rate": 9.847926267281106e-06, "loss": 39.8863, "step": 12381 }, { "epoch": 294.81194029850747, "grad_norm": 1.4177712202072144, "learning_rate": 9.84715821812596e-06, "loss": 39.9555, "step": 12382 }, { "epoch": 294.8358208955224, "grad_norm": 1.3697288036346436, "learning_rate": 9.846390168970815e-06, "loss": 38.6838, "step": 12383 }, { "epoch": 294.85970149253734, "grad_norm": 1.3519020080566406, "learning_rate": 9.84562211981567e-06, "loss": 40.3052, "step": 12384 }, { "epoch": 294.88358208955225, "grad_norm": 1.3402044773101807, "learning_rate": 9.844854070660524e-06, "loss": 40.7659, "step": 12385 }, { "epoch": 294.90746268656716, "grad_norm": 1.3074195384979248, "learning_rate": 9.844086021505377e-06, "loss": 39.6774, "step": 12386 }, { "epoch": 294.93134328358207, "grad_norm": 1.34259831905365, "learning_rate": 9.843317972350231e-06, "loss": 40.1926, "step": 12387 }, { "epoch": 294.95522388059703, "grad_norm": 1.3341453075408936, "learning_rate": 9.842549923195085e-06, "loss": 39.6371, "step": 12388 }, { "epoch": 294.97910447761194, "grad_norm": 1.3447167873382568, "learning_rate": 9.84178187403994e-06, "loss": 40.5673, "step": 12389 }, { "epoch": 295.0, "grad_norm": 1.352957844734192, "learning_rate": 9.841013824884792e-06, "loss": 33.8682, "step": 12390 }, { "epoch": 295.0238805970149, "grad_norm": 1.3507068157196045, "learning_rate": 9.840245775729647e-06, "loss": 39.3353, "step": 12391 }, { "epoch": 295.0477611940299, "grad_norm": 1.355608344078064, "learning_rate": 9.839477726574501e-06, "loss": 40.2836, "step": 12392 }, { "epoch": 295.0716417910448, "grad_norm": 1.3277877569198608, "learning_rate": 9.838709677419356e-06, "loss": 40.8571, "step": 12393 }, { "epoch": 295.0955223880597, "grad_norm": 1.3827916383743286, "learning_rate": 9.83794162826421e-06, "loss": 39.7672, "step": 12394 }, { "epoch": 295.1194029850746, "grad_norm": 1.3508166074752808, "learning_rate": 9.837173579109064e-06, "loss": 38.9275, "step": 12395 }, { "epoch": 295.14328358208957, "grad_norm": 1.3319300413131714, "learning_rate": 9.836405529953917e-06, "loss": 39.3434, "step": 12396 }, { "epoch": 295.1671641791045, "grad_norm": 1.4402680397033691, "learning_rate": 9.835637480798771e-06, "loss": 39.1217, "step": 12397 }, { "epoch": 295.1910447761194, "grad_norm": 1.3754019737243652, "learning_rate": 9.834869431643626e-06, "loss": 38.4191, "step": 12398 }, { "epoch": 295.21492537313435, "grad_norm": 1.3452411890029907, "learning_rate": 9.83410138248848e-06, "loss": 40.2707, "step": 12399 }, { "epoch": 295.23880597014926, "grad_norm": 1.3123892545700073, "learning_rate": 9.833333333333333e-06, "loss": 39.4907, "step": 12400 }, { "epoch": 295.26268656716417, "grad_norm": 1.3179347515106201, "learning_rate": 9.832565284178189e-06, "loss": 38.726, "step": 12401 }, { "epoch": 295.28656716417913, "grad_norm": 1.3267793655395508, "learning_rate": 9.831797235023042e-06, "loss": 39.3117, "step": 12402 }, { "epoch": 295.31044776119404, "grad_norm": 1.368974208831787, "learning_rate": 9.831029185867896e-06, "loss": 40.6424, "step": 12403 }, { "epoch": 295.33432835820895, "grad_norm": 1.3584645986557007, "learning_rate": 9.83026113671275e-06, "loss": 40.3675, "step": 12404 }, { "epoch": 295.35820895522386, "grad_norm": 1.3559929132461548, "learning_rate": 9.829493087557605e-06, "loss": 40.2845, "step": 12405 }, { "epoch": 295.3820895522388, "grad_norm": 1.3656026124954224, "learning_rate": 9.82872503840246e-06, "loss": 40.5562, "step": 12406 }, { "epoch": 295.40597014925373, "grad_norm": 1.384082317352295, "learning_rate": 9.827956989247312e-06, "loss": 38.9503, "step": 12407 }, { "epoch": 295.42985074626864, "grad_norm": 1.3512948751449585, "learning_rate": 9.827188940092168e-06, "loss": 38.3274, "step": 12408 }, { "epoch": 295.4537313432836, "grad_norm": 1.4056692123413086, "learning_rate": 9.82642089093702e-06, "loss": 40.6534, "step": 12409 }, { "epoch": 295.4776119402985, "grad_norm": 1.3707265853881836, "learning_rate": 9.825652841781875e-06, "loss": 39.1618, "step": 12410 }, { "epoch": 295.5014925373134, "grad_norm": 1.3655915260314941, "learning_rate": 9.82488479262673e-06, "loss": 40.2916, "step": 12411 }, { "epoch": 295.52537313432833, "grad_norm": 1.3609657287597656, "learning_rate": 9.824116743471584e-06, "loss": 40.1597, "step": 12412 }, { "epoch": 295.5492537313433, "grad_norm": 1.345861792564392, "learning_rate": 9.823348694316437e-06, "loss": 41.6828, "step": 12413 }, { "epoch": 295.5731343283582, "grad_norm": 1.347765326499939, "learning_rate": 9.822580645161291e-06, "loss": 39.9462, "step": 12414 }, { "epoch": 295.5970149253731, "grad_norm": 1.3266876935958862, "learning_rate": 9.821812596006145e-06, "loss": 40.1368, "step": 12415 }, { "epoch": 295.6208955223881, "grad_norm": 1.3858565092086792, "learning_rate": 9.821044546851e-06, "loss": 39.3976, "step": 12416 }, { "epoch": 295.644776119403, "grad_norm": 1.3477745056152344, "learning_rate": 9.820276497695852e-06, "loss": 40.3953, "step": 12417 }, { "epoch": 295.6686567164179, "grad_norm": 1.402645230293274, "learning_rate": 9.819508448540708e-06, "loss": 38.9428, "step": 12418 }, { "epoch": 295.6925373134328, "grad_norm": 1.3780109882354736, "learning_rate": 9.818740399385561e-06, "loss": 39.8974, "step": 12419 }, { "epoch": 295.7164179104478, "grad_norm": 1.3658784627914429, "learning_rate": 9.817972350230416e-06, "loss": 39.4101, "step": 12420 }, { "epoch": 295.7402985074627, "grad_norm": 1.3879115581512451, "learning_rate": 9.81720430107527e-06, "loss": 40.3312, "step": 12421 }, { "epoch": 295.7641791044776, "grad_norm": 1.3454707860946655, "learning_rate": 9.816436251920124e-06, "loss": 39.8484, "step": 12422 }, { "epoch": 295.78805970149256, "grad_norm": 1.3413714170455933, "learning_rate": 9.815668202764977e-06, "loss": 40.2047, "step": 12423 }, { "epoch": 295.81194029850747, "grad_norm": NaN, "learning_rate": 9.814900153609831e-06, "loss": 45.4153, "step": 12424 }, { "epoch": 295.8358208955224, "grad_norm": 1.3731598854064941, "learning_rate": 9.814900153609831e-06, "loss": 39.7404, "step": 12425 }, { "epoch": 295.85970149253734, "grad_norm": 1.3375922441482544, "learning_rate": 9.814132104454686e-06, "loss": 40.1134, "step": 12426 }, { "epoch": 295.88358208955225, "grad_norm": 1.3455806970596313, "learning_rate": 9.81336405529954e-06, "loss": 39.6467, "step": 12427 }, { "epoch": 295.90746268656716, "grad_norm": 1.3134150505065918, "learning_rate": 9.812596006144393e-06, "loss": 40.4202, "step": 12428 }, { "epoch": 295.93134328358207, "grad_norm": 1.3245601654052734, "learning_rate": 9.811827956989249e-06, "loss": 38.7007, "step": 12429 }, { "epoch": 295.95522388059703, "grad_norm": 1.3613170385360718, "learning_rate": 9.811059907834102e-06, "loss": 39.8665, "step": 12430 }, { "epoch": 295.97910447761194, "grad_norm": 1.3692213296890259, "learning_rate": 9.810291858678956e-06, "loss": 40.0719, "step": 12431 }, { "epoch": 296.0, "grad_norm": 1.3558101654052734, "learning_rate": 9.80952380952381e-06, "loss": 34.6985, "step": 12432 }, { "epoch": 296.0238805970149, "grad_norm": 1.3290834426879883, "learning_rate": 9.808755760368665e-06, "loss": 39.6145, "step": 12433 }, { "epoch": 296.0477611940299, "grad_norm": NaN, "learning_rate": 9.807987711213517e-06, "loss": 44.1457, "step": 12434 }, { "epoch": 296.0716417910448, "grad_norm": 1.378352403640747, "learning_rate": 9.807987711213517e-06, "loss": 39.2732, "step": 12435 }, { "epoch": 296.0955223880597, "grad_norm": 1.3713290691375732, "learning_rate": 9.807219662058372e-06, "loss": 40.7018, "step": 12436 }, { "epoch": 296.1194029850746, "grad_norm": 1.4330854415893555, "learning_rate": 9.806451612903226e-06, "loss": 39.5253, "step": 12437 }, { "epoch": 296.14328358208957, "grad_norm": 1.3374511003494263, "learning_rate": 9.80568356374808e-06, "loss": 39.7002, "step": 12438 }, { "epoch": 296.1671641791045, "grad_norm": 1.440302848815918, "learning_rate": 9.804915514592935e-06, "loss": 38.8295, "step": 12439 }, { "epoch": 296.1910447761194, "grad_norm": 1.3476799726486206, "learning_rate": 9.80414746543779e-06, "loss": 40.7003, "step": 12440 }, { "epoch": 296.21492537313435, "grad_norm": 1.3427562713623047, "learning_rate": 9.803379416282642e-06, "loss": 39.5533, "step": 12441 }, { "epoch": 296.23880597014926, "grad_norm": 1.4465036392211914, "learning_rate": 9.802611367127496e-06, "loss": 40.6758, "step": 12442 }, { "epoch": 296.26268656716417, "grad_norm": 1.3665467500686646, "learning_rate": 9.801843317972351e-06, "loss": 40.5398, "step": 12443 }, { "epoch": 296.28656716417913, "grad_norm": 1.3499517440795898, "learning_rate": 9.801075268817205e-06, "loss": 39.887, "step": 12444 }, { "epoch": 296.31044776119404, "grad_norm": 1.370651364326477, "learning_rate": 9.80030721966206e-06, "loss": 40.3238, "step": 12445 }, { "epoch": 296.33432835820895, "grad_norm": 1.3452070951461792, "learning_rate": 9.799539170506914e-06, "loss": 39.2191, "step": 12446 }, { "epoch": 296.35820895522386, "grad_norm": 1.3482840061187744, "learning_rate": 9.798771121351768e-06, "loss": 39.2773, "step": 12447 }, { "epoch": 296.3820895522388, "grad_norm": 1.3249294757843018, "learning_rate": 9.798003072196621e-06, "loss": 40.1094, "step": 12448 }, { "epoch": 296.40597014925373, "grad_norm": 1.3328710794448853, "learning_rate": 9.797235023041475e-06, "loss": 39.1164, "step": 12449 }, { "epoch": 296.42985074626864, "grad_norm": 1.3245398998260498, "learning_rate": 9.79646697388633e-06, "loss": 41.1482, "step": 12450 }, { "epoch": 296.4537313432836, "grad_norm": 1.3343639373779297, "learning_rate": 9.795698924731184e-06, "loss": 41.6784, "step": 12451 }, { "epoch": 296.4776119402985, "grad_norm": 1.3197343349456787, "learning_rate": 9.794930875576037e-06, "loss": 40.1885, "step": 12452 }, { "epoch": 296.5014925373134, "grad_norm": 1.363115668296814, "learning_rate": 9.794162826420891e-06, "loss": 40.0642, "step": 12453 }, { "epoch": 296.52537313432833, "grad_norm": 1.4106580018997192, "learning_rate": 9.793394777265746e-06, "loss": 39.6719, "step": 12454 }, { "epoch": 296.5492537313433, "grad_norm": 1.377137303352356, "learning_rate": 9.7926267281106e-06, "loss": 39.8489, "step": 12455 }, { "epoch": 296.5731343283582, "grad_norm": 1.4265022277832031, "learning_rate": 9.791858678955454e-06, "loss": 39.895, "step": 12456 }, { "epoch": 296.5970149253731, "grad_norm": 1.3463134765625, "learning_rate": 9.791090629800309e-06, "loss": 39.2087, "step": 12457 }, { "epoch": 296.6208955223881, "grad_norm": 1.3684382438659668, "learning_rate": 9.790322580645162e-06, "loss": 38.9459, "step": 12458 }, { "epoch": 296.644776119403, "grad_norm": 1.3666244745254517, "learning_rate": 9.789554531490016e-06, "loss": 39.2584, "step": 12459 }, { "epoch": 296.6686567164179, "grad_norm": 1.3380533456802368, "learning_rate": 9.78878648233487e-06, "loss": 40.807, "step": 12460 }, { "epoch": 296.6925373134328, "grad_norm": 1.3588554859161377, "learning_rate": 9.788018433179725e-06, "loss": 39.4138, "step": 12461 }, { "epoch": 296.7164179104478, "grad_norm": 1.447932481765747, "learning_rate": 9.787250384024577e-06, "loss": 40.5234, "step": 12462 }, { "epoch": 296.7402985074627, "grad_norm": 1.3398653268814087, "learning_rate": 9.786482334869433e-06, "loss": 39.1525, "step": 12463 }, { "epoch": 296.7641791044776, "grad_norm": 1.4131860733032227, "learning_rate": 9.785714285714286e-06, "loss": 39.6887, "step": 12464 }, { "epoch": 296.78805970149256, "grad_norm": 1.3282935619354248, "learning_rate": 9.78494623655914e-06, "loss": 39.9601, "step": 12465 }, { "epoch": 296.81194029850747, "grad_norm": 1.3364527225494385, "learning_rate": 9.784178187403995e-06, "loss": 40.7541, "step": 12466 }, { "epoch": 296.8358208955224, "grad_norm": 1.3570165634155273, "learning_rate": 9.78341013824885e-06, "loss": 40.1655, "step": 12467 }, { "epoch": 296.85970149253734, "grad_norm": 1.3552765846252441, "learning_rate": 9.782642089093702e-06, "loss": 39.6863, "step": 12468 }, { "epoch": 296.88358208955225, "grad_norm": 1.3201886415481567, "learning_rate": 9.781874039938556e-06, "loss": 39.365, "step": 12469 }, { "epoch": 296.90746268656716, "grad_norm": 1.3387086391448975, "learning_rate": 9.78110599078341e-06, "loss": 38.4495, "step": 12470 }, { "epoch": 296.93134328358207, "grad_norm": 1.3278813362121582, "learning_rate": 9.780337941628265e-06, "loss": 38.9737, "step": 12471 }, { "epoch": 296.95522388059703, "grad_norm": 1.3501360416412354, "learning_rate": 9.779569892473118e-06, "loss": 38.8957, "step": 12472 }, { "epoch": 296.97910447761194, "grad_norm": 1.3308594226837158, "learning_rate": 9.778801843317974e-06, "loss": 40.2225, "step": 12473 }, { "epoch": 297.0, "grad_norm": 1.3560295104980469, "learning_rate": 9.778033794162827e-06, "loss": 34.2742, "step": 12474 }, { "epoch": 297.0238805970149, "grad_norm": 1.3274568319320679, "learning_rate": 9.777265745007681e-06, "loss": 39.5358, "step": 12475 }, { "epoch": 297.0477611940299, "grad_norm": 1.3885964155197144, "learning_rate": 9.776497695852535e-06, "loss": 39.6968, "step": 12476 }, { "epoch": 297.0716417910448, "grad_norm": 1.465171217918396, "learning_rate": 9.77572964669739e-06, "loss": 40.0801, "step": 12477 }, { "epoch": 297.0955223880597, "grad_norm": 1.3482884168624878, "learning_rate": 9.774961597542244e-06, "loss": 40.6968, "step": 12478 }, { "epoch": 297.1194029850746, "grad_norm": 1.379891037940979, "learning_rate": 9.774193548387097e-06, "loss": 39.4729, "step": 12479 }, { "epoch": 297.14328358208957, "grad_norm": 1.3223867416381836, "learning_rate": 9.773425499231953e-06, "loss": 40.7983, "step": 12480 }, { "epoch": 297.1671641791045, "grad_norm": 1.389102816581726, "learning_rate": 9.772657450076806e-06, "loss": 39.9727, "step": 12481 }, { "epoch": 297.1910447761194, "grad_norm": 1.3546005487442017, "learning_rate": 9.77188940092166e-06, "loss": 38.0178, "step": 12482 }, { "epoch": 297.21492537313435, "grad_norm": 1.3473412990570068, "learning_rate": 9.771121351766514e-06, "loss": 39.8572, "step": 12483 }, { "epoch": 297.23880597014926, "grad_norm": 1.3369476795196533, "learning_rate": 9.770353302611369e-06, "loss": 40.5305, "step": 12484 }, { "epoch": 297.26268656716417, "grad_norm": 1.346184253692627, "learning_rate": 9.769585253456221e-06, "loss": 39.9643, "step": 12485 }, { "epoch": 297.28656716417913, "grad_norm": 1.3673268556594849, "learning_rate": 9.768817204301076e-06, "loss": 40.2279, "step": 12486 }, { "epoch": 297.31044776119404, "grad_norm": 1.3692402839660645, "learning_rate": 9.76804915514593e-06, "loss": 39.207, "step": 12487 }, { "epoch": 297.33432835820895, "grad_norm": 1.331133246421814, "learning_rate": 9.767281105990785e-06, "loss": 39.6682, "step": 12488 }, { "epoch": 297.35820895522386, "grad_norm": 1.3367843627929688, "learning_rate": 9.766513056835637e-06, "loss": 40.6476, "step": 12489 }, { "epoch": 297.3820895522388, "grad_norm": 1.3441169261932373, "learning_rate": 9.765745007680493e-06, "loss": 39.373, "step": 12490 }, { "epoch": 297.40597014925373, "grad_norm": 1.3363523483276367, "learning_rate": 9.764976958525346e-06, "loss": 39.263, "step": 12491 }, { "epoch": 297.42985074626864, "grad_norm": 1.3479928970336914, "learning_rate": 9.7642089093702e-06, "loss": 39.8337, "step": 12492 }, { "epoch": 297.4537313432836, "grad_norm": 1.3329163789749146, "learning_rate": 9.763440860215055e-06, "loss": 39.6877, "step": 12493 }, { "epoch": 297.4776119402985, "grad_norm": 1.3591254949569702, "learning_rate": 9.76267281105991e-06, "loss": 38.5259, "step": 12494 }, { "epoch": 297.5014925373134, "grad_norm": 1.3373651504516602, "learning_rate": 9.761904761904762e-06, "loss": 40.6433, "step": 12495 }, { "epoch": 297.52537313432833, "grad_norm": 1.4196820259094238, "learning_rate": 9.761136712749616e-06, "loss": 39.7973, "step": 12496 }, { "epoch": 297.5492537313433, "grad_norm": 1.3090853691101074, "learning_rate": 9.76036866359447e-06, "loss": 39.9212, "step": 12497 }, { "epoch": 297.5731343283582, "grad_norm": 1.3402692079544067, "learning_rate": 9.759600614439325e-06, "loss": 38.6373, "step": 12498 }, { "epoch": 297.5970149253731, "grad_norm": 1.3605996370315552, "learning_rate": 9.75883256528418e-06, "loss": 41.23, "step": 12499 }, { "epoch": 297.6208955223881, "grad_norm": 1.3669112920761108, "learning_rate": 9.758064516129034e-06, "loss": 39.791, "step": 12500 }, { "epoch": 297.644776119403, "grad_norm": 1.2975012063980103, "learning_rate": 9.757296466973887e-06, "loss": 40.0856, "step": 12501 }, { "epoch": 297.6686567164179, "grad_norm": 1.3601964712142944, "learning_rate": 9.756528417818741e-06, "loss": 38.4881, "step": 12502 }, { "epoch": 297.6925373134328, "grad_norm": 1.3754804134368896, "learning_rate": 9.755760368663595e-06, "loss": 39.8309, "step": 12503 }, { "epoch": 297.7164179104478, "grad_norm": 1.3505442142486572, "learning_rate": 9.75499231950845e-06, "loss": 40.9224, "step": 12504 }, { "epoch": 297.7402985074627, "grad_norm": 1.3477957248687744, "learning_rate": 9.754224270353302e-06, "loss": 39.1197, "step": 12505 }, { "epoch": 297.7641791044776, "grad_norm": 1.3202415704727173, "learning_rate": 9.753456221198157e-06, "loss": 40.8585, "step": 12506 }, { "epoch": 297.78805970149256, "grad_norm": 1.3938634395599365, "learning_rate": 9.752688172043011e-06, "loss": 39.6154, "step": 12507 }, { "epoch": 297.81194029850747, "grad_norm": 1.3415958881378174, "learning_rate": 9.751920122887866e-06, "loss": 39.7303, "step": 12508 }, { "epoch": 297.8358208955224, "grad_norm": 1.3713908195495605, "learning_rate": 9.75115207373272e-06, "loss": 38.6207, "step": 12509 }, { "epoch": 297.85970149253734, "grad_norm": 1.3323261737823486, "learning_rate": 9.750384024577574e-06, "loss": 39.0726, "step": 12510 }, { "epoch": 297.88358208955225, "grad_norm": 1.4704492092132568, "learning_rate": 9.749615975422427e-06, "loss": 39.9677, "step": 12511 }, { "epoch": 297.90746268656716, "grad_norm": 1.3719098567962646, "learning_rate": 9.748847926267281e-06, "loss": 39.9573, "step": 12512 }, { "epoch": 297.93134328358207, "grad_norm": 1.3476169109344482, "learning_rate": 9.748079877112136e-06, "loss": 39.9597, "step": 12513 }, { "epoch": 297.95522388059703, "grad_norm": 1.375940203666687, "learning_rate": 9.74731182795699e-06, "loss": 40.1794, "step": 12514 }, { "epoch": 297.97910447761194, "grad_norm": 1.3494105339050293, "learning_rate": 9.746543778801845e-06, "loss": 39.745, "step": 12515 }, { "epoch": 298.0, "grad_norm": 1.3168789148330688, "learning_rate": 9.745775729646699e-06, "loss": 35.2545, "step": 12516 }, { "epoch": 298.0238805970149, "grad_norm": 1.3103113174438477, "learning_rate": 9.745007680491553e-06, "loss": 40.5034, "step": 12517 }, { "epoch": 298.0477611940299, "grad_norm": 1.3686431646347046, "learning_rate": 9.744239631336406e-06, "loss": 39.5346, "step": 12518 }, { "epoch": 298.0716417910448, "grad_norm": 1.3161636590957642, "learning_rate": 9.74347158218126e-06, "loss": 40.0991, "step": 12519 }, { "epoch": 298.0955223880597, "grad_norm": 1.3967808485031128, "learning_rate": 9.742703533026115e-06, "loss": 40.2769, "step": 12520 }, { "epoch": 298.1194029850746, "grad_norm": 1.3339892625808716, "learning_rate": 9.74193548387097e-06, "loss": 40.9469, "step": 12521 }, { "epoch": 298.14328358208957, "grad_norm": 1.3345316648483276, "learning_rate": 9.741167434715822e-06, "loss": 39.4551, "step": 12522 }, { "epoch": 298.1671641791045, "grad_norm": 1.291387915611267, "learning_rate": 9.740399385560678e-06, "loss": 39.6719, "step": 12523 }, { "epoch": 298.1910447761194, "grad_norm": 1.3396512269973755, "learning_rate": 9.73963133640553e-06, "loss": 39.6672, "step": 12524 }, { "epoch": 298.21492537313435, "grad_norm": 1.4008339643478394, "learning_rate": 9.738863287250385e-06, "loss": 39.2287, "step": 12525 }, { "epoch": 298.23880597014926, "grad_norm": 1.3402092456817627, "learning_rate": 9.73809523809524e-06, "loss": 41.1075, "step": 12526 }, { "epoch": 298.26268656716417, "grad_norm": 1.3202424049377441, "learning_rate": 9.737327188940094e-06, "loss": 39.5672, "step": 12527 }, { "epoch": 298.28656716417913, "grad_norm": 1.3928008079528809, "learning_rate": 9.736559139784946e-06, "loss": 39.1569, "step": 12528 }, { "epoch": 298.31044776119404, "grad_norm": 1.326823115348816, "learning_rate": 9.735791090629801e-06, "loss": 39.7786, "step": 12529 }, { "epoch": 298.33432835820895, "grad_norm": 1.3675310611724854, "learning_rate": 9.735023041474655e-06, "loss": 40.3798, "step": 12530 }, { "epoch": 298.35820895522386, "grad_norm": 1.4018009901046753, "learning_rate": 9.73425499231951e-06, "loss": 39.9368, "step": 12531 }, { "epoch": 298.3820895522388, "grad_norm": 1.3318321704864502, "learning_rate": 9.733486943164362e-06, "loss": 39.2089, "step": 12532 }, { "epoch": 298.40597014925373, "grad_norm": 1.3800923824310303, "learning_rate": 9.732718894009218e-06, "loss": 39.3173, "step": 12533 }, { "epoch": 298.42985074626864, "grad_norm": 1.3737059831619263, "learning_rate": 9.731950844854071e-06, "loss": 39.4696, "step": 12534 }, { "epoch": 298.4537313432836, "grad_norm": 1.4488118886947632, "learning_rate": 9.731182795698925e-06, "loss": 38.2507, "step": 12535 }, { "epoch": 298.4776119402985, "grad_norm": 1.3755863904953003, "learning_rate": 9.73041474654378e-06, "loss": 39.88, "step": 12536 }, { "epoch": 298.5014925373134, "grad_norm": 1.4037566184997559, "learning_rate": 9.729646697388634e-06, "loss": 39.1805, "step": 12537 }, { "epoch": 298.52537313432833, "grad_norm": 1.3505160808563232, "learning_rate": 9.728878648233487e-06, "loss": 40.4877, "step": 12538 }, { "epoch": 298.5492537313433, "grad_norm": 1.3249446153640747, "learning_rate": 9.728110599078341e-06, "loss": 39.6774, "step": 12539 }, { "epoch": 298.5731343283582, "grad_norm": 1.3377820253372192, "learning_rate": 9.727342549923196e-06, "loss": 41.0823, "step": 12540 }, { "epoch": 298.5970149253731, "grad_norm": 1.4506940841674805, "learning_rate": 9.72657450076805e-06, "loss": 40.2263, "step": 12541 }, { "epoch": 298.6208955223881, "grad_norm": 1.3631247282028198, "learning_rate": 9.725806451612903e-06, "loss": 39.6468, "step": 12542 }, { "epoch": 298.644776119403, "grad_norm": 1.3837403059005737, "learning_rate": 9.725038402457759e-06, "loss": 39.7162, "step": 12543 }, { "epoch": 298.6686567164179, "grad_norm": 1.3425891399383545, "learning_rate": 9.724270353302612e-06, "loss": 39.4191, "step": 12544 }, { "epoch": 298.6925373134328, "grad_norm": 1.350037932395935, "learning_rate": 9.723502304147466e-06, "loss": 39.4149, "step": 12545 }, { "epoch": 298.7164179104478, "grad_norm": 1.3475396633148193, "learning_rate": 9.72273425499232e-06, "loss": 39.8077, "step": 12546 }, { "epoch": 298.7402985074627, "grad_norm": 1.3671423196792603, "learning_rate": 9.721966205837175e-06, "loss": 40.204, "step": 12547 }, { "epoch": 298.7641791044776, "grad_norm": 1.3700217008590698, "learning_rate": 9.721198156682027e-06, "loss": 39.2159, "step": 12548 }, { "epoch": 298.78805970149256, "grad_norm": 1.3983404636383057, "learning_rate": 9.720430107526882e-06, "loss": 40.1103, "step": 12549 }, { "epoch": 298.81194029850747, "grad_norm": 1.35637366771698, "learning_rate": 9.719662058371736e-06, "loss": 39.8219, "step": 12550 }, { "epoch": 298.8358208955224, "grad_norm": 1.3659613132476807, "learning_rate": 9.71889400921659e-06, "loss": 39.7939, "step": 12551 }, { "epoch": 298.85970149253734, "grad_norm": 1.3472787141799927, "learning_rate": 9.718125960061445e-06, "loss": 40.1811, "step": 12552 }, { "epoch": 298.88358208955225, "grad_norm": 1.312730073928833, "learning_rate": 9.7173579109063e-06, "loss": 39.6976, "step": 12553 }, { "epoch": 298.90746268656716, "grad_norm": 1.343832015991211, "learning_rate": 9.716589861751154e-06, "loss": 40.3149, "step": 12554 }, { "epoch": 298.93134328358207, "grad_norm": 1.3567701578140259, "learning_rate": 9.715821812596006e-06, "loss": 39.3748, "step": 12555 }, { "epoch": 298.95522388059703, "grad_norm": 1.37165105342865, "learning_rate": 9.71505376344086e-06, "loss": 40.1276, "step": 12556 }, { "epoch": 298.97910447761194, "grad_norm": 1.389018177986145, "learning_rate": 9.714285714285715e-06, "loss": 39.1144, "step": 12557 }, { "epoch": 299.0, "grad_norm": 1.3244390487670898, "learning_rate": 9.71351766513057e-06, "loss": 34.0428, "step": 12558 }, { "epoch": 299.0238805970149, "grad_norm": 1.3653366565704346, "learning_rate": 9.712749615975422e-06, "loss": 40.0635, "step": 12559 }, { "epoch": 299.0477611940299, "grad_norm": 1.325190782546997, "learning_rate": 9.711981566820278e-06, "loss": 39.3184, "step": 12560 }, { "epoch": 299.0716417910448, "grad_norm": 1.3510862588882446, "learning_rate": 9.711213517665131e-06, "loss": 41.3473, "step": 12561 }, { "epoch": 299.0955223880597, "grad_norm": 1.3670927286148071, "learning_rate": 9.710445468509985e-06, "loss": 40.4661, "step": 12562 }, { "epoch": 299.1194029850746, "grad_norm": 1.4206501245498657, "learning_rate": 9.70967741935484e-06, "loss": 39.5681, "step": 12563 }, { "epoch": 299.14328358208957, "grad_norm": 1.3419721126556396, "learning_rate": 9.708909370199694e-06, "loss": 39.8101, "step": 12564 }, { "epoch": 299.1671641791045, "grad_norm": 1.3433177471160889, "learning_rate": 9.708141321044547e-06, "loss": 40.1283, "step": 12565 }, { "epoch": 299.1910447761194, "grad_norm": 1.3483846187591553, "learning_rate": 9.707373271889401e-06, "loss": 39.898, "step": 12566 }, { "epoch": 299.21492537313435, "grad_norm": 1.3293039798736572, "learning_rate": 9.706605222734256e-06, "loss": 39.5821, "step": 12567 }, { "epoch": 299.23880597014926, "grad_norm": 1.3781390190124512, "learning_rate": 9.70583717357911e-06, "loss": 39.6567, "step": 12568 }, { "epoch": 299.26268656716417, "grad_norm": 1.3596470355987549, "learning_rate": 9.705069124423964e-06, "loss": 40.364, "step": 12569 }, { "epoch": 299.28656716417913, "grad_norm": 1.359511375427246, "learning_rate": 9.704301075268819e-06, "loss": 40.2639, "step": 12570 }, { "epoch": 299.31044776119404, "grad_norm": 1.4580790996551514, "learning_rate": 9.703533026113671e-06, "loss": 39.8875, "step": 12571 }, { "epoch": 299.33432835820895, "grad_norm": 1.4354662895202637, "learning_rate": 9.702764976958526e-06, "loss": 38.6244, "step": 12572 }, { "epoch": 299.35820895522386, "grad_norm": 1.5091147422790527, "learning_rate": 9.70199692780338e-06, "loss": 39.5232, "step": 12573 }, { "epoch": 299.3820895522388, "grad_norm": 1.3274410963058472, "learning_rate": 9.701228878648235e-06, "loss": 39.5237, "step": 12574 }, { "epoch": 299.40597014925373, "grad_norm": 1.3536714315414429, "learning_rate": 9.700460829493087e-06, "loss": 39.0075, "step": 12575 }, { "epoch": 299.42985074626864, "grad_norm": 1.3335291147232056, "learning_rate": 9.699692780337943e-06, "loss": 39.646, "step": 12576 }, { "epoch": 299.4537313432836, "grad_norm": 1.33394455909729, "learning_rate": 9.698924731182796e-06, "loss": 40.2574, "step": 12577 }, { "epoch": 299.4776119402985, "grad_norm": 1.3775063753128052, "learning_rate": 9.69815668202765e-06, "loss": 39.7572, "step": 12578 }, { "epoch": 299.5014925373134, "grad_norm": 1.3762882947921753, "learning_rate": 9.697388632872505e-06, "loss": 40.2971, "step": 12579 }, { "epoch": 299.52537313432833, "grad_norm": 1.319227933883667, "learning_rate": 9.69662058371736e-06, "loss": 39.175, "step": 12580 }, { "epoch": 299.5492537313433, "grad_norm": 1.336674451828003, "learning_rate": 9.695852534562212e-06, "loss": 39.7144, "step": 12581 }, { "epoch": 299.5731343283582, "grad_norm": 1.348189353942871, "learning_rate": 9.695084485407066e-06, "loss": 40.3608, "step": 12582 }, { "epoch": 299.5970149253731, "grad_norm": 1.376707911491394, "learning_rate": 9.69431643625192e-06, "loss": 38.7137, "step": 12583 }, { "epoch": 299.6208955223881, "grad_norm": 1.3515238761901855, "learning_rate": 9.693548387096775e-06, "loss": 40.0398, "step": 12584 }, { "epoch": 299.644776119403, "grad_norm": 1.3244366645812988, "learning_rate": 9.692780337941628e-06, "loss": 40.2406, "step": 12585 }, { "epoch": 299.6686567164179, "grad_norm": 1.3240755796432495, "learning_rate": 9.692012288786484e-06, "loss": 39.4715, "step": 12586 }, { "epoch": 299.6925373134328, "grad_norm": 1.3450161218643188, "learning_rate": 9.691244239631337e-06, "loss": 39.5401, "step": 12587 }, { "epoch": 299.7164179104478, "grad_norm": 1.3614428043365479, "learning_rate": 9.690476190476191e-06, "loss": 39.349, "step": 12588 }, { "epoch": 299.7402985074627, "grad_norm": 1.3769946098327637, "learning_rate": 9.689708141321045e-06, "loss": 41.5788, "step": 12589 }, { "epoch": 299.7641791044776, "grad_norm": 1.3412418365478516, "learning_rate": 9.6889400921659e-06, "loss": 38.7476, "step": 12590 }, { "epoch": 299.78805970149256, "grad_norm": 1.3806984424591064, "learning_rate": 9.688172043010754e-06, "loss": 40.1056, "step": 12591 }, { "epoch": 299.81194029850747, "grad_norm": 1.3299615383148193, "learning_rate": 9.687403993855607e-06, "loss": 40.3366, "step": 12592 }, { "epoch": 299.8358208955224, "grad_norm": 1.3296973705291748, "learning_rate": 9.686635944700463e-06, "loss": 39.5177, "step": 12593 }, { "epoch": 299.85970149253734, "grad_norm": 1.3284283876419067, "learning_rate": 9.685867895545316e-06, "loss": 39.851, "step": 12594 }, { "epoch": 299.88358208955225, "grad_norm": 1.3745864629745483, "learning_rate": 9.68509984639017e-06, "loss": 39.9436, "step": 12595 }, { "epoch": 299.90746268656716, "grad_norm": 1.3521562814712524, "learning_rate": 9.684331797235024e-06, "loss": 39.7698, "step": 12596 }, { "epoch": 299.93134328358207, "grad_norm": 1.3993449211120605, "learning_rate": 9.683563748079879e-06, "loss": 38.247, "step": 12597 }, { "epoch": 299.95522388059703, "grad_norm": 1.3439006805419922, "learning_rate": 9.682795698924731e-06, "loss": 40.0123, "step": 12598 }, { "epoch": 299.97910447761194, "grad_norm": 1.350148320198059, "learning_rate": 9.682027649769586e-06, "loss": 39.761, "step": 12599 }, { "epoch": 300.0, "grad_norm": 1.3337271213531494, "learning_rate": 9.68125960061444e-06, "loss": 34.5813, "step": 12600 }, { "epoch": 300.0238805970149, "grad_norm": 1.3550653457641602, "learning_rate": 9.680491551459295e-06, "loss": 41.1562, "step": 12601 }, { "epoch": 300.0477611940299, "grad_norm": 1.3235752582550049, "learning_rate": 9.679723502304147e-06, "loss": 38.7533, "step": 12602 }, { "epoch": 300.0716417910448, "grad_norm": 1.3229491710662842, "learning_rate": 9.678955453149003e-06, "loss": 39.8484, "step": 12603 }, { "epoch": 300.0955223880597, "grad_norm": 1.4746869802474976, "learning_rate": 9.678187403993856e-06, "loss": 39.4896, "step": 12604 }, { "epoch": 300.1194029850746, "grad_norm": 1.3418134450912476, "learning_rate": 9.67741935483871e-06, "loss": 39.7731, "step": 12605 }, { "epoch": 300.14328358208957, "grad_norm": 1.3531923294067383, "learning_rate": 9.676651305683565e-06, "loss": 40.2075, "step": 12606 }, { "epoch": 300.1671641791045, "grad_norm": 1.3416266441345215, "learning_rate": 9.67588325652842e-06, "loss": 40.8514, "step": 12607 }, { "epoch": 300.1910447761194, "grad_norm": 1.326061725616455, "learning_rate": 9.675115207373272e-06, "loss": 40.078, "step": 12608 }, { "epoch": 300.21492537313435, "grad_norm": 1.3717138767242432, "learning_rate": 9.674347158218126e-06, "loss": 39.7811, "step": 12609 }, { "epoch": 300.23880597014926, "grad_norm": 1.322521448135376, "learning_rate": 9.67357910906298e-06, "loss": 40.2176, "step": 12610 }, { "epoch": 300.26268656716417, "grad_norm": NaN, "learning_rate": 9.672811059907835e-06, "loss": 44.2503, "step": 12611 }, { "epoch": 300.28656716417913, "grad_norm": 1.329533576965332, "learning_rate": 9.672811059907835e-06, "loss": 38.8934, "step": 12612 }, { "epoch": 300.31044776119404, "grad_norm": 1.3591505289077759, "learning_rate": 9.67204301075269e-06, "loss": 40.5248, "step": 12613 }, { "epoch": 300.33432835820895, "grad_norm": 1.3239678144454956, "learning_rate": 9.671274961597544e-06, "loss": 39.912, "step": 12614 }, { "epoch": 300.35820895522386, "grad_norm": 1.4500211477279663, "learning_rate": 9.670506912442396e-06, "loss": 38.1549, "step": 12615 }, { "epoch": 300.3820895522388, "grad_norm": 1.381905198097229, "learning_rate": 9.669738863287251e-06, "loss": 39.4929, "step": 12616 }, { "epoch": 300.40597014925373, "grad_norm": 1.3318748474121094, "learning_rate": 9.668970814132105e-06, "loss": 38.9169, "step": 12617 }, { "epoch": 300.42985074626864, "grad_norm": 1.3806242942810059, "learning_rate": 9.66820276497696e-06, "loss": 39.2582, "step": 12618 }, { "epoch": 300.4537313432836, "grad_norm": 1.3014929294586182, "learning_rate": 9.667434715821812e-06, "loss": 40.0552, "step": 12619 }, { "epoch": 300.4776119402985, "grad_norm": 1.3541465997695923, "learning_rate": 9.666666666666667e-06, "loss": 39.1173, "step": 12620 }, { "epoch": 300.5014925373134, "grad_norm": 1.338958978652954, "learning_rate": 9.665898617511521e-06, "loss": 39.0282, "step": 12621 }, { "epoch": 300.52537313432833, "grad_norm": 1.3544566631317139, "learning_rate": 9.665130568356375e-06, "loss": 39.6045, "step": 12622 }, { "epoch": 300.5492537313433, "grad_norm": 1.3536746501922607, "learning_rate": 9.66436251920123e-06, "loss": 39.9885, "step": 12623 }, { "epoch": 300.5731343283582, "grad_norm": 1.3834919929504395, "learning_rate": 9.663594470046084e-06, "loss": 40.7764, "step": 12624 }, { "epoch": 300.5970149253731, "grad_norm": 1.3482842445373535, "learning_rate": 9.662826420890939e-06, "loss": 39.6264, "step": 12625 }, { "epoch": 300.6208955223881, "grad_norm": 1.3372992277145386, "learning_rate": 9.662058371735791e-06, "loss": 38.6624, "step": 12626 }, { "epoch": 300.644776119403, "grad_norm": 1.3168578147888184, "learning_rate": 9.661290322580646e-06, "loss": 39.1013, "step": 12627 }, { "epoch": 300.6686567164179, "grad_norm": 1.432504653930664, "learning_rate": 9.6605222734255e-06, "loss": 38.3793, "step": 12628 }, { "epoch": 300.6925373134328, "grad_norm": 1.3363069295883179, "learning_rate": 9.659754224270354e-06, "loss": 39.0472, "step": 12629 }, { "epoch": 300.7164179104478, "grad_norm": 1.3596229553222656, "learning_rate": 9.658986175115209e-06, "loss": 40.3776, "step": 12630 }, { "epoch": 300.7402985074627, "grad_norm": 1.3634675741195679, "learning_rate": 9.658218125960063e-06, "loss": 39.9369, "step": 12631 }, { "epoch": 300.7641791044776, "grad_norm": 1.3322718143463135, "learning_rate": 9.657450076804916e-06, "loss": 40.2539, "step": 12632 }, { "epoch": 300.78805970149256, "grad_norm": 1.3592358827590942, "learning_rate": 9.65668202764977e-06, "loss": 39.6446, "step": 12633 }, { "epoch": 300.81194029850747, "grad_norm": 1.375900387763977, "learning_rate": 9.655913978494625e-06, "loss": 40.0198, "step": 12634 }, { "epoch": 300.8358208955224, "grad_norm": 1.3708959817886353, "learning_rate": 9.655145929339479e-06, "loss": 39.6719, "step": 12635 }, { "epoch": 300.85970149253734, "grad_norm": 1.3167537450790405, "learning_rate": 9.654377880184332e-06, "loss": 40.8419, "step": 12636 }, { "epoch": 300.88358208955225, "grad_norm": 1.3980684280395508, "learning_rate": 9.653609831029186e-06, "loss": 39.2367, "step": 12637 }, { "epoch": 300.90746268656716, "grad_norm": 1.3153401613235474, "learning_rate": 9.65284178187404e-06, "loss": 40.4549, "step": 12638 }, { "epoch": 300.93134328358207, "grad_norm": 1.38412344455719, "learning_rate": 9.652073732718895e-06, "loss": 40.9893, "step": 12639 }, { "epoch": 300.95522388059703, "grad_norm": 1.3708827495574951, "learning_rate": 9.65130568356375e-06, "loss": 40.4019, "step": 12640 }, { "epoch": 300.97910447761194, "grad_norm": 1.3264217376708984, "learning_rate": 9.650537634408604e-06, "loss": 40.749, "step": 12641 }, { "epoch": 301.0, "grad_norm": 1.3699177503585815, "learning_rate": 9.649769585253456e-06, "loss": 35.3522, "step": 12642 }, { "epoch": 301.0238805970149, "grad_norm": 1.381623387336731, "learning_rate": 9.64900153609831e-06, "loss": 39.3422, "step": 12643 }, { "epoch": 301.0477611940299, "grad_norm": 1.32789945602417, "learning_rate": 9.648233486943165e-06, "loss": 40.0121, "step": 12644 }, { "epoch": 301.0716417910448, "grad_norm": 1.3132963180541992, "learning_rate": 9.64746543778802e-06, "loss": 41.594, "step": 12645 }, { "epoch": 301.0955223880597, "grad_norm": 1.4094632863998413, "learning_rate": 9.646697388632872e-06, "loss": 38.9649, "step": 12646 }, { "epoch": 301.1194029850746, "grad_norm": 1.3353461027145386, "learning_rate": 9.645929339477728e-06, "loss": 39.8509, "step": 12647 }, { "epoch": 301.14328358208957, "grad_norm": 1.371297001838684, "learning_rate": 9.645161290322581e-06, "loss": 39.668, "step": 12648 }, { "epoch": 301.1671641791045, "grad_norm": 1.339345932006836, "learning_rate": 9.644393241167435e-06, "loss": 40.426, "step": 12649 }, { "epoch": 301.1910447761194, "grad_norm": 1.3318936824798584, "learning_rate": 9.64362519201229e-06, "loss": 39.148, "step": 12650 }, { "epoch": 301.21492537313435, "grad_norm": 1.317421555519104, "learning_rate": 9.642857142857144e-06, "loss": 39.9141, "step": 12651 }, { "epoch": 301.23880597014926, "grad_norm": 1.3858025074005127, "learning_rate": 9.642089093701997e-06, "loss": 39.04, "step": 12652 }, { "epoch": 301.26268656716417, "grad_norm": 1.3479372262954712, "learning_rate": 9.641321044546851e-06, "loss": 40.8899, "step": 12653 }, { "epoch": 301.28656716417913, "grad_norm": 1.3442624807357788, "learning_rate": 9.640552995391706e-06, "loss": 40.8677, "step": 12654 }, { "epoch": 301.31044776119404, "grad_norm": 1.348113775253296, "learning_rate": 9.63978494623656e-06, "loss": 41.1216, "step": 12655 }, { "epoch": 301.33432835820895, "grad_norm": 1.3358938694000244, "learning_rate": 9.639016897081413e-06, "loss": 39.4821, "step": 12656 }, { "epoch": 301.35820895522386, "grad_norm": 1.3874601125717163, "learning_rate": 9.638248847926269e-06, "loss": 39.3, "step": 12657 }, { "epoch": 301.3820895522388, "grad_norm": 1.363355040550232, "learning_rate": 9.637480798771121e-06, "loss": 39.2365, "step": 12658 }, { "epoch": 301.40597014925373, "grad_norm": 1.3101584911346436, "learning_rate": 9.636712749615976e-06, "loss": 39.0737, "step": 12659 }, { "epoch": 301.42985074626864, "grad_norm": 1.4125254154205322, "learning_rate": 9.63594470046083e-06, "loss": 38.5963, "step": 12660 }, { "epoch": 301.4537313432836, "grad_norm": 1.326185941696167, "learning_rate": 9.635176651305685e-06, "loss": 38.9582, "step": 12661 }, { "epoch": 301.4776119402985, "grad_norm": 1.3593380451202393, "learning_rate": 9.634408602150539e-06, "loss": 40.485, "step": 12662 }, { "epoch": 301.5014925373134, "grad_norm": 1.3217839002609253, "learning_rate": 9.633640552995392e-06, "loss": 39.6491, "step": 12663 }, { "epoch": 301.52537313432833, "grad_norm": NaN, "learning_rate": 9.632872503840248e-06, "loss": 50.7627, "step": 12664 }, { "epoch": 301.5492537313433, "grad_norm": 1.3656069040298462, "learning_rate": 9.632872503840248e-06, "loss": 39.2735, "step": 12665 }, { "epoch": 301.5731343283582, "grad_norm": 1.3402661085128784, "learning_rate": 9.6321044546851e-06, "loss": 38.7978, "step": 12666 }, { "epoch": 301.5970149253731, "grad_norm": 1.2973023653030396, "learning_rate": 9.631336405529955e-06, "loss": 39.4476, "step": 12667 }, { "epoch": 301.6208955223881, "grad_norm": 1.3430237770080566, "learning_rate": 9.63056835637481e-06, "loss": 40.0678, "step": 12668 }, { "epoch": 301.644776119403, "grad_norm": 1.3484400510787964, "learning_rate": 9.629800307219664e-06, "loss": 40.1841, "step": 12669 }, { "epoch": 301.6686567164179, "grad_norm": 1.3315935134887695, "learning_rate": 9.629032258064516e-06, "loss": 39.2595, "step": 12670 }, { "epoch": 301.6925373134328, "grad_norm": 1.349768877029419, "learning_rate": 9.62826420890937e-06, "loss": 40.6426, "step": 12671 }, { "epoch": 301.7164179104478, "grad_norm": 1.3538974523544312, "learning_rate": 9.627496159754225e-06, "loss": 40.9061, "step": 12672 }, { "epoch": 301.7402985074627, "grad_norm": 1.3443973064422607, "learning_rate": 9.62672811059908e-06, "loss": 38.521, "step": 12673 }, { "epoch": 301.7641791044776, "grad_norm": 1.3252538442611694, "learning_rate": 9.625960061443932e-06, "loss": 39.2229, "step": 12674 }, { "epoch": 301.78805970149256, "grad_norm": 1.324066162109375, "learning_rate": 9.625192012288788e-06, "loss": 40.3036, "step": 12675 }, { "epoch": 301.81194029850747, "grad_norm": 1.3287010192871094, "learning_rate": 9.624423963133641e-06, "loss": 40.0184, "step": 12676 }, { "epoch": 301.8358208955224, "grad_norm": 1.4800901412963867, "learning_rate": 9.623655913978495e-06, "loss": 40.3543, "step": 12677 }, { "epoch": 301.85970149253734, "grad_norm": 1.3507963418960571, "learning_rate": 9.62288786482335e-06, "loss": 39.0888, "step": 12678 }, { "epoch": 301.88358208955225, "grad_norm": 1.376643419265747, "learning_rate": 9.622119815668204e-06, "loss": 39.4809, "step": 12679 }, { "epoch": 301.90746268656716, "grad_norm": 1.4043809175491333, "learning_rate": 9.621351766513057e-06, "loss": 39.8787, "step": 12680 }, { "epoch": 301.93134328358207, "grad_norm": 1.3516989946365356, "learning_rate": 9.620583717357911e-06, "loss": 40.1194, "step": 12681 }, { "epoch": 301.95522388059703, "grad_norm": 1.33291494846344, "learning_rate": 9.619815668202766e-06, "loss": 39.8546, "step": 12682 }, { "epoch": 301.97910447761194, "grad_norm": 1.3477933406829834, "learning_rate": 9.61904761904762e-06, "loss": 38.745, "step": 12683 }, { "epoch": 302.0, "grad_norm": 1.3328123092651367, "learning_rate": 9.618279569892474e-06, "loss": 35.6738, "step": 12684 }, { "epoch": 302.0238805970149, "grad_norm": 1.3758219480514526, "learning_rate": 9.617511520737329e-06, "loss": 40.4523, "step": 12685 }, { "epoch": 302.0477611940299, "grad_norm": 1.4127066135406494, "learning_rate": 9.616743471582181e-06, "loss": 39.8954, "step": 12686 }, { "epoch": 302.0716417910448, "grad_norm": 1.3757494688034058, "learning_rate": 9.615975422427036e-06, "loss": 39.7456, "step": 12687 }, { "epoch": 302.0955223880597, "grad_norm": 1.340047836303711, "learning_rate": 9.61520737327189e-06, "loss": 39.1823, "step": 12688 }, { "epoch": 302.1194029850746, "grad_norm": 1.339449405670166, "learning_rate": 9.614439324116745e-06, "loss": 39.2132, "step": 12689 }, { "epoch": 302.14328358208957, "grad_norm": 1.3286352157592773, "learning_rate": 9.613671274961597e-06, "loss": 39.6406, "step": 12690 }, { "epoch": 302.1671641791045, "grad_norm": 1.4057762622833252, "learning_rate": 9.612903225806453e-06, "loss": 40.5587, "step": 12691 }, { "epoch": 302.1910447761194, "grad_norm": 1.333540916442871, "learning_rate": 9.612135176651306e-06, "loss": 40.4043, "step": 12692 }, { "epoch": 302.21492537313435, "grad_norm": 1.3230715990066528, "learning_rate": 9.61136712749616e-06, "loss": 39.6938, "step": 12693 }, { "epoch": 302.23880597014926, "grad_norm": 1.3517324924468994, "learning_rate": 9.610599078341015e-06, "loss": 39.9191, "step": 12694 }, { "epoch": 302.26268656716417, "grad_norm": 1.3605026006698608, "learning_rate": 9.60983102918587e-06, "loss": 39.2992, "step": 12695 }, { "epoch": 302.28656716417913, "grad_norm": 1.375166893005371, "learning_rate": 9.609062980030722e-06, "loss": 40.4031, "step": 12696 }, { "epoch": 302.31044776119404, "grad_norm": 1.3517038822174072, "learning_rate": 9.608294930875576e-06, "loss": 38.9248, "step": 12697 }, { "epoch": 302.33432835820895, "grad_norm": 1.3882815837860107, "learning_rate": 9.60752688172043e-06, "loss": 39.9659, "step": 12698 }, { "epoch": 302.35820895522386, "grad_norm": 1.3518110513687134, "learning_rate": 9.606758832565285e-06, "loss": 38.2864, "step": 12699 }, { "epoch": 302.3820895522388, "grad_norm": 1.3038362264633179, "learning_rate": 9.60599078341014e-06, "loss": 40.9984, "step": 12700 }, { "epoch": 302.40597014925373, "grad_norm": 1.3223636150360107, "learning_rate": 9.605222734254994e-06, "loss": 37.8475, "step": 12701 }, { "epoch": 302.42985074626864, "grad_norm": 1.3549749851226807, "learning_rate": 9.604454685099848e-06, "loss": 40.633, "step": 12702 }, { "epoch": 302.4537313432836, "grad_norm": 1.335107445716858, "learning_rate": 9.603686635944701e-06, "loss": 39.0797, "step": 12703 }, { "epoch": 302.4776119402985, "grad_norm": 1.3497073650360107, "learning_rate": 9.602918586789555e-06, "loss": 39.6304, "step": 12704 }, { "epoch": 302.5014925373134, "grad_norm": 1.3466951847076416, "learning_rate": 9.60215053763441e-06, "loss": 39.1616, "step": 12705 }, { "epoch": 302.52537313432833, "grad_norm": 1.353600263595581, "learning_rate": 9.601382488479264e-06, "loss": 41.0906, "step": 12706 }, { "epoch": 302.5492537313433, "grad_norm": 1.3591915369033813, "learning_rate": 9.600614439324117e-06, "loss": 39.7416, "step": 12707 }, { "epoch": 302.5731343283582, "grad_norm": 1.2923225164413452, "learning_rate": 9.599846390168973e-06, "loss": 39.6209, "step": 12708 }, { "epoch": 302.5970149253731, "grad_norm": 1.3331888914108276, "learning_rate": 9.599078341013826e-06, "loss": 39.4014, "step": 12709 }, { "epoch": 302.6208955223881, "grad_norm": 1.4213148355484009, "learning_rate": 9.59831029185868e-06, "loss": 41.3983, "step": 12710 }, { "epoch": 302.644776119403, "grad_norm": 1.3530646562576294, "learning_rate": 9.597542242703534e-06, "loss": 39.1877, "step": 12711 }, { "epoch": 302.6686567164179, "grad_norm": 1.315900206565857, "learning_rate": 9.596774193548389e-06, "loss": 41.6166, "step": 12712 }, { "epoch": 302.6925373134328, "grad_norm": 1.3501310348510742, "learning_rate": 9.596006144393241e-06, "loss": 39.0214, "step": 12713 }, { "epoch": 302.7164179104478, "grad_norm": 1.3144563436508179, "learning_rate": 9.595238095238096e-06, "loss": 39.9078, "step": 12714 }, { "epoch": 302.7402985074627, "grad_norm": 1.3603039979934692, "learning_rate": 9.59447004608295e-06, "loss": 38.6283, "step": 12715 }, { "epoch": 302.7641791044776, "grad_norm": 1.3105252981185913, "learning_rate": 9.593701996927805e-06, "loss": 39.6563, "step": 12716 }, { "epoch": 302.78805970149256, "grad_norm": 1.4027690887451172, "learning_rate": 9.592933947772657e-06, "loss": 40.7292, "step": 12717 }, { "epoch": 302.81194029850747, "grad_norm": 1.3632760047912598, "learning_rate": 9.592165898617513e-06, "loss": 39.2811, "step": 12718 }, { "epoch": 302.8358208955224, "grad_norm": 1.3625577688217163, "learning_rate": 9.591397849462366e-06, "loss": 39.7685, "step": 12719 }, { "epoch": 302.85970149253734, "grad_norm": 1.3734958171844482, "learning_rate": 9.59062980030722e-06, "loss": 39.6211, "step": 12720 }, { "epoch": 302.88358208955225, "grad_norm": 1.3222651481628418, "learning_rate": 9.589861751152075e-06, "loss": 40.3223, "step": 12721 }, { "epoch": 302.90746268656716, "grad_norm": 1.3519395589828491, "learning_rate": 9.589093701996929e-06, "loss": 40.3243, "step": 12722 }, { "epoch": 302.93134328358207, "grad_norm": 1.3459551334381104, "learning_rate": 9.588325652841782e-06, "loss": 39.4549, "step": 12723 }, { "epoch": 302.95522388059703, "grad_norm": 1.3417903184890747, "learning_rate": 9.587557603686636e-06, "loss": 39.5492, "step": 12724 }, { "epoch": 302.97910447761194, "grad_norm": 1.3185065984725952, "learning_rate": 9.58678955453149e-06, "loss": 39.7039, "step": 12725 }, { "epoch": 303.0, "grad_norm": 1.3194881677627563, "learning_rate": 9.586021505376345e-06, "loss": 34.072, "step": 12726 }, { "epoch": 303.0238805970149, "grad_norm": 1.3276913166046143, "learning_rate": 9.5852534562212e-06, "loss": 41.0751, "step": 12727 }, { "epoch": 303.0477611940299, "grad_norm": 1.3438597917556763, "learning_rate": 9.584485407066054e-06, "loss": 39.4506, "step": 12728 }, { "epoch": 303.0716417910448, "grad_norm": 1.3028395175933838, "learning_rate": 9.583717357910906e-06, "loss": 40.2383, "step": 12729 }, { "epoch": 303.0955223880597, "grad_norm": 1.353468418121338, "learning_rate": 9.58294930875576e-06, "loss": 40.2345, "step": 12730 }, { "epoch": 303.1194029850746, "grad_norm": 1.3157622814178467, "learning_rate": 9.582181259600615e-06, "loss": 41.0113, "step": 12731 }, { "epoch": 303.14328358208957, "grad_norm": 1.3998305797576904, "learning_rate": 9.58141321044547e-06, "loss": 39.9494, "step": 12732 }, { "epoch": 303.1671641791045, "grad_norm": 1.3921682834625244, "learning_rate": 9.580645161290322e-06, "loss": 38.6844, "step": 12733 }, { "epoch": 303.1910447761194, "grad_norm": 1.3297284841537476, "learning_rate": 9.579877112135177e-06, "loss": 40.8484, "step": 12734 }, { "epoch": 303.21492537313435, "grad_norm": 1.3571804761886597, "learning_rate": 9.579109062980031e-06, "loss": 39.7698, "step": 12735 }, { "epoch": 303.23880597014926, "grad_norm": 1.3114620447158813, "learning_rate": 9.578341013824885e-06, "loss": 39.9366, "step": 12736 }, { "epoch": 303.26268656716417, "grad_norm": 1.3461639881134033, "learning_rate": 9.57757296466974e-06, "loss": 40.4931, "step": 12737 }, { "epoch": 303.28656716417913, "grad_norm": 1.3669192790985107, "learning_rate": 9.576804915514594e-06, "loss": 39.0146, "step": 12738 }, { "epoch": 303.31044776119404, "grad_norm": 1.3761050701141357, "learning_rate": 9.576036866359449e-06, "loss": 39.0692, "step": 12739 }, { "epoch": 303.33432835820895, "grad_norm": 1.3110144138336182, "learning_rate": 9.575268817204301e-06, "loss": 41.0644, "step": 12740 }, { "epoch": 303.35820895522386, "grad_norm": 1.38132905960083, "learning_rate": 9.574500768049156e-06, "loss": 38.9349, "step": 12741 }, { "epoch": 303.3820895522388, "grad_norm": 1.33363938331604, "learning_rate": 9.57373271889401e-06, "loss": 39.1442, "step": 12742 }, { "epoch": 303.40597014925373, "grad_norm": 1.3255728483200073, "learning_rate": 9.572964669738864e-06, "loss": 39.2858, "step": 12743 }, { "epoch": 303.42985074626864, "grad_norm": 1.3444545269012451, "learning_rate": 9.572196620583719e-06, "loss": 39.4161, "step": 12744 }, { "epoch": 303.4537313432836, "grad_norm": 1.364499807357788, "learning_rate": 9.571428571428573e-06, "loss": 39.5802, "step": 12745 }, { "epoch": 303.4776119402985, "grad_norm": 1.339177131652832, "learning_rate": 9.570660522273426e-06, "loss": 39.9649, "step": 12746 }, { "epoch": 303.5014925373134, "grad_norm": 1.3815948963165283, "learning_rate": 9.56989247311828e-06, "loss": 38.903, "step": 12747 }, { "epoch": 303.52537313432833, "grad_norm": 1.326019287109375, "learning_rate": 9.569124423963135e-06, "loss": 40.8753, "step": 12748 }, { "epoch": 303.5492537313433, "grad_norm": 1.3339266777038574, "learning_rate": 9.568356374807989e-06, "loss": 39.4425, "step": 12749 }, { "epoch": 303.5731343283582, "grad_norm": 1.362027645111084, "learning_rate": 9.567588325652842e-06, "loss": 39.4122, "step": 12750 }, { "epoch": 303.5970149253731, "grad_norm": 1.3513429164886475, "learning_rate": 9.566820276497696e-06, "loss": 39.4366, "step": 12751 }, { "epoch": 303.6208955223881, "grad_norm": 1.3467832803726196, "learning_rate": 9.56605222734255e-06, "loss": 39.2939, "step": 12752 }, { "epoch": 303.644776119403, "grad_norm": 1.327791690826416, "learning_rate": 9.565284178187405e-06, "loss": 39.4774, "step": 12753 }, { "epoch": 303.6686567164179, "grad_norm": 1.371856689453125, "learning_rate": 9.56451612903226e-06, "loss": 38.7308, "step": 12754 }, { "epoch": 303.6925373134328, "grad_norm": 1.3422439098358154, "learning_rate": 9.563748079877114e-06, "loss": 39.9892, "step": 12755 }, { "epoch": 303.7164179104478, "grad_norm": 1.3657747507095337, "learning_rate": 9.562980030721966e-06, "loss": 40.2655, "step": 12756 }, { "epoch": 303.7402985074627, "grad_norm": 1.39497971534729, "learning_rate": 9.56221198156682e-06, "loss": 39.5771, "step": 12757 }, { "epoch": 303.7641791044776, "grad_norm": 1.336796522140503, "learning_rate": 9.561443932411675e-06, "loss": 38.3334, "step": 12758 }, { "epoch": 303.78805970149256, "grad_norm": 1.3345658779144287, "learning_rate": 9.56067588325653e-06, "loss": 40.546, "step": 12759 }, { "epoch": 303.81194029850747, "grad_norm": 1.3331531286239624, "learning_rate": 9.559907834101382e-06, "loss": 39.1942, "step": 12760 }, { "epoch": 303.8358208955224, "grad_norm": 1.3863259553909302, "learning_rate": 9.559139784946238e-06, "loss": 40.4493, "step": 12761 }, { "epoch": 303.85970149253734, "grad_norm": 1.4084495306015015, "learning_rate": 9.558371735791091e-06, "loss": 38.3903, "step": 12762 }, { "epoch": 303.88358208955225, "grad_norm": 1.3785548210144043, "learning_rate": 9.557603686635945e-06, "loss": 40.6485, "step": 12763 }, { "epoch": 303.90746268656716, "grad_norm": 1.3098984956741333, "learning_rate": 9.5568356374808e-06, "loss": 40.3322, "step": 12764 }, { "epoch": 303.93134328358207, "grad_norm": 1.3673075437545776, "learning_rate": 9.556067588325654e-06, "loss": 39.4192, "step": 12765 }, { "epoch": 303.95522388059703, "grad_norm": 1.4208009243011475, "learning_rate": 9.555299539170507e-06, "loss": 39.9062, "step": 12766 }, { "epoch": 303.97910447761194, "grad_norm": 1.3299107551574707, "learning_rate": 9.554531490015361e-06, "loss": 39.9304, "step": 12767 }, { "epoch": 304.0, "grad_norm": 1.3571200370788574, "learning_rate": 9.553763440860216e-06, "loss": 35.1312, "step": 12768 }, { "epoch": 304.0238805970149, "grad_norm": 1.3853367567062378, "learning_rate": 9.55299539170507e-06, "loss": 39.6879, "step": 12769 }, { "epoch": 304.0477611940299, "grad_norm": 1.3726671934127808, "learning_rate": 9.552227342549923e-06, "loss": 40.5179, "step": 12770 }, { "epoch": 304.0716417910448, "grad_norm": 1.3614238500595093, "learning_rate": 9.551459293394779e-06, "loss": 39.5002, "step": 12771 }, { "epoch": 304.0955223880597, "grad_norm": 1.3359687328338623, "learning_rate": 9.550691244239631e-06, "loss": 39.7253, "step": 12772 }, { "epoch": 304.1194029850746, "grad_norm": 1.3386355638504028, "learning_rate": 9.549923195084486e-06, "loss": 38.9308, "step": 12773 }, { "epoch": 304.14328358208957, "grad_norm": 1.331958532333374, "learning_rate": 9.54915514592934e-06, "loss": 40.2338, "step": 12774 }, { "epoch": 304.1671641791045, "grad_norm": 1.3362756967544556, "learning_rate": 9.548387096774195e-06, "loss": 39.8058, "step": 12775 }, { "epoch": 304.1910447761194, "grad_norm": 1.348582148551941, "learning_rate": 9.547619047619049e-06, "loss": 40.8508, "step": 12776 }, { "epoch": 304.21492537313435, "grad_norm": 1.33968186378479, "learning_rate": 9.546850998463902e-06, "loss": 38.2397, "step": 12777 }, { "epoch": 304.23880597014926, "grad_norm": 1.3412657976150513, "learning_rate": 9.546082949308758e-06, "loss": 39.6847, "step": 12778 }, { "epoch": 304.26268656716417, "grad_norm": 1.335480809211731, "learning_rate": 9.54531490015361e-06, "loss": 40.3796, "step": 12779 }, { "epoch": 304.28656716417913, "grad_norm": 1.3274022340774536, "learning_rate": 9.544546850998465e-06, "loss": 40.6508, "step": 12780 }, { "epoch": 304.31044776119404, "grad_norm": 1.3379935026168823, "learning_rate": 9.54377880184332e-06, "loss": 38.41, "step": 12781 }, { "epoch": 304.33432835820895, "grad_norm": 1.3414567708969116, "learning_rate": 9.543010752688174e-06, "loss": 39.3998, "step": 12782 }, { "epoch": 304.35820895522386, "grad_norm": 1.3425453901290894, "learning_rate": 9.542242703533026e-06, "loss": 41.0084, "step": 12783 }, { "epoch": 304.3820895522388, "grad_norm": 1.3583341836929321, "learning_rate": 9.54147465437788e-06, "loss": 40.2919, "step": 12784 }, { "epoch": 304.40597014925373, "grad_norm": 1.3102350234985352, "learning_rate": 9.540706605222735e-06, "loss": 40.1406, "step": 12785 }, { "epoch": 304.42985074626864, "grad_norm": 1.3502137660980225, "learning_rate": 9.53993855606759e-06, "loss": 39.7117, "step": 12786 }, { "epoch": 304.4537313432836, "grad_norm": 1.3686370849609375, "learning_rate": 9.539170506912442e-06, "loss": 40.0079, "step": 12787 }, { "epoch": 304.4776119402985, "grad_norm": 1.3494575023651123, "learning_rate": 9.538402457757298e-06, "loss": 38.9334, "step": 12788 }, { "epoch": 304.5014925373134, "grad_norm": 1.3497549295425415, "learning_rate": 9.537634408602151e-06, "loss": 40.6448, "step": 12789 }, { "epoch": 304.52537313432833, "grad_norm": 1.360905647277832, "learning_rate": 9.536866359447005e-06, "loss": 39.001, "step": 12790 }, { "epoch": 304.5492537313433, "grad_norm": 1.3337323665618896, "learning_rate": 9.53609831029186e-06, "loss": 40.2053, "step": 12791 }, { "epoch": 304.5731343283582, "grad_norm": 1.3877524137496948, "learning_rate": 9.535330261136714e-06, "loss": 39.4614, "step": 12792 }, { "epoch": 304.5970149253731, "grad_norm": 1.3897804021835327, "learning_rate": 9.534562211981567e-06, "loss": 39.7605, "step": 12793 }, { "epoch": 304.6208955223881, "grad_norm": 1.352094292640686, "learning_rate": 9.533794162826421e-06, "loss": 39.8026, "step": 12794 }, { "epoch": 304.644776119403, "grad_norm": 1.2944467067718506, "learning_rate": 9.533026113671276e-06, "loss": 40.0136, "step": 12795 }, { "epoch": 304.6686567164179, "grad_norm": 1.3279463052749634, "learning_rate": 9.53225806451613e-06, "loss": 40.1679, "step": 12796 }, { "epoch": 304.6925373134328, "grad_norm": 1.314193844795227, "learning_rate": 9.531490015360984e-06, "loss": 40.1055, "step": 12797 }, { "epoch": 304.7164179104478, "grad_norm": 1.3351632356643677, "learning_rate": 9.530721966205839e-06, "loss": 40.1641, "step": 12798 }, { "epoch": 304.7402985074627, "grad_norm": 1.3894504308700562, "learning_rate": 9.529953917050691e-06, "loss": 39.9962, "step": 12799 }, { "epoch": 304.7641791044776, "grad_norm": 1.3149863481521606, "learning_rate": 9.529185867895546e-06, "loss": 39.4114, "step": 12800 }, { "epoch": 304.78805970149256, "grad_norm": 1.339985966682434, "learning_rate": 9.5284178187404e-06, "loss": 39.0485, "step": 12801 }, { "epoch": 304.81194029850747, "grad_norm": 1.336387276649475, "learning_rate": 9.527649769585255e-06, "loss": 39.7236, "step": 12802 }, { "epoch": 304.8358208955224, "grad_norm": 1.3940194845199585, "learning_rate": 9.526881720430107e-06, "loss": 39.5499, "step": 12803 }, { "epoch": 304.85970149253734, "grad_norm": 1.370534896850586, "learning_rate": 9.526113671274963e-06, "loss": 40.0668, "step": 12804 }, { "epoch": 304.88358208955225, "grad_norm": 1.358071208000183, "learning_rate": 9.525345622119816e-06, "loss": 40.4777, "step": 12805 }, { "epoch": 304.90746268656716, "grad_norm": 1.3235597610473633, "learning_rate": 9.52457757296467e-06, "loss": 37.7732, "step": 12806 }, { "epoch": 304.93134328358207, "grad_norm": 1.360295295715332, "learning_rate": 9.523809523809525e-06, "loss": 38.2547, "step": 12807 }, { "epoch": 304.95522388059703, "grad_norm": 1.3285367488861084, "learning_rate": 9.523041474654379e-06, "loss": 39.7729, "step": 12808 }, { "epoch": 304.97910447761194, "grad_norm": 1.32861328125, "learning_rate": 9.522273425499234e-06, "loss": 40.8735, "step": 12809 }, { "epoch": 305.0, "grad_norm": 1.3854413032531738, "learning_rate": 9.521505376344086e-06, "loss": 34.5389, "step": 12810 }, { "epoch": 305.0238805970149, "grad_norm": 1.368944764137268, "learning_rate": 9.52073732718894e-06, "loss": 39.0011, "step": 12811 }, { "epoch": 305.0477611940299, "grad_norm": 1.3294095993041992, "learning_rate": 9.519969278033795e-06, "loss": 40.7761, "step": 12812 }, { "epoch": 305.0716417910448, "grad_norm": 1.353795051574707, "learning_rate": 9.51920122887865e-06, "loss": 38.7337, "step": 12813 }, { "epoch": 305.0955223880597, "grad_norm": 1.3665062189102173, "learning_rate": 9.518433179723504e-06, "loss": 39.5622, "step": 12814 }, { "epoch": 305.1194029850746, "grad_norm": 1.3240019083023071, "learning_rate": 9.517665130568358e-06, "loss": 39.6576, "step": 12815 }, { "epoch": 305.14328358208957, "grad_norm": 1.3577924966812134, "learning_rate": 9.51689708141321e-06, "loss": 38.7632, "step": 12816 }, { "epoch": 305.1671641791045, "grad_norm": 1.3424854278564453, "learning_rate": 9.516129032258065e-06, "loss": 39.9704, "step": 12817 }, { "epoch": 305.1910447761194, "grad_norm": 1.3364191055297852, "learning_rate": 9.51536098310292e-06, "loss": 40.1727, "step": 12818 }, { "epoch": 305.21492537313435, "grad_norm": 1.3493194580078125, "learning_rate": 9.514592933947774e-06, "loss": 39.6083, "step": 12819 }, { "epoch": 305.23880597014926, "grad_norm": 1.3672661781311035, "learning_rate": 9.513824884792627e-06, "loss": 40.6131, "step": 12820 }, { "epoch": 305.26268656716417, "grad_norm": 1.3320049047470093, "learning_rate": 9.513056835637483e-06, "loss": 39.5971, "step": 12821 }, { "epoch": 305.28656716417913, "grad_norm": 1.3687903881072998, "learning_rate": 9.512288786482335e-06, "loss": 40.0645, "step": 12822 }, { "epoch": 305.31044776119404, "grad_norm": 1.3357595205307007, "learning_rate": 9.51152073732719e-06, "loss": 39.8688, "step": 12823 }, { "epoch": 305.33432835820895, "grad_norm": 1.3751375675201416, "learning_rate": 9.510752688172044e-06, "loss": 39.8961, "step": 12824 }, { "epoch": 305.35820895522386, "grad_norm": NaN, "learning_rate": 9.509984639016899e-06, "loss": 65.2965, "step": 12825 }, { "epoch": 305.3820895522388, "grad_norm": 1.3911668062210083, "learning_rate": 9.509984639016899e-06, "loss": 40.0663, "step": 12826 }, { "epoch": 305.40597014925373, "grad_norm": 1.3347141742706299, "learning_rate": 9.509216589861751e-06, "loss": 38.2048, "step": 12827 }, { "epoch": 305.42985074626864, "grad_norm": 1.3264564275741577, "learning_rate": 9.508448540706606e-06, "loss": 39.9249, "step": 12828 }, { "epoch": 305.4537313432836, "grad_norm": 1.3564708232879639, "learning_rate": 9.50768049155146e-06, "loss": 39.8633, "step": 12829 }, { "epoch": 305.4776119402985, "grad_norm": NaN, "learning_rate": 9.506912442396314e-06, "loss": 34.2523, "step": 12830 }, { "epoch": 305.5014925373134, "grad_norm": 1.4629424810409546, "learning_rate": 9.506912442396314e-06, "loss": 39.8556, "step": 12831 }, { "epoch": 305.52537313432833, "grad_norm": 1.3861699104309082, "learning_rate": 9.506144393241167e-06, "loss": 40.0435, "step": 12832 }, { "epoch": 305.5492537313433, "grad_norm": 1.4651962518692017, "learning_rate": 9.505376344086023e-06, "loss": 41.1001, "step": 12833 }, { "epoch": 305.5731343283582, "grad_norm": 1.3143638372421265, "learning_rate": 9.504608294930876e-06, "loss": 39.1669, "step": 12834 }, { "epoch": 305.5970149253731, "grad_norm": 1.3727763891220093, "learning_rate": 9.50384024577573e-06, "loss": 40.5085, "step": 12835 }, { "epoch": 305.6208955223881, "grad_norm": 1.335640549659729, "learning_rate": 9.503072196620585e-06, "loss": 39.2527, "step": 12836 }, { "epoch": 305.644776119403, "grad_norm": 1.34287428855896, "learning_rate": 9.502304147465439e-06, "loss": 40.2142, "step": 12837 }, { "epoch": 305.6686567164179, "grad_norm": NaN, "learning_rate": 9.501536098310292e-06, "loss": 70.6685, "step": 12838 }, { "epoch": 305.6925373134328, "grad_norm": 1.3217763900756836, "learning_rate": 9.501536098310292e-06, "loss": 40.0942, "step": 12839 }, { "epoch": 305.7164179104478, "grad_norm": 1.3633452653884888, "learning_rate": 9.500768049155146e-06, "loss": 38.9992, "step": 12840 }, { "epoch": 305.7402985074627, "grad_norm": 1.3429813385009766, "learning_rate": 9.5e-06, "loss": 38.7263, "step": 12841 }, { "epoch": 305.7641791044776, "grad_norm": 1.3552817106246948, "learning_rate": 9.499231950844855e-06, "loss": 40.8342, "step": 12842 }, { "epoch": 305.78805970149256, "grad_norm": 1.3672183752059937, "learning_rate": 9.498463901689708e-06, "loss": 39.6003, "step": 12843 }, { "epoch": 305.81194029850747, "grad_norm": 1.3372924327850342, "learning_rate": 9.497695852534564e-06, "loss": 38.5349, "step": 12844 }, { "epoch": 305.8358208955224, "grad_norm": 1.3845667839050293, "learning_rate": 9.496927803379416e-06, "loss": 40.1263, "step": 12845 }, { "epoch": 305.85970149253734, "grad_norm": 1.3281112909317017, "learning_rate": 9.49615975422427e-06, "loss": 40.6712, "step": 12846 }, { "epoch": 305.88358208955225, "grad_norm": 1.3896592855453491, "learning_rate": 9.495391705069125e-06, "loss": 39.0345, "step": 12847 }, { "epoch": 305.90746268656716, "grad_norm": 2.136523485183716, "learning_rate": 9.49462365591398e-06, "loss": 38.7471, "step": 12848 }, { "epoch": 305.93134328358207, "grad_norm": 1.3525270223617554, "learning_rate": 9.493855606758834e-06, "loss": 40.5716, "step": 12849 }, { "epoch": 305.95522388059703, "grad_norm": 1.3138959407806396, "learning_rate": 9.493087557603687e-06, "loss": 40.0039, "step": 12850 }, { "epoch": 305.97910447761194, "grad_norm": 1.3345534801483154, "learning_rate": 9.492319508448543e-06, "loss": 39.8123, "step": 12851 }, { "epoch": 306.0, "grad_norm": 1.3116215467453003, "learning_rate": 9.491551459293395e-06, "loss": 34.8694, "step": 12852 }, { "epoch": 306.0238805970149, "grad_norm": 1.3692957162857056, "learning_rate": 9.49078341013825e-06, "loss": 40.6492, "step": 12853 }, { "epoch": 306.0477611940299, "grad_norm": 1.3375858068466187, "learning_rate": 9.490015360983104e-06, "loss": 39.9653, "step": 12854 }, { "epoch": 306.0716417910448, "grad_norm": 1.3308744430541992, "learning_rate": 9.489247311827959e-06, "loss": 39.9464, "step": 12855 }, { "epoch": 306.0955223880597, "grad_norm": 1.2974597215652466, "learning_rate": 9.488479262672811e-06, "loss": 40.9825, "step": 12856 }, { "epoch": 306.1194029850746, "grad_norm": 1.3411680459976196, "learning_rate": 9.487711213517666e-06, "loss": 39.4004, "step": 12857 }, { "epoch": 306.14328358208957, "grad_norm": 1.3502929210662842, "learning_rate": 9.48694316436252e-06, "loss": 39.6345, "step": 12858 }, { "epoch": 306.1671641791045, "grad_norm": 1.3736488819122314, "learning_rate": 9.486175115207374e-06, "loss": 40.1867, "step": 12859 }, { "epoch": 306.1910447761194, "grad_norm": 1.3756834268569946, "learning_rate": 9.485407066052229e-06, "loss": 41.1986, "step": 12860 }, { "epoch": 306.21492537313435, "grad_norm": 1.4271636009216309, "learning_rate": 9.484639016897083e-06, "loss": 40.3357, "step": 12861 }, { "epoch": 306.23880597014926, "grad_norm": 1.3240729570388794, "learning_rate": 9.483870967741936e-06, "loss": 39.6438, "step": 12862 }, { "epoch": 306.26268656716417, "grad_norm": 1.3257936239242554, "learning_rate": 9.48310291858679e-06, "loss": 39.9746, "step": 12863 }, { "epoch": 306.28656716417913, "grad_norm": 1.3694722652435303, "learning_rate": 9.482334869431645e-06, "loss": 40.3268, "step": 12864 }, { "epoch": 306.31044776119404, "grad_norm": 1.3472061157226562, "learning_rate": 9.481566820276499e-06, "loss": 40.8395, "step": 12865 }, { "epoch": 306.33432835820895, "grad_norm": 1.3722432851791382, "learning_rate": 9.480798771121352e-06, "loss": 38.5825, "step": 12866 }, { "epoch": 306.35820895522386, "grad_norm": 1.324428677558899, "learning_rate": 9.480030721966206e-06, "loss": 39.5338, "step": 12867 }, { "epoch": 306.3820895522388, "grad_norm": 1.3712388277053833, "learning_rate": 9.47926267281106e-06, "loss": 40.3259, "step": 12868 }, { "epoch": 306.40597014925373, "grad_norm": 1.3844447135925293, "learning_rate": 9.478494623655915e-06, "loss": 38.5846, "step": 12869 }, { "epoch": 306.42985074626864, "grad_norm": 1.348456621170044, "learning_rate": 9.47772657450077e-06, "loss": 39.4644, "step": 12870 }, { "epoch": 306.4537313432836, "grad_norm": 1.3571041822433472, "learning_rate": 9.476958525345624e-06, "loss": 38.6301, "step": 12871 }, { "epoch": 306.4776119402985, "grad_norm": 1.3698256015777588, "learning_rate": 9.476190476190476e-06, "loss": 38.9797, "step": 12872 }, { "epoch": 306.5014925373134, "grad_norm": 1.3456406593322754, "learning_rate": 9.47542242703533e-06, "loss": 40.9539, "step": 12873 }, { "epoch": 306.52537313432833, "grad_norm": 1.389725685119629, "learning_rate": 9.474654377880185e-06, "loss": 39.2536, "step": 12874 }, { "epoch": 306.5492537313433, "grad_norm": 1.440771222114563, "learning_rate": 9.47388632872504e-06, "loss": 39.6165, "step": 12875 }, { "epoch": 306.5731343283582, "grad_norm": 1.3524260520935059, "learning_rate": 9.473118279569892e-06, "loss": 40.325, "step": 12876 }, { "epoch": 306.5970149253731, "grad_norm": 1.3539128303527832, "learning_rate": 9.472350230414748e-06, "loss": 39.6698, "step": 12877 }, { "epoch": 306.6208955223881, "grad_norm": 1.3633776903152466, "learning_rate": 9.471582181259601e-06, "loss": 39.8005, "step": 12878 }, { "epoch": 306.644776119403, "grad_norm": 1.3480182886123657, "learning_rate": 9.470814132104455e-06, "loss": 38.4308, "step": 12879 }, { "epoch": 306.6686567164179, "grad_norm": 1.3972728252410889, "learning_rate": 9.47004608294931e-06, "loss": 39.7503, "step": 12880 }, { "epoch": 306.6925373134328, "grad_norm": 1.3864327669143677, "learning_rate": 9.469278033794164e-06, "loss": 39.3978, "step": 12881 }, { "epoch": 306.7164179104478, "grad_norm": 1.3531129360198975, "learning_rate": 9.468509984639017e-06, "loss": 39.8466, "step": 12882 }, { "epoch": 306.7402985074627, "grad_norm": 1.4137777090072632, "learning_rate": 9.467741935483871e-06, "loss": 39.8011, "step": 12883 }, { "epoch": 306.7641791044776, "grad_norm": 1.365920901298523, "learning_rate": 9.466973886328726e-06, "loss": 39.3758, "step": 12884 }, { "epoch": 306.78805970149256, "grad_norm": 1.351515293121338, "learning_rate": 9.46620583717358e-06, "loss": 39.832, "step": 12885 }, { "epoch": 306.81194029850747, "grad_norm": 1.3400176763534546, "learning_rate": 9.465437788018434e-06, "loss": 38.6929, "step": 12886 }, { "epoch": 306.8358208955224, "grad_norm": 1.3743629455566406, "learning_rate": 9.464669738863289e-06, "loss": 39.57, "step": 12887 }, { "epoch": 306.85970149253734, "grad_norm": 1.3986587524414062, "learning_rate": 9.463901689708143e-06, "loss": 39.5437, "step": 12888 }, { "epoch": 306.88358208955225, "grad_norm": 1.3792511224746704, "learning_rate": 9.463133640552996e-06, "loss": 38.2615, "step": 12889 }, { "epoch": 306.90746268656716, "grad_norm": 1.3742238283157349, "learning_rate": 9.46236559139785e-06, "loss": 39.7991, "step": 12890 }, { "epoch": 306.93134328358207, "grad_norm": 1.3483901023864746, "learning_rate": 9.461597542242705e-06, "loss": 40.7774, "step": 12891 }, { "epoch": 306.95522388059703, "grad_norm": 1.3627054691314697, "learning_rate": 9.460829493087559e-06, "loss": 39.8293, "step": 12892 }, { "epoch": 306.97910447761194, "grad_norm": 1.3304975032806396, "learning_rate": 9.460061443932412e-06, "loss": 39.8801, "step": 12893 }, { "epoch": 307.0, "grad_norm": 1.336501121520996, "learning_rate": 9.459293394777268e-06, "loss": 34.9331, "step": 12894 }, { "epoch": 307.0238805970149, "grad_norm": 1.3885352611541748, "learning_rate": 9.45852534562212e-06, "loss": 39.1302, "step": 12895 }, { "epoch": 307.0477611940299, "grad_norm": 1.408706545829773, "learning_rate": 9.457757296466975e-06, "loss": 38.505, "step": 12896 }, { "epoch": 307.0716417910448, "grad_norm": 1.3279216289520264, "learning_rate": 9.456989247311829e-06, "loss": 40.4502, "step": 12897 }, { "epoch": 307.0955223880597, "grad_norm": 1.3287336826324463, "learning_rate": 9.456221198156684e-06, "loss": 40.0122, "step": 12898 }, { "epoch": 307.1194029850746, "grad_norm": 1.3591032028198242, "learning_rate": 9.455453149001536e-06, "loss": 39.8847, "step": 12899 }, { "epoch": 307.14328358208957, "grad_norm": 1.3309327363967896, "learning_rate": 9.45468509984639e-06, "loss": 40.8142, "step": 12900 }, { "epoch": 307.1671641791045, "grad_norm": 1.3613929748535156, "learning_rate": 9.453917050691245e-06, "loss": 40.7951, "step": 12901 }, { "epoch": 307.1910447761194, "grad_norm": 1.3485040664672852, "learning_rate": 9.4531490015361e-06, "loss": 39.75, "step": 12902 }, { "epoch": 307.21492537313435, "grad_norm": 1.3500962257385254, "learning_rate": 9.452380952380952e-06, "loss": 39.983, "step": 12903 }, { "epoch": 307.23880597014926, "grad_norm": 1.3170082569122314, "learning_rate": 9.451612903225808e-06, "loss": 39.0171, "step": 12904 }, { "epoch": 307.26268656716417, "grad_norm": 1.3343394994735718, "learning_rate": 9.45084485407066e-06, "loss": 38.7758, "step": 12905 }, { "epoch": 307.28656716417913, "grad_norm": 1.3409732580184937, "learning_rate": 9.450076804915515e-06, "loss": 39.1572, "step": 12906 }, { "epoch": 307.31044776119404, "grad_norm": 1.3621944189071655, "learning_rate": 9.44930875576037e-06, "loss": 40.1904, "step": 12907 }, { "epoch": 307.33432835820895, "grad_norm": 1.3682432174682617, "learning_rate": 9.448540706605224e-06, "loss": 40.2746, "step": 12908 }, { "epoch": 307.35820895522386, "grad_norm": 1.3501904010772705, "learning_rate": 9.447772657450077e-06, "loss": 41.2117, "step": 12909 }, { "epoch": 307.3820895522388, "grad_norm": 1.3220387697219849, "learning_rate": 9.447004608294931e-06, "loss": 38.4849, "step": 12910 }, { "epoch": 307.40597014925373, "grad_norm": 1.3529763221740723, "learning_rate": 9.446236559139785e-06, "loss": 38.7611, "step": 12911 }, { "epoch": 307.42985074626864, "grad_norm": 1.3263792991638184, "learning_rate": 9.44546850998464e-06, "loss": 38.7122, "step": 12912 }, { "epoch": 307.4537313432836, "grad_norm": 1.3966134786605835, "learning_rate": 9.444700460829494e-06, "loss": 39.8683, "step": 12913 }, { "epoch": 307.4776119402985, "grad_norm": 1.3754708766937256, "learning_rate": 9.443932411674349e-06, "loss": 39.7254, "step": 12914 }, { "epoch": 307.5014925373134, "grad_norm": 1.3642849922180176, "learning_rate": 9.443164362519201e-06, "loss": 40.3072, "step": 12915 }, { "epoch": 307.52537313432833, "grad_norm": 1.322484016418457, "learning_rate": 9.442396313364056e-06, "loss": 40.5317, "step": 12916 }, { "epoch": 307.5492537313433, "grad_norm": 1.3049285411834717, "learning_rate": 9.44162826420891e-06, "loss": 39.0044, "step": 12917 }, { "epoch": 307.5731343283582, "grad_norm": 1.3565062284469604, "learning_rate": 9.440860215053764e-06, "loss": 39.9684, "step": 12918 }, { "epoch": 307.5970149253731, "grad_norm": 1.309466004371643, "learning_rate": 9.440092165898617e-06, "loss": 39.6693, "step": 12919 }, { "epoch": 307.6208955223881, "grad_norm": 1.3731716871261597, "learning_rate": 9.439324116743472e-06, "loss": 39.7845, "step": 12920 }, { "epoch": 307.644776119403, "grad_norm": 1.385774850845337, "learning_rate": 9.438556067588326e-06, "loss": 39.7692, "step": 12921 }, { "epoch": 307.6686567164179, "grad_norm": 1.3720496892929077, "learning_rate": 9.43778801843318e-06, "loss": 40.0597, "step": 12922 }, { "epoch": 307.6925373134328, "grad_norm": 1.3822605609893799, "learning_rate": 9.437019969278035e-06, "loss": 40.5062, "step": 12923 }, { "epoch": 307.7164179104478, "grad_norm": 1.4210195541381836, "learning_rate": 9.436251920122889e-06, "loss": 41.0104, "step": 12924 }, { "epoch": 307.7402985074627, "grad_norm": 1.298309326171875, "learning_rate": 9.435483870967743e-06, "loss": 39.3123, "step": 12925 }, { "epoch": 307.7641791044776, "grad_norm": 1.3537744283676147, "learning_rate": 9.434715821812596e-06, "loss": 40.0403, "step": 12926 }, { "epoch": 307.78805970149256, "grad_norm": 1.3646912574768066, "learning_rate": 9.43394777265745e-06, "loss": 40.2991, "step": 12927 }, { "epoch": 307.81194029850747, "grad_norm": 1.391618013381958, "learning_rate": 9.433179723502305e-06, "loss": 38.5147, "step": 12928 }, { "epoch": 307.8358208955224, "grad_norm": 1.3317821025848389, "learning_rate": 9.43241167434716e-06, "loss": 40.5113, "step": 12929 }, { "epoch": 307.85970149253734, "grad_norm": 1.3626054525375366, "learning_rate": 9.431643625192014e-06, "loss": 40.4101, "step": 12930 }, { "epoch": 307.88358208955225, "grad_norm": 1.359518051147461, "learning_rate": 9.430875576036868e-06, "loss": 39.4971, "step": 12931 }, { "epoch": 307.90746268656716, "grad_norm": 1.4003812074661255, "learning_rate": 9.43010752688172e-06, "loss": 38.8987, "step": 12932 }, { "epoch": 307.93134328358207, "grad_norm": 1.3643344640731812, "learning_rate": 9.429339477726575e-06, "loss": 38.6227, "step": 12933 }, { "epoch": 307.95522388059703, "grad_norm": 1.3846169710159302, "learning_rate": 9.42857142857143e-06, "loss": 39.4187, "step": 12934 }, { "epoch": 307.97910447761194, "grad_norm": 1.3632073402404785, "learning_rate": 9.427803379416284e-06, "loss": 40.5925, "step": 12935 }, { "epoch": 308.0, "grad_norm": 1.3191345930099487, "learning_rate": 9.427035330261137e-06, "loss": 33.6311, "step": 12936 }, { "epoch": 308.0238805970149, "grad_norm": 3.497459888458252, "learning_rate": 9.426267281105993e-06, "loss": 40.0274, "step": 12937 }, { "epoch": 308.0477611940299, "grad_norm": 1.3503661155700684, "learning_rate": 9.425499231950845e-06, "loss": 39.9119, "step": 12938 }, { "epoch": 308.0716417910448, "grad_norm": 1.3584961891174316, "learning_rate": 9.4247311827957e-06, "loss": 38.3361, "step": 12939 }, { "epoch": 308.0955223880597, "grad_norm": 1.3682007789611816, "learning_rate": 9.423963133640554e-06, "loss": 39.243, "step": 12940 }, { "epoch": 308.1194029850746, "grad_norm": 1.320678472518921, "learning_rate": 9.423195084485409e-06, "loss": 38.7118, "step": 12941 }, { "epoch": 308.14328358208957, "grad_norm": 1.3840537071228027, "learning_rate": 9.422427035330261e-06, "loss": 40.2661, "step": 12942 }, { "epoch": 308.1671641791045, "grad_norm": 1.3785028457641602, "learning_rate": 9.421658986175116e-06, "loss": 39.2049, "step": 12943 }, { "epoch": 308.1910447761194, "grad_norm": 1.3325321674346924, "learning_rate": 9.42089093701997e-06, "loss": 39.8249, "step": 12944 }, { "epoch": 308.21492537313435, "grad_norm": 1.3423806428909302, "learning_rate": 9.420122887864824e-06, "loss": 39.5163, "step": 12945 }, { "epoch": 308.23880597014926, "grad_norm": 1.3715713024139404, "learning_rate": 9.419354838709677e-06, "loss": 39.4549, "step": 12946 }, { "epoch": 308.26268656716417, "grad_norm": 1.3559876680374146, "learning_rate": 9.418586789554533e-06, "loss": 39.6097, "step": 12947 }, { "epoch": 308.28656716417913, "grad_norm": 1.373295783996582, "learning_rate": 9.417818740399386e-06, "loss": 39.2284, "step": 12948 }, { "epoch": 308.31044776119404, "grad_norm": 1.3399384021759033, "learning_rate": 9.41705069124424e-06, "loss": 40.7638, "step": 12949 }, { "epoch": 308.33432835820895, "grad_norm": 1.366416573524475, "learning_rate": 9.416282642089095e-06, "loss": 40.5248, "step": 12950 }, { "epoch": 308.35820895522386, "grad_norm": 1.3040127754211426, "learning_rate": 9.415514592933949e-06, "loss": 39.1393, "step": 12951 }, { "epoch": 308.3820895522388, "grad_norm": 1.3607370853424072, "learning_rate": 9.414746543778802e-06, "loss": 39.5737, "step": 12952 }, { "epoch": 308.40597014925373, "grad_norm": 1.3512691259384155, "learning_rate": 9.413978494623656e-06, "loss": 40.2654, "step": 12953 }, { "epoch": 308.42985074626864, "grad_norm": 1.3598406314849854, "learning_rate": 9.41321044546851e-06, "loss": 39.5386, "step": 12954 }, { "epoch": 308.4537313432836, "grad_norm": 1.3221805095672607, "learning_rate": 9.412442396313365e-06, "loss": 38.4559, "step": 12955 }, { "epoch": 308.4776119402985, "grad_norm": 1.3805758953094482, "learning_rate": 9.411674347158218e-06, "loss": 39.8135, "step": 12956 }, { "epoch": 308.5014925373134, "grad_norm": 1.3240687847137451, "learning_rate": 9.410906298003074e-06, "loss": 41.7457, "step": 12957 }, { "epoch": 308.52537313432833, "grad_norm": 1.3510078191757202, "learning_rate": 9.410138248847926e-06, "loss": 38.5939, "step": 12958 }, { "epoch": 308.5492537313433, "grad_norm": 1.385909080505371, "learning_rate": 9.40937019969278e-06, "loss": 39.866, "step": 12959 }, { "epoch": 308.5731343283582, "grad_norm": 1.3505635261535645, "learning_rate": 9.408602150537635e-06, "loss": 39.9695, "step": 12960 }, { "epoch": 308.5970149253731, "grad_norm": 1.3302843570709229, "learning_rate": 9.40783410138249e-06, "loss": 40.237, "step": 12961 }, { "epoch": 308.6208955223881, "grad_norm": 1.3552064895629883, "learning_rate": 9.407066052227344e-06, "loss": 40.2747, "step": 12962 }, { "epoch": 308.644776119403, "grad_norm": 1.3736135959625244, "learning_rate": 9.406298003072197e-06, "loss": 39.308, "step": 12963 }, { "epoch": 308.6686567164179, "grad_norm": 1.3170803785324097, "learning_rate": 9.405529953917053e-06, "loss": 39.8547, "step": 12964 }, { "epoch": 308.6925373134328, "grad_norm": 1.478446364402771, "learning_rate": 9.404761904761905e-06, "loss": 39.8032, "step": 12965 }, { "epoch": 308.7164179104478, "grad_norm": 1.3616975545883179, "learning_rate": 9.40399385560676e-06, "loss": 39.9362, "step": 12966 }, { "epoch": 308.7402985074627, "grad_norm": NaN, "learning_rate": 9.403225806451614e-06, "loss": 50.5924, "step": 12967 }, { "epoch": 308.7641791044776, "grad_norm": 1.4063929319381714, "learning_rate": 9.403225806451614e-06, "loss": 39.1251, "step": 12968 }, { "epoch": 308.78805970149256, "grad_norm": 1.3380964994430542, "learning_rate": 9.402457757296468e-06, "loss": 40.9791, "step": 12969 }, { "epoch": 308.81194029850747, "grad_norm": 1.3491864204406738, "learning_rate": 9.401689708141321e-06, "loss": 40.0646, "step": 12970 }, { "epoch": 308.8358208955224, "grad_norm": 1.3342701196670532, "learning_rate": 9.400921658986176e-06, "loss": 38.7247, "step": 12971 }, { "epoch": 308.85970149253734, "grad_norm": 1.4535540342330933, "learning_rate": 9.40015360983103e-06, "loss": 40.0136, "step": 12972 }, { "epoch": 308.88358208955225, "grad_norm": 1.3381571769714355, "learning_rate": 9.399385560675884e-06, "loss": 39.9805, "step": 12973 }, { "epoch": 308.90746268656716, "grad_norm": 1.4401580095291138, "learning_rate": 9.398617511520739e-06, "loss": 39.9734, "step": 12974 }, { "epoch": 308.93134328358207, "grad_norm": 1.3463611602783203, "learning_rate": 9.397849462365593e-06, "loss": 39.4244, "step": 12975 }, { "epoch": 308.95522388059703, "grad_norm": 1.3688201904296875, "learning_rate": 9.397081413210446e-06, "loss": 39.7733, "step": 12976 }, { "epoch": 308.97910447761194, "grad_norm": 1.4254612922668457, "learning_rate": 9.3963133640553e-06, "loss": 40.1632, "step": 12977 }, { "epoch": 309.0, "grad_norm": 1.3536877632141113, "learning_rate": 9.395545314900155e-06, "loss": 34.936, "step": 12978 }, { "epoch": 309.0238805970149, "grad_norm": 1.3877365589141846, "learning_rate": 9.394777265745009e-06, "loss": 40.493, "step": 12979 }, { "epoch": 309.0477611940299, "grad_norm": 1.345043420791626, "learning_rate": 9.394009216589862e-06, "loss": 39.526, "step": 12980 }, { "epoch": 309.0716417910448, "grad_norm": 1.3193007707595825, "learning_rate": 9.393241167434716e-06, "loss": 40.6148, "step": 12981 }, { "epoch": 309.0955223880597, "grad_norm": 1.3124363422393799, "learning_rate": 9.39247311827957e-06, "loss": 38.7514, "step": 12982 }, { "epoch": 309.1194029850746, "grad_norm": 1.370776891708374, "learning_rate": 9.391705069124425e-06, "loss": 38.903, "step": 12983 }, { "epoch": 309.14328358208957, "grad_norm": 1.343902587890625, "learning_rate": 9.39093701996928e-06, "loss": 39.7792, "step": 12984 }, { "epoch": 309.1671641791045, "grad_norm": 1.3357861042022705, "learning_rate": 9.390168970814134e-06, "loss": 40.3968, "step": 12985 }, { "epoch": 309.1910447761194, "grad_norm": 1.3513929843902588, "learning_rate": 9.389400921658986e-06, "loss": 39.555, "step": 12986 }, { "epoch": 309.21492537313435, "grad_norm": 1.3533761501312256, "learning_rate": 9.38863287250384e-06, "loss": 38.8925, "step": 12987 }, { "epoch": 309.23880597014926, "grad_norm": 1.3631012439727783, "learning_rate": 9.387864823348695e-06, "loss": 39.1602, "step": 12988 }, { "epoch": 309.26268656716417, "grad_norm": 1.3410412073135376, "learning_rate": 9.38709677419355e-06, "loss": 39.5801, "step": 12989 }, { "epoch": 309.28656716417913, "grad_norm": 1.341262698173523, "learning_rate": 9.386328725038402e-06, "loss": 38.4861, "step": 12990 }, { "epoch": 309.31044776119404, "grad_norm": 1.3569667339324951, "learning_rate": 9.385560675883258e-06, "loss": 38.6098, "step": 12991 }, { "epoch": 309.33432835820895, "grad_norm": 1.3531187772750854, "learning_rate": 9.384792626728111e-06, "loss": 39.7664, "step": 12992 }, { "epoch": 309.35820895522386, "grad_norm": 1.3902581930160522, "learning_rate": 9.384024577572965e-06, "loss": 41.1127, "step": 12993 }, { "epoch": 309.3820895522388, "grad_norm": 1.3646399974822998, "learning_rate": 9.38325652841782e-06, "loss": 38.7894, "step": 12994 }, { "epoch": 309.40597014925373, "grad_norm": 1.3172783851623535, "learning_rate": 9.382488479262674e-06, "loss": 41.0165, "step": 12995 }, { "epoch": 309.42985074626864, "grad_norm": 1.3652892112731934, "learning_rate": 9.381720430107528e-06, "loss": 40.7461, "step": 12996 }, { "epoch": 309.4537313432836, "grad_norm": 1.3352701663970947, "learning_rate": 9.380952380952381e-06, "loss": 39.217, "step": 12997 }, { "epoch": 309.4776119402985, "grad_norm": 1.3894429206848145, "learning_rate": 9.380184331797235e-06, "loss": 39.0643, "step": 12998 }, { "epoch": 309.5014925373134, "grad_norm": 1.3462095260620117, "learning_rate": 9.37941628264209e-06, "loss": 40.908, "step": 12999 }, { "epoch": 309.52537313432833, "grad_norm": 1.3393863439559937, "learning_rate": 9.378648233486944e-06, "loss": 38.9259, "step": 13000 }, { "epoch": 309.5492537313433, "grad_norm": 1.3498257398605347, "learning_rate": 9.377880184331799e-06, "loss": 39.5703, "step": 13001 }, { "epoch": 309.5731343283582, "grad_norm": 1.3245081901550293, "learning_rate": 9.377112135176653e-06, "loss": 39.2052, "step": 13002 }, { "epoch": 309.5970149253731, "grad_norm": 1.3770476579666138, "learning_rate": 9.376344086021506e-06, "loss": 39.5681, "step": 13003 }, { "epoch": 309.6208955223881, "grad_norm": 1.3496479988098145, "learning_rate": 9.37557603686636e-06, "loss": 38.9192, "step": 13004 }, { "epoch": 309.644776119403, "grad_norm": 1.388259768486023, "learning_rate": 9.374807987711214e-06, "loss": 40.3084, "step": 13005 }, { "epoch": 309.6686567164179, "grad_norm": 1.3530129194259644, "learning_rate": 9.374039938556069e-06, "loss": 40.5695, "step": 13006 }, { "epoch": 309.6925373134328, "grad_norm": 1.3428235054016113, "learning_rate": 9.373271889400922e-06, "loss": 39.0886, "step": 13007 }, { "epoch": 309.7164179104478, "grad_norm": 1.3203991651535034, "learning_rate": 9.372503840245778e-06, "loss": 39.7234, "step": 13008 }, { "epoch": 309.7402985074627, "grad_norm": 1.3959317207336426, "learning_rate": 9.37173579109063e-06, "loss": 38.4999, "step": 13009 }, { "epoch": 309.7641791044776, "grad_norm": 1.3502607345581055, "learning_rate": 9.370967741935485e-06, "loss": 39.6613, "step": 13010 }, { "epoch": 309.78805970149256, "grad_norm": 1.345345139503479, "learning_rate": 9.370199692780339e-06, "loss": 40.0734, "step": 13011 }, { "epoch": 309.81194029850747, "grad_norm": 1.355659008026123, "learning_rate": 9.369431643625193e-06, "loss": 41.1436, "step": 13012 }, { "epoch": 309.8358208955224, "grad_norm": 1.3566057682037354, "learning_rate": 9.368663594470046e-06, "loss": 39.2079, "step": 13013 }, { "epoch": 309.85970149253734, "grad_norm": 1.4288116693496704, "learning_rate": 9.3678955453149e-06, "loss": 40.0612, "step": 13014 }, { "epoch": 309.88358208955225, "grad_norm": 1.3493951559066772, "learning_rate": 9.367127496159755e-06, "loss": 39.7756, "step": 13015 }, { "epoch": 309.90746268656716, "grad_norm": 1.3299452066421509, "learning_rate": 9.36635944700461e-06, "loss": 40.8353, "step": 13016 }, { "epoch": 309.93134328358207, "grad_norm": 1.3943078517913818, "learning_rate": 9.365591397849462e-06, "loss": 41.5327, "step": 13017 }, { "epoch": 309.95522388059703, "grad_norm": 1.3210630416870117, "learning_rate": 9.364823348694318e-06, "loss": 40.4752, "step": 13018 }, { "epoch": 309.97910447761194, "grad_norm": 1.3783128261566162, "learning_rate": 9.36405529953917e-06, "loss": 39.1162, "step": 13019 }, { "epoch": 310.0, "grad_norm": 1.3778574466705322, "learning_rate": 9.363287250384025e-06, "loss": 34.1358, "step": 13020 }, { "epoch": 310.0, "step": 13020, "total_flos": 6.4678035126781e+17, "train_loss": 2.5676592043101696, "train_runtime": 25362.0232, "train_samples_per_second": 65.417, "train_steps_per_second": 0.513 } ], "logging_steps": 1.0, "max_steps": 13020, "num_input_tokens_seen": 0, "num_train_epochs": 310, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.4678035126781e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }